diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 16:49:04 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 16:49:04 +0000 |
commit | 16f504a9dca3fe3b70568f67b7d41241ae485288 (patch) | |
tree | c60f36ada0496ba928b7161059ba5ab1ab224f9d /src/VBox/Devices/Bus | |
parent | Initial commit. (diff) | |
download | virtualbox-16f504a9dca3fe3b70568f67b7d41241ae485288.tar.xz virtualbox-16f504a9dca3fe3b70568f67b7d41241ae485288.zip |
Adding upstream version 7.0.6-dfsg.upstream/7.0.6-dfsgupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/VBox/Devices/Bus')
-rw-r--r-- | src/VBox/Devices/Bus/DevIommuAmd.cpp | 7363 | ||||
-rw-r--r-- | src/VBox/Devices/Bus/DevIommuAmd.h | 62 | ||||
-rw-r--r-- | src/VBox/Devices/Bus/DevIommuIntel.cpp | 4459 | ||||
-rw-r--r-- | src/VBox/Devices/Bus/DevIommuIntel.h | 49 | ||||
-rw-r--r-- | src/VBox/Devices/Bus/DevPCI.cpp | 1867 | ||||
-rw-r--r-- | src/VBox/Devices/Bus/DevPciIch9.cpp | 4042 | ||||
-rw-r--r-- | src/VBox/Devices/Bus/DevPciInternal.h | 279 | ||||
-rw-r--r-- | src/VBox/Devices/Bus/Makefile.kup | 0 | ||||
-rw-r--r-- | src/VBox/Devices/Bus/MsiCommon.cpp | 358 | ||||
-rw-r--r-- | src/VBox/Devices/Bus/MsiCommon.h | 51 | ||||
-rw-r--r-- | src/VBox/Devices/Bus/MsixCommon.cpp | 355 | ||||
-rw-r--r-- | src/VBox/Devices/Bus/PciInline.h | 115 | ||||
-rw-r--r-- | src/VBox/Devices/Bus/SrvPciRawR0.cpp | 1041 |
13 files changed, 20041 insertions, 0 deletions
diff --git a/src/VBox/Devices/Bus/DevIommuAmd.cpp b/src/VBox/Devices/Bus/DevIommuAmd.cpp new file mode 100644 index 00000000..46f02d08 --- /dev/null +++ b/src/VBox/Devices/Bus/DevIommuAmd.cpp @@ -0,0 +1,7363 @@ +/* $Id: DevIommuAmd.cpp $ */ +/** @file + * IOMMU - Input/Output Memory Management Unit - AMD implementation. + */ + +/* + * Copyright (C) 2020-2022 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DEV_IOMMU +#include <VBox/msi.h> +#include <VBox/iommu-amd.h> +#include <VBox/vmm/pdmdev.h> + +#include <iprt/x86.h> +#include <iprt/string.h> +#include <iprt/avl.h> +#ifdef IN_RING3 +# include <iprt/mem.h> +#endif + +#include "VBoxDD.h" +#include "DevIommuAmd.h" + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** Release log prefix string. */ +#define IOMMU_LOG_PFX "AMD-IOMMU" +/** The current saved state version. */ +#define IOMMU_SAVED_STATE_VERSION 1 +/** The IOMMU device instance magic. */ +#define IOMMU_MAGIC 0x10acce55 + +/** Enable the IOTLBE cache only in ring-3 for now, see @bugref{9654#c95}. */ +#ifdef IN_RING3 +# define IOMMU_WITH_IOTLBE_CACHE +#endif +/** Enable the interrupt cache. */ +#define IOMMU_WITH_IRTE_CACHE + +/* The DTE cache is mandatory for the IOTLB or interrupt cache to work. */ +#if defined(IOMMU_WITH_IOTLBE_CACHE) || defined(IOMMU_WITH_IRTE_CACHE) +# define IOMMU_WITH_DTE_CACHE +/** The maximum number of device IDs in the cache. */ +# define IOMMU_DEV_CACHE_COUNT 16 +/** An empty device ID. */ +# define IOMMU_DTE_CACHE_KEY_NIL 0 +#endif + +#ifdef IOMMU_WITH_IRTE_CACHE +/** The maximum number of IRTE cache entries. */ +# define IOMMU_IRTE_CACHE_COUNT 32 +/** A NIL IRTE cache entry key. */ +# define IOMMU_IRTE_CACHE_KEY_NIL (~(uint32_t)0U) +/** Gets the device ID from an IRTE cache entry key. */ +#define IOMMU_IRTE_CACHE_KEY_GET_DEVICE_ID(a_Key) RT_HIWORD(a_Key) +/** Gets the IOVA from the IOTLB entry key. */ +# define IOMMU_IRTE_CACHE_KEY_GET_OFF(a_Key) RT_LOWORD(a_Key) +/** Makes an IRTE cache entry key. + * + * Bits 31:16 is the device ID (Bus, Device, Function). + * Bits 15:0 is the the offset into the IRTE table. + */ +# define IOMMU_IRTE_CACHE_KEY_MAKE(a_DevId, a_off) RT_MAKE_U32(a_off, a_DevId) +#endif /* IOMMU_WITH_IRTE_CACHE */ + +#ifdef IOMMU_WITH_IOTLBE_CACHE +/** The maximum number of IOTLB entries. */ +# define IOMMU_IOTLBE_MAX 64 +/** The mask of bits covering the domain ID in the IOTLBE key. */ +# define IOMMU_IOTLB_DOMAIN_ID_MASK UINT64_C(0xffffff0000000000) +/** The mask of bits covering the IOVA in the IOTLBE key. */ +# define IOMMU_IOTLB_IOVA_MASK (~IOMMU_IOTLB_DOMAIN_ID_MASK) +/** The number of bits to shift for the domain ID of the IOTLBE key. */ +# define IOMMU_IOTLB_DOMAIN_ID_SHIFT 40 +/** A NIL IOTLB key. */ +# define IOMMU_IOTLB_KEY_NIL UINT64_C(0) +/** Gets the domain ID from an IOTLB entry key. */ +# define IOMMU_IOTLB_KEY_GET_DOMAIN_ID(a_Key) ((a_Key) >> IOMMU_IOTLB_DOMAIN_ID_SHIFT) +/** Gets the IOVA from the IOTLB entry key. */ +# define IOMMU_IOTLB_KEY_GET_IOVA(a_Key) (((a_Key) & IOMMU_IOTLB_IOVA_MASK) << X86_PAGE_4K_SHIFT) +/** Makes an IOTLB entry key. + * + * Address bits 63:52 of the IOVA are zero extended, so top 12 bits are free. + * Address bits 11:0 of the IOVA are offset into the minimum page size of 4K, + * so bottom 12 bits are free. + * + * Thus we use the top 24 bits of key to hold bits 15:0 of the domain ID. + * We use the bottom 40 bits of the key to hold bits 51:12 of the IOVA. + */ +# define IOMMU_IOTLB_KEY_MAKE(a_DomainId, a_uIova) ( ((uint64_t)(a_DomainId) << IOMMU_IOTLB_DOMAIN_ID_SHIFT) \ + | (((a_uIova) >> X86_PAGE_4K_SHIFT) & IOMMU_IOTLB_IOVA_MASK)) +#endif /* IOMMU_WITH_IOTLBE_CACHE */ + +#ifdef IOMMU_WITH_DTE_CACHE +/** @name IOMMU_DTE_CACHE_F_XXX: DTE cache flags. + * + * Some of these flags are "basic" i.e. they correspond directly to their bits in + * the DTE. The rest of the flags are based on checks or operations on several DTE + * bits. + * + * The basic flags are: + * - VALID (DTE.V) + * - IO_PERM_READ (DTE.IR) + * - IO_PERM_WRITE (DTE.IW) + * - IO_PERM_RSVD (bit following DTW.IW reserved for future & to keep + * masking consistent) + * - SUPPRESS_ALL_IOPF (DTE.SA) + * - SUPPRESS_IOPF (DTE.SE) + * - INTR_MAP_VALID (DTE.IV) + * - IGNORE_UNMAPPED_INTR (DTE.IG) + * + * @see iommuAmdGetBasicDevFlags() + * @{ */ +/** The DTE is present. */ +# define IOMMU_DTE_CACHE_F_PRESENT RT_BIT(0) +/** The DTE is valid. */ +# define IOMMU_DTE_CACHE_F_VALID RT_BIT(1) +/** The DTE permissions apply for address translations. */ +# define IOMMU_DTE_CACHE_F_IO_PERM RT_BIT(2) +/** DTE permission - I/O read allowed. */ +# define IOMMU_DTE_CACHE_F_IO_PERM_READ RT_BIT(3) +/** DTE permission - I/O write allowed. */ +# define IOMMU_DTE_CACHE_F_IO_PERM_WRITE RT_BIT(4) +/** DTE permission - reserved. */ +# define IOMMU_DTE_CACHE_F_IO_PERM_RSVD RT_BIT(5) +/** Address translation required. */ +# define IOMMU_DTE_CACHE_F_ADDR_TRANSLATE RT_BIT(6) +/** Suppress all I/O page faults. */ +# define IOMMU_DTE_CACHE_F_SUPPRESS_ALL_IOPF RT_BIT(7) +/** Suppress I/O page faults. */ +# define IOMMU_DTE_CACHE_F_SUPPRESS_IOPF RT_BIT(8) +/** Interrupt map valid. */ +# define IOMMU_DTE_CACHE_F_INTR_MAP_VALID RT_BIT(9) +/** Ignore unmapped interrupts. */ +# define IOMMU_DTE_CACHE_F_IGNORE_UNMAPPED_INTR RT_BIT(10) +/** An I/O page fault has been raised for this device. */ +# define IOMMU_DTE_CACHE_F_IO_PAGE_FAULT_RAISED RT_BIT(11) +/** Fixed and arbitrary interrupt control: Target Abort. */ +# define IOMMU_DTE_CACHE_F_INTR_CTRL_TARGET_ABORT RT_BIT(12) +/** Fixed and arbitrary interrupt control: Forward unmapped. */ +# define IOMMU_DTE_CACHE_F_INTR_CTRL_FWD_UNMAPPED RT_BIT(13) +/** Fixed and arbitrary interrupt control: Remapped. */ +# define IOMMU_DTE_CACHE_F_INTR_CTRL_REMAPPED RT_BIT(14) +/** Fixed and arbitrary interrupt control: Reserved. */ +# define IOMMU_DTE_CACHE_F_INTR_CTRL_RSVD RT_BIT(15) +/** @} */ + +/** The number of bits to shift I/O device flags for DTE permissions. */ +# define IOMMU_DTE_CACHE_F_IO_PERM_SHIFT 3 +/** The mask of DTE permissions in I/O device flags. */ +# define IOMMU_DTE_CACHE_F_IO_PERM_MASK 0x3 +/** The number of bits to shift I/O device flags for interrupt control bits. */ +# define IOMMU_DTE_CACHE_F_INTR_CTRL_SHIFT 12 +/** The mask of interrupt control bits in I/O device flags. */ +# define IOMMU_DTE_CACHE_F_INTR_CTRL_MASK 0x3 +/** The number of bits to shift for ignore-unmapped interrupts bit. */ +# define IOMMU_DTE_CACHE_F_IGNORE_UNMAPPED_INTR_SHIFT 10 + +/** Acquires the cache lock. */ +# define IOMMU_CACHE_LOCK(a_pDevIns, a_pThis) \ + do { \ + int const rcLock = PDMDevHlpCritSectEnter((a_pDevIns), &(a_pThis)->CritSectCache, VINF_SUCCESS); \ + PDM_CRITSECT_RELEASE_ASSERT_RC_DEV((a_pDevIns), &(a_pThis)->CritSectCache, rcLock); \ + } while (0) + +/** Releases the cache lock. */ +# define IOMMU_CACHE_UNLOCK(a_pDevIns, a_pThis) PDMDevHlpCritSectLeave((a_pDevIns), &(a_pThis)->CritSectCache) +#endif /* IOMMU_WITH_DTE_CACHE */ + +/** Acquires the IOMMU lock (returns a_rcBusy on contention). */ +#define IOMMU_LOCK_RET(a_pDevIns, a_pThisCC, a_rcBusy) \ + do { \ + int const rcLock = (a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnLock((a_pDevIns), (a_rcBusy)); \ + if (RT_LIKELY(rcLock == VINF_SUCCESS)) \ + { /* likely */ } \ + else \ + return rcLock; \ + } while (0) + +/** Acquires the IOMMU lock (can fail under extraordinary circumstance in ring-0). */ +#define IOMMU_LOCK(a_pDevIns, a_pThisCC) \ + do { \ + int const rcLock = (a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnLock((a_pDevIns), VINF_SUCCESS); \ + PDM_CRITSECT_RELEASE_ASSERT_RC_DEV((a_pDevIns), NULL, rcLock); \ + } while (0) + +/** Checks if the current thread owns the PDM lock. */ +# define IOMMU_ASSERT_LOCK_IS_OWNER(a_pDevIns, a_pThisCC) \ + do \ + { \ + Assert((a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnLockIsOwner((a_pDevIns))); \ + NOREF(a_pThisCC); \ + } while (0) + +/** Releases the PDM lock. */ +# define IOMMU_UNLOCK(a_pDevIns, a_pThisCC) (a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnUnlock((a_pDevIns)) + +/** Gets the maximum valid IOVA for the given I/O page-table level. */ +#define IOMMU_GET_MAX_VALID_IOVA(a_Level) ((X86_PAGE_4K_SIZE << ((a_Level) * 9)) - 1) + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * IOMMU operation (transaction). + */ +typedef enum IOMMUOP +{ + /** Address translation request. */ + IOMMUOP_TRANSLATE_REQ = 0, + /** Memory read request. */ + IOMMUOP_MEM_READ, + /** Memory write request. */ + IOMMUOP_MEM_WRITE, + /** Interrupt request. */ + IOMMUOP_INTR_REQ, + /** Command. */ + IOMMUOP_CMD +} IOMMUOP; +/** Pointer to a IOMMU operation. */ +typedef IOMMUOP *PIOMMUOP; + +/** + * I/O page lookup. + */ +typedef struct IOPAGELOOKUP +{ + /** The translated system physical address. */ + RTGCPHYS GCPhysSpa; + /** The number of offset bits in the system physical address. */ + uint8_t cShift; + /** The I/O permissions for this translation, see IOMMU_IO_PERM_XXX. */ + uint8_t fPerm; +} IOPAGELOOKUP; +/** Pointer to an I/O page lookup. */ +typedef IOPAGELOOKUP *PIOPAGELOOKUP; +/** Pointer to a const I/O page lookup. */ +typedef IOPAGELOOKUP const *PCIOPAGELOOKUP; + +/** + * I/O address range. + */ +typedef struct IOADDRRANGE +{ + /** The address (virtual or physical). */ + uint64_t uAddr; + /** The size of the access in bytes. */ + size_t cb; + /** The I/O permissions for this translation, see IOMMU_IO_PERM_XXX. */ + uint8_t fPerm; +} IOADDRRANGE; +/** Pointer to an I/O address range. */ +typedef IOADDRRANGE *PIOADDRRANGE; +/** Pointer to a const I/O address range. */ +typedef IOADDRRANGE const *PCIOADDRRANGE; + +#ifdef IOMMU_WITH_DTE_CACHE +/** + * Device Table Entry Cache. + */ +typedef struct DTECACHE +{ + /** This device's flags, see IOMMU_DTE_CACHE_F_XXX. */ + uint16_t fFlags; + /** The domain ID assigned for this device by software. */ + uint16_t idDomain; +} DTECACHE; +/** Pointer to an I/O device struct. */ +typedef DTECACHE *PDTECACHE; +/** Pointer to a const I/O device struct. */ +typedef DTECACHE *PCDTECACHE; +AssertCompileSize(DTECACHE, 4); +#endif /* IOMMU_WITH_DTE_CACHE */ + +#ifdef IOMMU_WITH_IOTLBE_CACHE +/** + * I/O TLB Entry. + * Keep this as small and aligned as possible. + */ +typedef struct IOTLBE +{ + /** The AVL tree node. */ + AVLU64NODECORE Core; + /** The least recently used (LRU) list node. */ + RTLISTNODE NdLru; + /** The I/O page lookup results of the translation. */ + IOPAGELOOKUP PageLookup; + /** Whether the entry needs to be evicted from the cache. */ + bool fEvictPending; +} IOTLBE; +/** Pointer to an IOMMU I/O TLB entry struct. */ +typedef IOTLBE *PIOTLBE; +/** Pointer to a const IOMMU I/O TLB entry struct. */ +typedef IOTLBE const *PCIOTLBE; +AssertCompileSizeAlignment(IOTLBE, 8); +AssertCompileMemberOffset(IOTLBE, Core, 0); +#endif /* IOMMU_WITH_IOTLBE_CACHE */ + +#ifdef IOMMU_WITH_IRTE_CACHE +/** + * Interrupt Remap Table Entry Cache. + */ +typedef struct IRTECACHE +{ + /** The key, see IOMMU_IRTE_CACHE_KEY_MAKE. */ + uint32_t uKey; + /** The IRTE. */ + IRTE_T Irte; +} IRTECACHE; +/** Pointer to an IRTE cache struct. */ +typedef IRTECACHE *PIRTECACHE; +/** Pointer to a const IRTE cache struct. */ +typedef IRTECACHE const *PCIRTECACHE; +AssertCompileSizeAlignment(IRTECACHE, 4); +#endif /* IOMMU_WITH_IRTE_CACHE */ + +/** + * The shared IOMMU device state. + */ +typedef struct IOMMU +{ + /** IOMMU device index (0 is at the top of the PCI tree hierarchy). */ + uint32_t idxIommu; + /** IOMMU magic. */ + uint32_t u32Magic; + + /** The MMIO handle. */ + IOMMMIOHANDLE hMmio; + /** The event semaphore the command thread waits on. */ + SUPSEMEVENT hEvtCmdThread; + /** Whether the command thread has been signaled for wake up. */ + bool volatile fCmdThreadSignaled; + /** Padding. */ + bool afPadding0[3]; + /** The IOMMU PCI address. */ + PCIBDF uPciAddress; + +#ifdef IOMMU_WITH_DTE_CACHE + /** The critsect that protects the cache from concurrent access. */ + PDMCRITSECT CritSectCache; + /** Array of device IDs. */ + uint16_t aDeviceIds[IOMMU_DEV_CACHE_COUNT]; + /** Array of DTE cache entries. */ + DTECACHE aDteCache[IOMMU_DEV_CACHE_COUNT]; +#endif +#ifdef IOMMU_WITH_IRTE_CACHE + /** Array of IRTE cache entries. */ + IRTECACHE aIrteCache[IOMMU_IRTE_CACHE_COUNT]; +#endif + + /** @name PCI: Base capability block registers. + * @{ */ + IOMMU_BAR_T IommuBar; /**< IOMMU base address register. */ + /** @} */ + + /** @name MMIO: Control and status registers. + * @{ */ + DEV_TAB_BAR_T aDevTabBaseAddrs[8]; /**< Device table base address registers. */ + CMD_BUF_BAR_T CmdBufBaseAddr; /**< Command buffer base address register. */ + EVT_LOG_BAR_T EvtLogBaseAddr; /**< Event log base address register. */ + IOMMU_CTRL_T Ctrl; /**< IOMMU control register. */ + IOMMU_EXCL_RANGE_BAR_T ExclRangeBaseAddr; /**< IOMMU exclusion range base register. */ + IOMMU_EXCL_RANGE_LIMIT_T ExclRangeLimit; /**< IOMMU exclusion range limit. */ + IOMMU_EXT_FEAT_T ExtFeat; /**< IOMMU extended feature register. */ + /** @} */ + + /** @name MMIO: Peripheral Page Request (PPR) Log registers. + * @{ */ + PPR_LOG_BAR_T PprLogBaseAddr; /**< PPR Log base address register. */ + IOMMU_HW_EVT_HI_T HwEvtHi; /**< IOMMU hardware event register (Hi). */ + IOMMU_HW_EVT_LO_T HwEvtLo; /**< IOMMU hardware event register (Lo). */ + IOMMU_HW_EVT_STATUS_T HwEvtStatus; /**< IOMMU hardware event status. */ + /** @} */ + + /** @todo IOMMU: SMI filter. */ + + /** @name MMIO: Guest Virtual-APIC Log registers. + * @{ */ + GALOG_BAR_T GALogBaseAddr; /**< Guest Virtual-APIC Log base address register. */ + GALOG_TAIL_ADDR_T GALogTailAddr; /**< Guest Virtual-APIC Log Tail address register. */ + /** @} */ + + /** @name MMIO: Alternate PPR and Event Log registers. + * @{ */ + PPR_LOG_B_BAR_T PprLogBBaseAddr; /**< PPR Log B base address register. */ + EVT_LOG_B_BAR_T EvtLogBBaseAddr; /**< Event Log B base address register. */ + /** @} */ + + /** @name MMIO: Device-specific feature registers. + * @{ */ + DEV_SPECIFIC_FEAT_T DevSpecificFeat; /**< Device-specific feature extension register (DSFX). */ + DEV_SPECIFIC_CTRL_T DevSpecificCtrl; /**< Device-specific control extension register (DSCX). */ + DEV_SPECIFIC_STATUS_T DevSpecificStatus; /**< Device-specific status extension register (DSSX). */ + /** @} */ + + /** @name MMIO: MSI Capability Block registers. + * @{ */ + MSI_MISC_INFO_T MiscInfo; /**< MSI Misc. info registers / MSI Vector registers. */ + /** @} */ + + /** @name MMIO: Performance Optimization Control registers. + * @{ */ + IOMMU_PERF_OPT_CTRL_T PerfOptCtrl; /**< IOMMU Performance optimization control register. */ + /** @} */ + + /** @name MMIO: x2APIC Control registers. + * @{ */ + IOMMU_XT_GEN_INTR_CTRL_T XtGenIntrCtrl; /**< IOMMU X2APIC General interrupt control register. */ + IOMMU_XT_PPR_INTR_CTRL_T XtPprIntrCtrl; /**< IOMMU X2APIC PPR interrupt control register. */ + IOMMU_XT_GALOG_INTR_CTRL_T XtGALogIntrCtrl; /**< IOMMU X2APIC Guest Log interrupt control register. */ + /** @} */ + + /** @name MMIO: Memory Address Routing & Control (MARC) registers. + * @{ */ + MARC_APER_T aMarcApers[4]; /**< MARC Aperture Registers. */ + /** @} */ + + /** @name MMIO: Reserved register. + * @{ */ + IOMMU_RSVD_REG_T RsvdReg; /**< IOMMU Reserved Register. */ + /** @} */ + + /** @name MMIO: Command and Event Log pointer registers. + * @{ */ + CMD_BUF_HEAD_PTR_T CmdBufHeadPtr; /**< Command buffer head pointer register. */ + CMD_BUF_TAIL_PTR_T CmdBufTailPtr; /**< Command buffer tail pointer register. */ + EVT_LOG_HEAD_PTR_T EvtLogHeadPtr; /**< Event log head pointer register. */ + EVT_LOG_TAIL_PTR_T EvtLogTailPtr; /**< Event log tail pointer register. */ + /** @} */ + + /** @name MMIO: Command and Event Status register. + * @{ */ + IOMMU_STATUS_T Status; /**< IOMMU status register. */ + /** @} */ + + /** @name MMIO: PPR Log Head and Tail pointer registers. + * @{ */ + PPR_LOG_HEAD_PTR_T PprLogHeadPtr; /**< IOMMU PPR log head pointer register. */ + PPR_LOG_TAIL_PTR_T PprLogTailPtr; /**< IOMMU PPR log tail pointer register. */ + /** @} */ + + /** @name MMIO: Guest Virtual-APIC Log Head and Tail pointer registers. + * @{ */ + GALOG_HEAD_PTR_T GALogHeadPtr; /**< Guest Virtual-APIC log head pointer register. */ + GALOG_TAIL_PTR_T GALogTailPtr; /**< Guest Virtual-APIC log tail pointer register. */ + /** @} */ + + /** @name MMIO: PPR Log B Head and Tail pointer registers. + * @{ */ + PPR_LOG_B_HEAD_PTR_T PprLogBHeadPtr; /**< PPR log B head pointer register. */ + PPR_LOG_B_TAIL_PTR_T PprLogBTailPtr; /**< PPR log B tail pointer register. */ + /** @} */ + + /** @name MMIO: Event Log B Head and Tail pointer registers. + * @{ */ + EVT_LOG_B_HEAD_PTR_T EvtLogBHeadPtr; /**< Event log B head pointer register. */ + EVT_LOG_B_TAIL_PTR_T EvtLogBTailPtr; /**< Event log B tail pointer register. */ + /** @} */ + + /** @name MMIO: PPR Log Overflow protection registers. + * @{ */ + PPR_LOG_AUTO_RESP_T PprLogAutoResp; /**< PPR Log Auto Response register. */ + PPR_LOG_OVERFLOW_EARLY_T PprLogOverflowEarly; /**< PPR Log Overflow Early Indicator register. */ + PPR_LOG_B_OVERFLOW_EARLY_T PprLogBOverflowEarly; /**< PPR Log B Overflow Early Indicator register. */ + /** @} */ + + /** @todo IOMMU: IOMMU Event counter registers. */ + +#ifdef VBOX_WITH_STATISTICS + /** @name IOMMU: Stat counters. + * @{ */ + STAMCOUNTER StatMmioReadR3; /**< Number of MMIO reads in R3. */ + STAMCOUNTER StatMmioReadRZ; /**< Number of MMIO reads in RZ. */ + STAMCOUNTER StatMmioWriteR3; /**< Number of MMIO writes in R3. */ + STAMCOUNTER StatMmioWriteRZ; /**< Number of MMIO writes in RZ. */ + + STAMCOUNTER StatMsiRemapR3; /**< Number of MSI remap requests in R3. */ + STAMCOUNTER StatMsiRemapRZ; /**< Number of MSI remap requests in RZ. */ + + STAMCOUNTER StatMemReadR3; /**< Number of memory read translation requests in R3. */ + STAMCOUNTER StatMemReadRZ; /**< Number of memory read translation requests in RZ. */ + STAMCOUNTER StatMemWriteR3; /**< Number of memory write translation requests in R3. */ + STAMCOUNTER StatMemWriteRZ; /**< Number of memory write translation requests in RZ. */ + + STAMCOUNTER StatMemBulkReadR3; /**< Number of memory read bulk translation requests in R3. */ + STAMCOUNTER StatMemBulkReadRZ; /**< Number of memory read bulk translation requests in RZ. */ + STAMCOUNTER StatMemBulkWriteR3; /**< Number of memory write bulk translation requests in R3. */ + STAMCOUNTER StatMemBulkWriteRZ; /**< Number of memory write bulk translation requests in RZ. */ + + STAMCOUNTER StatCmd; /**< Number of commands processed in total. */ + STAMCOUNTER StatCmdCompWait; /**< Number of Completion Wait commands processed. */ + STAMCOUNTER StatCmdInvDte; /**< Number of Invalidate DTE commands processed. */ + STAMCOUNTER StatCmdInvIommuPages; /**< Number of Invalidate IOMMU pages commands processed. */ + STAMCOUNTER StatCmdInvIotlbPages; /**< Number of Invalidate IOTLB pages commands processed. */ + STAMCOUNTER StatCmdInvIntrTable; /**< Number of Invalidate Interrupt Table commands processed. */ + STAMCOUNTER StatCmdPrefIommuPages; /**< Number of Prefetch IOMMU Pages commands processed. */ + STAMCOUNTER StatCmdCompletePprReq; /**< Number of Complete PPR Requests commands processed. */ + STAMCOUNTER StatCmdInvIommuAll; /**< Number of Invalidate IOMMU All commands processed. */ + + STAMCOUNTER StatIotlbeCached; /**< Number of IOTLB entries in the cache. */ + STAMCOUNTER StatIotlbeLazyEvictReuse; /**< Number of IOTLB entries re-used after lazy eviction. */ + + STAMPROFILEADV StatProfDteLookup; /**< Profiling of I/O page walk (from memory). */ + STAMPROFILEADV StatProfIotlbeLookup; /**< Profiling of IOTLB entry lookup (from cache). */ + + STAMPROFILEADV StatProfIrteLookup; /**< Profiling of IRTE entry lookup (from memory). */ + STAMPROFILEADV StatProfIrteCacheLookup; /**< Profiling of IRTE entry lookup (from cache). */ + + STAMCOUNTER StatAccessCacheHit; /**< Number of IOTLB cache hits. */ + STAMCOUNTER StatAccessCacheHitFull; /**< Number of accesses that were fully looked up from the cache. */ + STAMCOUNTER StatAccessCacheMiss; /**< Number of cache misses (resulting in DTE lookups). */ + STAMCOUNTER StatAccessCacheNonContig; /**< Number of cache accesses resulting in non-contiguous access. */ + STAMCOUNTER StatAccessCachePermDenied; /**< Number of cache accesses resulting in insufficient permissions. */ + STAMCOUNTER StatAccessDteNonContig; /**< Number of DTE accesses resulting in non-contiguous access. */ + STAMCOUNTER StatAccessDtePermDenied; /**< Number of DTE accesses resulting in insufficient permissions. */ + + STAMCOUNTER StatIntrCacheHit; /**< Number of interrupt cache hits. */ + STAMCOUNTER StatIntrCacheMiss; /**< Number of interrupt cache misses. */ + + STAMCOUNTER StatNonStdPageSize; /**< Number of non-standard page size translations. */ + STAMCOUNTER StatIopfs; /**< Number of I/O page faults. */ + /** @} */ +#endif +} IOMMU; +/** Pointer to the IOMMU device state. */ +typedef IOMMU *PIOMMU; +/** Pointer to the const IOMMU device state. */ +typedef const IOMMU *PCIOMMU; +AssertCompileMemberAlignment(IOMMU, hMmio, 8); +#ifdef IOMMU_WITH_DTE_CACHE +AssertCompileMemberAlignment(IOMMU, CritSectCache, 8); +AssertCompileMemberAlignment(IOMMU, aDeviceIds, 8); +AssertCompileMemberAlignment(IOMMU, aDteCache, 8); +#endif +#ifdef IOMMU_WITH_IRTE_CACHE +AssertCompileMemberAlignment(IOMMU, aIrteCache, 8); +#endif +AssertCompileMemberAlignment(IOMMU, IommuBar, 8); +AssertCompileMemberAlignment(IOMMU, aDevTabBaseAddrs, 8); +AssertCompileMemberAlignment(IOMMU, CmdBufHeadPtr, 8); +AssertCompileMemberAlignment(IOMMU, Status, 8); + +/** + * The ring-3 IOMMU device state. + */ +typedef struct IOMMUR3 +{ + /** Device instance. */ + PPDMDEVINSR3 pDevInsR3; + /** The IOMMU helpers. */ + R3PTRTYPE(PCPDMIOMMUHLPR3) pIommuHlpR3; + /** The command thread handle. */ + R3PTRTYPE(PPDMTHREAD) pCmdThread; +#ifdef IOMMU_WITH_IOTLBE_CACHE + /** Pointer to array of pre-allocated IOTLBEs. */ + PIOTLBE paIotlbes; + /** Maps [DomainId,Iova] to [IOTLBE]. */ + AVLU64TREE TreeIotlbe; + /** LRU list anchor for IOTLB entries. */ + RTLISTANCHOR LstLruIotlbe; + /** Index of the next unused IOTLB. */ + uint32_t idxUnusedIotlbe; + /** Number of cached IOTLB entries in the tree. */ + uint32_t cCachedIotlbes; +#endif +} IOMMUR3; +/** Pointer to the ring-3 IOMMU device state. */ +typedef IOMMUR3 *PIOMMUR3; +/** Pointer to the const ring-3 IOMMU device state. */ +typedef const IOMMUR3 *PCIOMMUR3; +#ifdef IOMMU_WITH_IOTLBE_CACHE +AssertCompileMemberAlignment(IOMMUR3, paIotlbes, 8); +AssertCompileMemberAlignment(IOMMUR3, TreeIotlbe, 8); +AssertCompileMemberAlignment(IOMMUR3, LstLruIotlbe, 8); +#endif + +/** + * The ring-0 IOMMU device state. + */ +typedef struct IOMMUR0 +{ + /** Device instance. */ + PPDMDEVINSR0 pDevInsR0; + /** The IOMMU helpers. */ + R0PTRTYPE(PCPDMIOMMUHLPR0) pIommuHlpR0; +} IOMMUR0; +/** Pointer to the ring-0 IOMMU device state. */ +typedef IOMMUR0 *PIOMMUR0; + +/** + * The raw-mode IOMMU device state. + */ +typedef struct IOMMURC +{ + /** Device instance. */ + PPDMDEVINSRC pDevInsRC; + /** The IOMMU helpers. */ + RCPTRTYPE(PCPDMIOMMUHLPRC) pIommuHlpRC; +} IOMMURC; +/** Pointer to the raw-mode IOMMU device state. */ +typedef IOMMURC *PIOMMURC; + +/** The IOMMU device state for the current context. */ +typedef CTX_SUFF(IOMMU) IOMMUCC; +/** Pointer to the IOMMU device state for the current context. */ +typedef CTX_SUFF(PIOMMU) PIOMMUCC; + +/** + * IOMMU register access. + */ +typedef struct IOMMUREGACC +{ + const char *pszName; + VBOXSTRICTRC (*pfnRead)(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value); + VBOXSTRICTRC (*pfnWrite)(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value); +} IOMMUREGACC; +/** Pointer to an IOMMU register access. */ +typedef IOMMUREGACC *PIOMMUREGACC; +/** Pointer to a const IOMMU register access. */ +typedef IOMMUREGACC const *PCIOMMUREGACC; + +#ifdef IOMMU_WITH_IOTLBE_CACHE +/** + * IOTLBE flush argument. + */ +typedef struct IOTLBEFLUSHARG +{ + /** The ring-3 IOMMU device state. */ + PIOMMUR3 pIommuR3; + /** The domain ID to flush. */ + uint16_t idDomain; +} IOTLBEFLUSHARG; +/** Pointer to an IOTLBE flush argument. */ +typedef IOTLBEFLUSHARG *PIOTLBEFLUSHARG; +/** Pointer to a const IOTLBE flush argument. */ +typedef IOTLBEFLUSHARG const *PCIOTLBEFLUSHARG; + +/** + * IOTLBE Info. argument. + */ +typedef struct IOTLBEINFOARG +{ + /** The ring-3 IOMMU device state. */ + PIOMMUR3 pIommuR3; + /** The info helper. */ + PCDBGFINFOHLP pHlp; + /** The domain ID to dump IOTLB entry. */ + uint16_t idDomain; +} IOTLBEINFOARG; +/** Pointer to an IOTLBE flush argument. */ +typedef IOTLBEINFOARG *PIOTLBEINFOARG; +/** Pointer to a const IOTLBE flush argument. */ +typedef IOTLBEINFOARG const *PCIOTLBEINFOARG; +#endif + +/** + * IOMMU operation auxiliary info. + */ +typedef struct IOMMUOPAUX +{ + /** The IOMMU operation being performed. */ + IOMMUOP enmOp; + /** The device table entry (can be NULL). */ + PCDTE_T pDte; + /** The device ID (bus, device, function). */ + uint16_t idDevice; + /** The domain ID (when the DTE isn't provided). */ + uint16_t idDomain; +} IOMMUOPAUX; +/** Pointer to an I/O address lookup struct. */ +typedef IOMMUOPAUX *PIOMMUOPAUX; +/** Pointer to a const I/O address lookup struct. */ +typedef IOMMUOPAUX const *PCIOMMUOPAUX; + +typedef DECLCALLBACKTYPE(int, FNIOPAGELOOKUP,(PPDMDEVINS pDevIns, uint64_t uIovaPage, uint8_t fPerm, PCIOMMUOPAUX pAux, + PIOPAGELOOKUP pPageLookup)); +typedef FNIOPAGELOOKUP *PFNIOPAGELOOKUP; + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +#ifdef IN_RING3 +/** + * An array of the number of device table segments supported. + * Indexed by u2DevTabSegSup. + */ +static uint8_t const g_acDevTabSegs[] = { 0, 2, 4, 8 }; +#endif + +#if (defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)) || defined(LOG_ENABLED) +/** + * The IOMMU I/O permission names. + */ +static const char * const g_aszPerm[] = { "none", "read", "write", "read+write" }; +#endif + +/** + * An array of the masks to select the device table segment index from a device ID. + */ +static uint16_t const g_auDevTabSegMasks[] = { 0x0, 0x8000, 0xc000, 0xe000 }; + +/** + * An array of the shift values to select the device table segment index from a + * device ID. + */ +static uint8_t const g_auDevTabSegShifts[] = { 0, 15, 14, 13 }; + +/** + * The maximum size (inclusive) of each device table segment (0 to 7). + * Indexed by the device table segment index. + */ +static uint16_t const g_auDevTabSegMaxSizes[] = { 0x1ff, 0xff, 0x7f, 0x7f, 0x3f, 0x3f, 0x3f, 0x3f }; + + +#ifndef VBOX_DEVICE_STRUCT_TESTCASE +/** + * Gets the maximum number of buffer entries for the given buffer length. + * + * @returns Number of buffer entries. + * @param uEncodedLen The length (power-of-2 encoded). + */ +DECLINLINE(uint32_t) iommuAmdGetBufMaxEntries(uint8_t uEncodedLen) +{ + Assert(uEncodedLen > 7); + Assert(uEncodedLen < 16); + return 2 << (uEncodedLen - 1); +} + + +/** + * Gets the total length of the buffer given a base register's encoded length. + * + * @returns The length of the buffer in bytes. + * @param uEncodedLen The length (power-of-2 encoded). + */ +DECLINLINE(uint32_t) iommuAmdGetTotalBufLength(uint8_t uEncodedLen) +{ + Assert(uEncodedLen > 7); + Assert(uEncodedLen < 16); + return (2 << (uEncodedLen - 1)) << 4; +} + + +/** + * Gets the number of (unconsumed) entries in the event log. + * + * @returns The number of entries in the event log. + * @param pThis The shared IOMMU device state. + */ +static uint32_t iommuAmdGetEvtLogEntryCount(PIOMMU pThis) +{ + uint32_t const idxTail = pThis->EvtLogTailPtr.n.off >> IOMMU_EVT_GENERIC_SHIFT; + uint32_t const idxHead = pThis->EvtLogHeadPtr.n.off >> IOMMU_EVT_GENERIC_SHIFT; + if (idxTail >= idxHead) + return idxTail - idxHead; + + uint32_t const cMaxEvts = iommuAmdGetBufMaxEntries(pThis->EvtLogBaseAddr.n.u4Len); + return cMaxEvts - idxHead + idxTail; +} + + +#if (defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)) || defined(LOG_ENABLED) +/** + * Gets the descriptive I/O permission name for a memory access. + * + * @returns The I/O permission name. + * @param fPerm The I/O permissions for the access, see IOMMU_IO_PERM_XXX. + */ +static const char *iommuAmdMemAccessGetPermName(uint8_t fPerm) +{ + /* We shouldn't construct an access with "none" or "read+write" (must be read or write) permissions. */ + Assert(fPerm > 0 && fPerm < RT_ELEMENTS(g_aszPerm)); + return g_aszPerm[fPerm & IOMMU_IO_PERM_MASK]; +} +#endif + + +#ifdef IOMMU_WITH_DTE_CACHE +/** + * Gets the basic I/O device flags for the given device table entry. + * + * @returns The basic I/O device flags. + * @param pDte The device table entry. + */ +static uint16_t iommuAmdGetBasicDevFlags(PCDTE_T pDte) +{ + /* Extract basic flags from bits 127:0 of the DTE. */ + uint16_t fFlags = 0; + if (pDte->n.u1Valid) + { + fFlags |= IOMMU_DTE_CACHE_F_VALID; + + /** @todo Skip the if checks here (shift/mask the relevant bits over). */ + if (pDte->n.u1SuppressAllPfEvents) + fFlags |= IOMMU_DTE_CACHE_F_SUPPRESS_ALL_IOPF; + if (pDte->n.u1SuppressPfEvents) + fFlags |= IOMMU_DTE_CACHE_F_SUPPRESS_IOPF; + + uint16_t const fDtePerm = (pDte->au64[0] >> IOMMU_IO_PERM_SHIFT) & IOMMU_IO_PERM_MASK; + AssertCompile(IOMMU_DTE_CACHE_F_IO_PERM_MASK == IOMMU_IO_PERM_MASK); + fFlags |= fDtePerm << IOMMU_DTE_CACHE_F_IO_PERM_SHIFT; + } + + /* Extract basic flags from bits 255:128 of the DTE. */ + if (pDte->n.u1IntrMapValid) + { + fFlags |= IOMMU_DTE_CACHE_F_INTR_MAP_VALID; + + /** @todo Skip the if check here (shift/mask the relevant bit over). */ + if (pDte->n.u1IgnoreUnmappedIntrs) + fFlags |= IOMMU_DTE_CACHE_F_IGNORE_UNMAPPED_INTR; + + uint16_t const fIntrCtrl = IOMMU_DTE_GET_INTR_CTRL(pDte); + AssertCompile(IOMMU_DTE_CACHE_F_INTR_CTRL_MASK == IOMMU_DTE_INTR_CTRL_MASK); + fFlags |= fIntrCtrl << IOMMU_DTE_CACHE_F_INTR_CTRL_SHIFT; + } + return fFlags; +} +#endif + + +/** + * Remaps the source MSI to the destination MSI given the IRTE. + * + * @param pMsiIn The source MSI. + * @param pMsiOut Where to store the remapped MSI. + * @param pIrte The IRTE used for the remapping. + */ +static void iommuAmdIrteRemapMsi(PCMSIMSG pMsiIn, PMSIMSG pMsiOut, PCIRTE_T pIrte) +{ + /* Preserve all bits from the source MSI address and data that don't map 1:1 from the IRTE. */ + *pMsiOut = *pMsiIn; + + pMsiOut->Addr.n.u1DestMode = pIrte->n.u1DestMode; + pMsiOut->Addr.n.u8DestId = pIrte->n.u8Dest; + + pMsiOut->Data.n.u8Vector = pIrte->n.u8Vector; + pMsiOut->Data.n.u3DeliveryMode = pIrte->n.u3IntrType; +} + + +#ifdef IOMMU_WITH_DTE_CACHE +/** + * Looks up an entry in the DTE cache for the given device ID. + * + * @returns The index of the entry, or the cache capacity if no entry was found. + * @param pThis The shared IOMMU device state. + * @param idDevice The device ID (bus, device, function). + */ +DECLINLINE(uint16_t) iommuAmdDteCacheEntryLookup(PIOMMU pThis, uint16_t idDevice) +{ + uint16_t const cDeviceIds = RT_ELEMENTS(pThis->aDeviceIds); + for (uint16_t i = 0; i < cDeviceIds; i++) + { + if (pThis->aDeviceIds[i] == idDevice) + return i; + } + return cDeviceIds; +} + + +/** + * Gets an free/unused DTE cache entry. + * + * @returns The index of an unused entry, or cache capacity if the cache is full. + * @param pThis The shared IOMMU device state. + */ +DECLINLINE(uint16_t) iommuAmdDteCacheEntryGetUnused(PCIOMMU pThis) +{ + /* + * ASSUMES device ID 0 is the PCI host bridge or the IOMMU itself + * (the latter being an ugly hack) and cannot be a valid device ID. + */ + uint16_t const cDeviceIds = RT_ELEMENTS(pThis->aDeviceIds); + for (uint16_t i = 0; i < cDeviceIds; i++) + { + if (!pThis->aDeviceIds[i]) + return i; + } + return cDeviceIds; +} + + +/** + * Adds a DTE cache entry at the given index. + * + * @param pThis The shared IOMMU device state. + * @param idxDte The index of the DTE cache entry. + * @param idDevice The device ID (bus, device, function). + * @param fFlags Device flags to set, see IOMMU_DTE_CACHE_F_XXX. + * @param idDomain The domain ID. + * + * @remarks Requires the cache lock to be taken. + */ +DECL_FORCE_INLINE(void) iommuAmdDteCacheAddAtIndex(PIOMMU pThis, uint16_t idxDte, uint16_t idDevice, uint16_t fFlags, + uint16_t idDomain) +{ + pThis->aDeviceIds[idxDte] = idDevice; + pThis->aDteCache[idxDte].fFlags = fFlags; + pThis->aDteCache[idxDte].idDomain = idDomain; +} + + +/** + * Adds a DTE cache entry. + * + * @param pDevIns The IOMMU instance data. + * @param idDevice The device ID (bus, device, function). + * @param pDte The device table entry. + */ +static void iommuAmdDteCacheAdd(PPDMDEVINS pDevIns, uint16_t idDevice, PCDTE_T pDte) +{ + uint16_t const fFlags = iommuAmdGetBasicDevFlags(pDte) | IOMMU_DTE_CACHE_F_PRESENT; + uint16_t const idDomain = pDte->n.u16DomainId; + + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + IOMMU_CACHE_LOCK(pDevIns, pThis); + + uint16_t const cDteCache = RT_ELEMENTS(pThis->aDteCache); + uint16_t idxDte = iommuAmdDteCacheEntryLookup(pThis, idDevice); + if ( idxDte >= cDteCache /* Not found. */ + && (idxDte = iommuAmdDteCacheEntryGetUnused(pThis)) < cDteCache) /* Get new/unused slot index. */ + iommuAmdDteCacheAddAtIndex(pThis, idxDte, idDevice, fFlags, idDomain); + + IOMMU_CACHE_UNLOCK(pDevIns, pThis); +} + + +/** + * Updates flags for an existing DTE cache entry given its index. + * + * @param pThis The shared IOMMU device state. + * @param idxDte The index of the DTE cache entry. + * @param fOrMask Device flags to add to the existing flags, see + * IOMMU_DTE_CACHE_F_XXX. + * @param fAndMask Device flags to remove from the existing flags, see + * IOMMU_DTE_CACHE_F_XXX. + * + * @remarks Requires the cache lock to be taken. + */ +DECL_FORCE_INLINE(void) iommuAmdDteCacheUpdateFlagsForIndex(PIOMMU pThis, uint16_t idxDte, uint16_t fOrMask, uint16_t fAndMask) +{ + uint16_t const fOldFlags = pThis->aDteCache[idxDte].fFlags; + uint16_t const fNewFlags = (fOldFlags | fOrMask) & ~fAndMask; + Assert(fOldFlags & IOMMU_DTE_CACHE_F_PRESENT); + pThis->aDteCache[idxDte].fFlags = fNewFlags; +} + + +#ifdef IOMMU_WITH_IOTLBE_CACHE +/** + * Adds a new DTE cache entry or updates flags for an existing DTE cache entry. + * If the cache is full, nothing happens. + * + * @param pDevIns The IOMMU instance data. + * @param pDte The device table entry. + * @param idDevice The device ID (bus, device, function). + * @param fOrMask Device flags to add to the existing flags, see + * IOMMU_DTE_CACHE_F_XXX. + * @param fAndMask Device flags to remove from the existing flags, see + * IOMMU_DTE_CACHE_F_XXX. + */ +static void iommuAmdDteCacheAddOrUpdateFlags(PPDMDEVINS pDevIns, PCDTE_T pDte, uint16_t idDevice, uint16_t fOrMask, + uint16_t fAndMask) +{ + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + IOMMU_CACHE_LOCK(pDevIns, pThis); + + uint16_t const cDteCache = RT_ELEMENTS(pThis->aDteCache); + uint16_t idxDte = iommuAmdDteCacheEntryLookup(pThis, idDevice); + if (idxDte < cDteCache) + iommuAmdDteCacheUpdateFlagsForIndex(pThis, idxDte, fOrMask, fAndMask); + else if ((idxDte = iommuAmdDteCacheEntryGetUnused(pThis)) < cDteCache) + { + uint16_t const fFlags = (iommuAmdGetBasicDevFlags(pDte) | IOMMU_DTE_CACHE_F_PRESENT | fOrMask) & ~fAndMask; + iommuAmdDteCacheAddAtIndex(pThis, idxDte, idDevice, fFlags, pDte->n.u16DomainId); + } + /* else: cache is full, shouldn't really happen. */ + + IOMMU_CACHE_UNLOCK(pDevIns, pThis); +} +#endif + + +/** + * Updates flags for an existing DTE cache entry. + * + * @param pDevIns The IOMMU instance data. + * @param idDevice The device ID (bus, device, function). + * @param fOrMask Device flags to add to the existing flags, see + * IOMMU_DTE_CACHE_F_XXX. + * @param fAndMask Device flags to remove from the existing flags, see + * IOMMU_DTE_CACHE_F_XXX. + */ +static void iommuAmdDteCacheUpdateFlags(PPDMDEVINS pDevIns, uint16_t idDevice, uint16_t fOrMask, uint16_t fAndMask) +{ + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + IOMMU_CACHE_LOCK(pDevIns, pThis); + + uint16_t const cDteCache = RT_ELEMENTS(pThis->aDteCache); + uint16_t const idxDte = iommuAmdDteCacheEntryLookup(pThis, idDevice); + if (idxDte < cDteCache) + iommuAmdDteCacheUpdateFlagsForIndex(pThis, idxDte, fOrMask, fAndMask); + + IOMMU_CACHE_UNLOCK(pDevIns, pThis); +} + + +# ifdef IN_RING3 +/** + * Removes a DTE cache entry. + * + * @param pDevIns The IOMMU instance data. + * @param idDevice The device ID to remove cache entries for. + */ +static void iommuAmdDteCacheRemove(PPDMDEVINS pDevIns, uint16_t idDevice) +{ + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + IOMMU_CACHE_LOCK(pDevIns, pThis); + + uint16_t const cDteCache = RT_ELEMENTS(pThis->aDteCache); + uint16_t const idxDte = iommuAmdDteCacheEntryLookup(pThis, idDevice); + if (idxDte < cDteCache) + { + pThis->aDteCache[idxDte].fFlags = 0; + pThis->aDteCache[idxDte].idDomain = 0; + } + + IOMMU_CACHE_UNLOCK(pDevIns, pThis); +} + + +/** + * Removes all entries in the device table entry cache. + * + * @param pDevIns The IOMMU instance data. + */ +static void iommuAmdDteCacheRemoveAll(PPDMDEVINS pDevIns) +{ + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + IOMMU_CACHE_LOCK(pDevIns, pThis); + RT_ZERO(pThis->aDeviceIds); + RT_ZERO(pThis->aDteCache); + IOMMU_CACHE_UNLOCK(pDevIns, pThis); +} +# endif /* IN_RING3 */ +#endif /* IOMMU_WITH_DTE_CACHE */ + + +#ifdef IOMMU_WITH_IOTLBE_CACHE +/** + * Moves the IOTLB entry to the least recently used slot. + * + * @param pThisR3 The ring-3 IOMMU device state. + * @param pIotlbe The IOTLB entry to move. + */ +DECLINLINE(void) iommuAmdIotlbEntryMoveToLru(PIOMMUR3 pThisR3, PIOTLBE pIotlbe) +{ + if (!RTListNodeIsFirst(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru)) + { + RTListNodeRemove(&pIotlbe->NdLru); + RTListPrepend(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru); + } +} + + +/** + * Moves the IOTLB entry to the most recently used slot. + * + * @param pThisR3 The ring-3 IOMMU device state. + * @param pIotlbe The IOTLB entry to move. + */ +DECLINLINE(void) iommuAmdIotlbEntryMoveToMru(PIOMMUR3 pThisR3, PIOTLBE pIotlbe) +{ + if (!RTListNodeIsLast(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru)) + { + RTListNodeRemove(&pIotlbe->NdLru); + RTListAppend(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru); + } +} + + +# ifdef IN_RING3 +/** + * Dumps the IOTLB entry via the debug info helper. + * + * @returns VINF_SUCCESS. + * @param pNode Pointer to an IOTLB entry to dump info. + * @param pvUser Pointer to an IOTLBEINFOARG. + */ +static DECLCALLBACK(int) iommuAmdR3IotlbEntryInfo(PAVLU64NODECORE pNode, void *pvUser) +{ + /* Validate. */ + PCIOTLBEINFOARG pArgs = (PCIOTLBEINFOARG)pvUser; + AssertPtr(pArgs); + AssertPtr(pArgs->pIommuR3); + AssertPtr(pArgs->pHlp); + //Assert(pArgs->pIommuR3->u32Magic == IOMMU_MAGIC); + + uint16_t const idDomain = IOMMU_IOTLB_KEY_GET_DOMAIN_ID(pNode->Key); + if (idDomain == pArgs->idDomain) + { + PCIOTLBE pIotlbe = (PCIOTLBE)pNode; + AVLU64KEY const uKey = pIotlbe->Core.Key; + uint64_t const uIova = IOMMU_IOTLB_KEY_GET_IOVA(uKey); + RTGCPHYS const GCPhysSpa = pIotlbe->PageLookup.GCPhysSpa; + uint8_t const cShift = pIotlbe->PageLookup.cShift; + size_t const cbPage = RT_BIT_64(cShift); + uint8_t const fPerm = pIotlbe->PageLookup.fPerm; + const char *pszPerm = iommuAmdMemAccessGetPermName(fPerm); + bool const fEvictPending = pIotlbe->fEvictPending; + + PCDBGFINFOHLP pHlp = pArgs->pHlp; + pHlp->pfnPrintf(pHlp, " Key = %#RX64 (%#RX64)\n", uKey, uIova); + pHlp->pfnPrintf(pHlp, " GCPhys = %#RGp\n", GCPhysSpa); + pHlp->pfnPrintf(pHlp, " cShift = %u (%zu bytes)\n", cShift, cbPage); + pHlp->pfnPrintf(pHlp, " fPerm = %#x (%s)\n", fPerm, pszPerm); + pHlp->pfnPrintf(pHlp, " fEvictPending = %RTbool\n", fEvictPending); + } + + return VINF_SUCCESS; +} +# endif /* IN_RING3 */ + + +/** + * Removes the IOTLB entry if it's associated with the specified domain ID. + * + * @returns VINF_SUCCESS. + * @param pNode Pointer to an IOTLBE. + * @param pvUser Pointer to an IOTLBEFLUSHARG containing the domain ID. + */ +static DECLCALLBACK(int) iommuAmdIotlbEntryRemoveDomainId(PAVLU64NODECORE pNode, void *pvUser) +{ + /* Validate. */ + PCIOTLBEFLUSHARG pArgs = (PCIOTLBEFLUSHARG)pvUser; + AssertPtr(pArgs); + AssertPtr(pArgs->pIommuR3); + //Assert(pArgs->pIommuR3->u32Magic == IOMMU_MAGIC); + + uint16_t const idDomain = IOMMU_IOTLB_KEY_GET_DOMAIN_ID(pNode->Key); + if (idDomain == pArgs->idDomain) + { + /* Mark this entry is as invalidated and needs to be evicted later. */ + PIOTLBE pIotlbe = (PIOTLBE)pNode; + pIotlbe->fEvictPending = true; + iommuAmdIotlbEntryMoveToLru(pArgs->pIommuR3, (PIOTLBE)pNode); + } + return VINF_SUCCESS; +} + + +/** + * Destroys an IOTLB entry that's in the tree. + * + * @returns VINF_SUCCESS. + * @param pNode Pointer to an IOTLBE. + * @param pvUser Opaque data. Currently not used, will be NULL. + */ +static DECLCALLBACK(int) iommuAmdIotlbEntryDestroy(PAVLU64NODECORE pNode, void *pvUser) +{ + RT_NOREF(pvUser); + PIOTLBE pIotlbe = (PIOTLBE)pNode; + Assert(pIotlbe); + pIotlbe->NdLru.pNext = NULL; + pIotlbe->NdLru.pPrev = NULL; + RT_ZERO(pIotlbe->PageLookup); + pIotlbe->fEvictPending = false; + return VINF_SUCCESS; +} + + +/** + * Inserts an IOTLB entry into the cache. + * + * @param pThis The shared IOMMU device state. + * @param pThisR3 The ring-3 IOMMU device state. + * @param pIotlbe The IOTLB entry to initialize and insert. + * @param idDomain The domain ID. + * @param uIova The I/O virtual address. + * @param pPageLookup The I/O page lookup result of the access. + */ +static void iommuAmdIotlbEntryInsert(PIOMMU pThis, PIOMMUR3 pThisR3, PIOTLBE pIotlbe, uint16_t idDomain, uint64_t uIova, + PCIOPAGELOOKUP pPageLookup) +{ + /* Initialize the IOTLB entry with results of the I/O page walk. */ + AVLU64KEY const uKey = IOMMU_IOTLB_KEY_MAKE(idDomain, uIova); + Assert(uKey != IOMMU_IOTLB_KEY_NIL); + + /* Check if the entry already exists. */ + PIOTLBE pFound = (PIOTLBE)RTAvlU64Get(&pThisR3->TreeIotlbe, uKey); + if (!pFound) + { + /* Insert the entry into the cache. */ + pIotlbe->Core.Key = uKey; + pIotlbe->PageLookup = *pPageLookup; + Assert(!pIotlbe->fEvictPending); + + bool const fInserted = RTAvlU64Insert(&pThisR3->TreeIotlbe, &pIotlbe->Core); + Assert(fInserted); NOREF(fInserted); + Assert(pThisR3->cCachedIotlbes < IOMMU_IOTLBE_MAX); + ++pThisR3->cCachedIotlbes; + STAM_COUNTER_INC(&pThis->StatIotlbeCached); NOREF(pThis); + } + else + { + /* Update the existing entry. */ + Assert(pFound->Core.Key == uKey); + if (pFound->fEvictPending) + { + pFound->fEvictPending = false; + STAM_COUNTER_INC(&pThis->StatIotlbeLazyEvictReuse); NOREF(pThis); + } + pFound->PageLookup = *pPageLookup; + } +} + + +/** + * Removes an IOTLB entry from the cache for the given key. + * + * @returns Pointer to the removed IOTLB entry, NULL if the entry wasn't found in + * the tree. + * @param pThis The shared IOMMU device state. + * @param pThisR3 The ring-3 IOMMU device state. + * @param uKey The key of the IOTLB entry to remove. + */ +static PIOTLBE iommuAmdIotlbEntryRemove(PIOMMU pThis, PIOMMUR3 pThisR3, AVLU64KEY uKey) +{ + PIOTLBE pIotlbe = (PIOTLBE)RTAvlU64Remove(&pThisR3->TreeIotlbe, uKey); + if (pIotlbe) + { + if (pIotlbe->fEvictPending) + STAM_COUNTER_INC(&pThis->StatIotlbeLazyEvictReuse); + + RT_ZERO(pIotlbe->Core); + RT_ZERO(pIotlbe->PageLookup); + /* We must not erase the LRU node connections here! */ + pIotlbe->fEvictPending = false; + Assert(pIotlbe->Core.Key == IOMMU_IOTLB_KEY_NIL); + + Assert(pThisR3->cCachedIotlbes > 0); + --pThisR3->cCachedIotlbes; + STAM_COUNTER_DEC(&pThis->StatIotlbeCached); NOREF(pThis); + } + return pIotlbe; +} + + +/** + * Looks up an IOTLB from the cache. + * + * @returns Pointer to IOTLB entry if found, NULL otherwise. + * @param pThis The shared IOMMU device state. + * @param pThisR3 The ring-3 IOMMU device state. + * @param idDomain The domain ID. + * @param uIova The I/O virtual address. + */ +static PIOTLBE iommuAmdIotlbLookup(PIOMMU pThis, PIOMMUR3 pThisR3, uint64_t idDomain, uint64_t uIova) +{ + RT_NOREF(pThis); + + uint64_t const uKey = IOMMU_IOTLB_KEY_MAKE(idDomain, uIova); + PIOTLBE pIotlbe = (PIOTLBE)RTAvlU64Get(&pThisR3->TreeIotlbe, uKey); + if ( pIotlbe + && !pIotlbe->fEvictPending) + return pIotlbe; + + /* + * Domain Id wildcard invalidations only marks entries for eviction later but doesn't remove + * them from the cache immediately. We found an entry pending eviction, just return that + * nothing was found (rather than evicting now). + */ + return NULL; +} + + +/** + * Adds an IOTLB entry to the cache. + * + * @param pThis The shared IOMMU device state. + * @param pThisR3 The ring-3 IOMMU device state. + * @param idDomain The domain ID. + * @param uIovaPage The I/O virtual address (must be 4K aligned). + * @param pPageLookup The I/O page lookup result of the access. + */ +static void iommuAmdIotlbAdd(PIOMMU pThis, PIOMMUR3 pThisR3, uint16_t idDomain, uint64_t uIovaPage, PCIOPAGELOOKUP pPageLookup) +{ + Assert(!(uIovaPage & X86_PAGE_4K_OFFSET_MASK)); + Assert(pPageLookup); + Assert(pPageLookup->cShift <= 51); + Assert(pPageLookup->fPerm != IOMMU_IO_PERM_NONE); + + /* + * If there are no unused IOTLB entries, evict the LRU entry. + * Otherwise, get a new IOTLB entry from the pre-allocated list. + */ + if (pThisR3->idxUnusedIotlbe == IOMMU_IOTLBE_MAX) + { + /* Grab the least recently used entry. */ + PIOTLBE pIotlbe = RTListGetFirst(&pThisR3->LstLruIotlbe, IOTLBE, NdLru); + Assert(pIotlbe); + + /* If the entry is in the cache, remove it. */ + if (pIotlbe->Core.Key != IOMMU_IOTLB_KEY_NIL) + iommuAmdIotlbEntryRemove(pThis, pThisR3, pIotlbe->Core.Key); + + /* Initialize and insert the IOTLB entry into the cache. */ + iommuAmdIotlbEntryInsert(pThis, pThisR3, pIotlbe, idDomain, uIovaPage, pPageLookup); + + /* Move the entry to the most recently used slot. */ + iommuAmdIotlbEntryMoveToMru(pThisR3, pIotlbe); + } + else + { + /* Grab an unused IOTLB entry from the pre-allocated list. */ + PIOTLBE pIotlbe = &pThisR3->paIotlbes[pThisR3->idxUnusedIotlbe]; + ++pThisR3->idxUnusedIotlbe; + + /* Initialize and insert the IOTLB entry into the cache. */ + iommuAmdIotlbEntryInsert(pThis, pThisR3, pIotlbe, idDomain, uIovaPage, pPageLookup); + + /* Add the entry to the most recently used slot. */ + RTListAppend(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru); + } +} + + +/** + * Removes all IOTLB entries from the cache. + * + * @param pDevIns The IOMMU instance data. + */ +static void iommuAmdIotlbRemoveAll(PPDMDEVINS pDevIns) +{ + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3); + IOMMU_CACHE_LOCK(pDevIns, pThis); + + if (pThisR3->cCachedIotlbes > 0) + { + RTAvlU64Destroy(&pThisR3->TreeIotlbe, iommuAmdIotlbEntryDestroy, NULL /* pvParam */); + RTListInit(&pThisR3->LstLruIotlbe); + pThisR3->idxUnusedIotlbe = 0; + pThisR3->cCachedIotlbes = 0; + STAM_COUNTER_RESET(&pThis->StatIotlbeCached); + } + + IOMMU_CACHE_UNLOCK(pDevIns, pThis); +} + + +/** + * Removes IOTLB entries for the range of I/O virtual addresses and the specified + * domain ID from the cache. + * + * @param pDevIns The IOMMU instance data. + * @param idDomain The domain ID. + * @param uIova The I/O virtual address to invalidate. + * @param cbInvalidate The size of the invalidation (must be 4K aligned). + */ +static void iommuAmdIotlbRemoveRange(PPDMDEVINS pDevIns, uint16_t idDomain, uint64_t uIova, size_t cbInvalidate) +{ + /* Validate. */ + Assert(!(uIova & X86_PAGE_4K_OFFSET_MASK)); + Assert(!(cbInvalidate & X86_PAGE_4K_OFFSET_MASK)); + Assert(cbInvalidate >= X86_PAGE_4K_SIZE); + + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3); + IOMMU_CACHE_LOCK(pDevIns, pThis); + + do + { + uint64_t const uKey = IOMMU_IOTLB_KEY_MAKE(idDomain, uIova); + PIOTLBE pIotlbe = iommuAmdIotlbEntryRemove(pThis, pThisR3, uKey); + if (pIotlbe) + iommuAmdIotlbEntryMoveToLru(pThisR3, pIotlbe); + uIova += X86_PAGE_4K_SIZE; + cbInvalidate -= X86_PAGE_4K_SIZE; + } while (cbInvalidate > 0); + + IOMMU_CACHE_UNLOCK(pDevIns, pThis); +} + + +/** + * Removes all IOTLB entries for the specified domain ID. + * + * @param pDevIns The IOMMU instance data. + * @param idDomain The domain ID. + */ +static void iommuAmdIotlbRemoveDomainId(PPDMDEVINS pDevIns, uint16_t idDomain) +{ + /* + * We need to iterate the tree and search based on the domain ID. + * But it seems we cannot remove items while iterating the tree. + * Thus, we simply mark entries for eviction later but move them to the LRU + * so they will eventually get evicted and re-cycled as the cache gets re-populated. + */ + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3); + IOMMU_CACHE_LOCK(pDevIns, pThis); + + IOTLBEFLUSHARG Args; + Args.pIommuR3 = pThisR3; + Args.idDomain = idDomain; + RTAvlU64DoWithAll(&pThisR3->TreeIotlbe, true /* fFromLeft */, iommuAmdIotlbEntryRemoveDomainId, &Args); + + IOMMU_CACHE_UNLOCK(pDevIns, pThis); +} + + +/** + * Adds or updates IOTLB entries for the given range of I/O virtual addresses. + * + * @param pDevIns The IOMMU instance data. + * @param idDomain The domain ID. + * @param uIovaPage The I/O virtual address (must be 4K aligned). + * @param cbContiguous The size of the access. + * @param pAddrOut The translated I/O address lookup. + * + * @remarks All pages in the range specified by @c cbContiguous must have identical + * permissions and page sizes. + */ +static void iommuAmdIotlbAddRange(PPDMDEVINS pDevIns, uint16_t idDomain, uint64_t uIovaPage, size_t cbContiguous, + PCIOPAGELOOKUP pAddrOut) +{ + Assert(!(uIovaPage & X86_PAGE_4K_OFFSET_MASK)); + + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3); + + IOPAGELOOKUP PageLookup; + PageLookup.GCPhysSpa = pAddrOut->GCPhysSpa & X86_PAGE_4K_BASE_MASK; + PageLookup.cShift = pAddrOut->cShift; + PageLookup.fPerm = pAddrOut->fPerm; + + size_t const cbIova = RT_ALIGN_Z(cbContiguous, X86_PAGE_4K_SIZE); + Assert(!(cbIova & X86_PAGE_4K_OFFSET_MASK)); + Assert(cbIova >= X86_PAGE_4K_SIZE); + + size_t cPages = cbIova / X86_PAGE_4K_SIZE; + cPages = RT_MIN(cPages, IOMMU_IOTLBE_MAX); + + IOMMU_CACHE_LOCK(pDevIns, pThis); + /** @todo Re-check DTE cache? */ + /* + * Add IOTLB entries for every page in the access. + * The page size and permissions are assumed to be identical to every + * page in this access. + */ + while (cPages > 0) + { + iommuAmdIotlbAdd(pThis, pThisR3, idDomain, uIovaPage, &PageLookup); + uIovaPage += X86_PAGE_4K_SIZE; + PageLookup.GCPhysSpa += X86_PAGE_4K_SIZE; + --cPages; + } + IOMMU_CACHE_UNLOCK(pDevIns, pThis); +} +#endif /* IOMMU_WITH_IOTLBE_CACHE */ + + +#ifdef IOMMU_WITH_IRTE_CACHE +/** + * Looks up an IRTE cache entry. + * + * @returns Index of the found entry, or cache capacity if not found. + * @param pThis The shared IOMMU device state. + * @param idDevice The device ID (bus, device, function). + * @param offIrte The offset into the interrupt remap table. + */ +static uint16_t iommuAmdIrteCacheEntryLookup(PCIOMMU pThis, uint16_t idDevice, uint16_t offIrte) +{ + /** @todo Consider sorting and binary search when the cache capacity grows. + * For the IRTE cache this should be okay since typically guests do not alter the + * interrupt remapping once programmed, so hopefully sorting shouldn't happen + * often. */ + uint32_t const uKey = IOMMU_IRTE_CACHE_KEY_MAKE(idDevice, offIrte); + uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache); + for (uint16_t i = 0; i < cIrteCache; i++) + if (pThis->aIrteCache[i].uKey == uKey) + return i; + return cIrteCache; +} + + +/** + * Gets a free/unused IRTE cache entry. + * + * @returns The index of an unused entry, or cache capacity if the cache is full. + * @param pThis The shared IOMMU device state. + */ +static uint16_t iommuAmdIrteCacheEntryGetUnused(PCIOMMU pThis) +{ + uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache); + for (uint16_t i = 0; i < cIrteCache; i++) + if (pThis->aIrteCache[i].uKey == IOMMU_IRTE_CACHE_KEY_NIL) + { + Assert(!pThis->aIrteCache[i].Irte.u32); + return i; + } + return cIrteCache; +} + + +/** + * Looks up the IRTE cache for the given MSI. + * + * @returns VBox status code. + * @param pDevIns The IOMMU instance data. + * @param idDevice The device ID (bus, device, function). + * @param enmOp The IOMMU operation being performed. + * @param pMsiIn The source MSI. + * @param pMsiOut Where to store the remapped MSI. + */ +static int iommuAmdIrteCacheLookup(PPDMDEVINS pDevIns, uint16_t idDevice, IOMMUOP enmOp, PCMSIMSG pMsiIn, PMSIMSG pMsiOut) +{ + RT_NOREF(enmOp); /* May need it if we have to report errors (currently we fallback to the slower path to do that). */ + + int rc = VERR_NOT_FOUND; + /* Deal with such cases in the slower/fallback path. */ + if ((pMsiIn->Addr.u64 & VBOX_MSI_ADDR_ADDR_MASK) == VBOX_MSI_ADDR_BASE) + { /* likely */ } + else + return rc; + + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + IOMMU_CACHE_LOCK(pDevIns, pThis); + + uint16_t const idxDteCache = iommuAmdDteCacheEntryLookup(pThis, idDevice); + if (idxDteCache < RT_ELEMENTS(pThis->aDteCache)) + { + PCDTECACHE pDteCache = &pThis->aDteCache[idxDteCache]; + if ((pDteCache->fFlags & (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_INTR_MAP_VALID)) + == (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_INTR_MAP_VALID)) + { + Assert((pMsiIn->Addr.u64 & VBOX_MSI_ADDR_ADDR_MASK) == VBOX_MSI_ADDR_BASE); /* Paranoia. */ + + /* Currently, we only cache remapping of fixed and arbitrated interrupts. */ + uint8_t const u8DeliveryMode = pMsiIn->Data.n.u3DeliveryMode; + if (u8DeliveryMode <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO) + { + uint8_t const uIntrCtrl = (pDteCache->fFlags >> IOMMU_DTE_CACHE_F_INTR_CTRL_SHIFT) + & IOMMU_DTE_CACHE_F_INTR_CTRL_MASK; + if (uIntrCtrl == IOMMU_INTR_CTRL_REMAP) + { + /* Interrupt table length has been verified prior to adding entries to the cache. */ + uint16_t const offIrte = IOMMU_GET_IRTE_OFF(pMsiIn->Data.u32); + uint16_t const idxIrteCache = iommuAmdIrteCacheEntryLookup(pThis, idDevice, offIrte); + if (idxIrteCache < RT_ELEMENTS(pThis->aIrteCache)) + { + PCIRTE_T pIrte = &pThis->aIrteCache[idxIrteCache].Irte; + Assert(pIrte->n.u1RemapEnable); + Assert(pIrte->n.u3IntrType <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO); + iommuAmdIrteRemapMsi(pMsiIn, pMsiOut, pIrte); + rc = VINF_SUCCESS; + } + } + else if (uIntrCtrl == IOMMU_INTR_CTRL_FWD_UNMAPPED) + { + *pMsiOut = *pMsiIn; + rc = VINF_SUCCESS; + } + } + } + else if (pDteCache->fFlags & IOMMU_DTE_CACHE_F_PRESENT) + { + *pMsiOut = *pMsiIn; + rc = VINF_SUCCESS; + } + } + + IOMMU_CACHE_UNLOCK(pDevIns, pThis); + return rc; +} + + +/** + * Adds or updates the IRTE cache for the given IRTE. + * + * @returns VBox status code. + * @retval VERR_OUT_OF_RESOURCES if the cache is full. + * + * @param pDevIns The IOMMU instance data. + * @param idDevice The device ID (bus, device, function). + * @param offIrte The offset into the interrupt remap table. + * @param pIrte The IRTE to cache. + */ +static int iommuAmdIrteCacheAdd(PPDMDEVINS pDevIns, uint16_t idDevice, uint16_t offIrte, PCIRTE_T pIrte) +{ + Assert(offIrte != 0xffff); /* Shouldn't be a valid IRTE table offset since sizeof(IRTE) is a multiple of 4. */ + + int rc = VINF_SUCCESS; + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + Assert(idDevice != pThis->uPciAddress); + IOMMU_CACHE_LOCK(pDevIns, pThis); + + /* Find an existing entry or get an unused slot. */ + uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache); + uint16_t idxIrteCache = iommuAmdIrteCacheEntryLookup(pThis, idDevice, offIrte); + if ( idxIrteCache < cIrteCache + || (idxIrteCache = iommuAmdIrteCacheEntryGetUnused(pThis)) < cIrteCache) + { + pThis->aIrteCache[idxIrteCache].uKey = IOMMU_IRTE_CACHE_KEY_MAKE(idDevice, offIrte); + pThis->aIrteCache[idxIrteCache].Irte = *pIrte; + } + else + rc = VERR_OUT_OF_RESOURCES; + + IOMMU_CACHE_UNLOCK(pDevIns, pThis); + return rc; +} + + +# ifdef IN_RING3 +/** + * Removes IRTE cache entries for the given device ID. + * + * @param pDevIns The IOMMU instance data. + * @param idDevice The device ID (bus, device, function). + */ +static void iommuAmdIrteCacheRemove(PPDMDEVINS pDevIns, uint16_t idDevice) +{ + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + IOMMU_CACHE_LOCK(pDevIns, pThis); + uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache); + for (uint16_t i = 0; i < cIrteCache; i++) + { + PIRTECACHE pIrteCache = &pThis->aIrteCache[i]; + if (idDevice == IOMMU_IRTE_CACHE_KEY_GET_DEVICE_ID(pIrteCache->uKey)) + { + pIrteCache->uKey = IOMMU_IRTE_CACHE_KEY_NIL; + pIrteCache->Irte.u32 = 0; + /* There could multiple IRTE entries for a device ID, continue searching. */ + } + } + IOMMU_CACHE_UNLOCK(pDevIns, pThis); +} + + +/** + * Removes all IRTE cache entries. + * + * @param pDevIns The IOMMU instance data. + */ +static void iommuAmdIrteCacheRemoveAll(PPDMDEVINS pDevIns) +{ + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + IOMMU_CACHE_LOCK(pDevIns, pThis); + uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache); + for (uint16_t i = 0; i < cIrteCache; i++) + { + pThis->aIrteCache[i].uKey = IOMMU_IRTE_CACHE_KEY_NIL; + pThis->aIrteCache[i].Irte.u32 = 0; + } + IOMMU_CACHE_UNLOCK(pDevIns, pThis); +} +# endif /* IN_RING3 */ +#endif /* IOMMU_WITH_IRTE_CACHE */ + + +/** + * Atomically reads the control register without locking the IOMMU device. + * + * @returns The control register. + * @param pThis The shared IOMMU device state. + */ +DECL_FORCE_INLINE(IOMMU_CTRL_T) iommuAmdGetCtrlUnlocked(PCIOMMU pThis) +{ + IOMMU_CTRL_T Ctrl; + Ctrl.u64 = ASMAtomicReadU64((volatile uint64_t *)&pThis->Ctrl.u64); + return Ctrl; +} + + +/** + * Returns whether MSI is enabled for the IOMMU. + * + * @returns Whether MSI is enabled. + * @param pDevIns The IOMMU device instance. + * + * @note There should be a PCIDevXxx function for this. + */ +static bool iommuAmdIsMsiEnabled(PPDMDEVINS pDevIns) +{ + MSI_CAP_HDR_T MsiCapHdr; + MsiCapHdr.u32 = PDMPciDevGetDWord(pDevIns->apPciDevs[0], IOMMU_PCI_OFF_MSI_CAP_HDR); + return MsiCapHdr.n.u1MsiEnable; +} + + +/** + * Signals a PCI target abort. + * + * @param pDevIns The IOMMU device instance. + */ +static void iommuAmdSetPciTargetAbort(PPDMDEVINS pDevIns) +{ + PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0]; + uint16_t const u16Status = PDMPciDevGetStatus(pPciDev) | VBOX_PCI_STATUS_SIG_TARGET_ABORT; + PDMPciDevSetStatus(pPciDev, u16Status); +} + + +/** + * Wakes up the command thread if there are commands to be processed. + * + * @param pDevIns The IOMMU device instance. + * + * @remarks The IOMMU lock must be held while calling this! + */ +static void iommuAmdCmdThreadWakeUpIfNeeded(PPDMDEVINS pDevIns) +{ + Log4Func(("\n")); + + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + if ( pThis->Status.n.u1CmdBufRunning + && pThis->CmdBufTailPtr.n.off != pThis->CmdBufHeadPtr.n.off + && !ASMAtomicXchgBool(&pThis->fCmdThreadSignaled, true)) + { + Log4Func(("Signaling command thread\n")); + PDMDevHlpSUPSemEventSignal(pDevIns, pThis->hEvtCmdThread); + } +} + + +/** + * Reads the Device Table Base Address Register. + */ +static VBOXSTRICTRC iommuAmdDevTabBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + *pu64Value = pThis->aDevTabBaseAddrs[0].u64; + return VINF_SUCCESS; +} + + +/** + * Reads the Command Buffer Base Address Register. + */ +static VBOXSTRICTRC iommuAmdCmdBufBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + *pu64Value = pThis->CmdBufBaseAddr.u64; + return VINF_SUCCESS; +} + + +/** + * Reads the Event Log Base Address Register. + */ +static VBOXSTRICTRC iommuAmdEvtLogBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + *pu64Value = pThis->EvtLogBaseAddr.u64; + return VINF_SUCCESS; +} + + +/** + * Reads the Control Register. + */ +static VBOXSTRICTRC iommuAmdCtrl_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + *pu64Value = pThis->Ctrl.u64; + return VINF_SUCCESS; +} + + +/** + * Reads the Exclusion Range Base Address Register. + */ +static VBOXSTRICTRC iommuAmdExclRangeBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + *pu64Value = pThis->ExclRangeBaseAddr.u64; + return VINF_SUCCESS; +} + + +/** + * Reads to the Exclusion Range Limit Register. + */ +static VBOXSTRICTRC iommuAmdExclRangeLimit_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + *pu64Value = pThis->ExclRangeLimit.u64; + return VINF_SUCCESS; +} + + +/** + * Reads to the Extended Feature Register. + */ +static VBOXSTRICTRC iommuAmdExtFeat_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + *pu64Value = pThis->ExtFeat.u64; + return VINF_SUCCESS; +} + + +/** + * Reads to the PPR Log Base Address Register. + */ +static VBOXSTRICTRC iommuAmdPprLogBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + *pu64Value = pThis->PprLogBaseAddr.u64; + return VINF_SUCCESS; +} + + +/** + * Writes the Hardware Event Register (Hi). + */ +static VBOXSTRICTRC iommuAmdHwEvtHi_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + *pu64Value = pThis->HwEvtHi.u64; + return VINF_SUCCESS; +} + + +/** + * Reads the Hardware Event Register (Lo). + */ +static VBOXSTRICTRC iommuAmdHwEvtLo_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + *pu64Value = pThis->HwEvtLo; + return VINF_SUCCESS; +} + + +/** + * Reads the Hardware Event Status Register. + */ +static VBOXSTRICTRC iommuAmdHwEvtStatus_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + *pu64Value = pThis->HwEvtStatus.u64; + return VINF_SUCCESS; +} + + +/** + * Reads to the GA Log Base Address Register. + */ +static VBOXSTRICTRC iommuAmdGALogBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + *pu64Value = pThis->GALogBaseAddr.u64; + return VINF_SUCCESS; +} + + +/** + * Reads to the PPR Log B Base Address Register. + */ +static VBOXSTRICTRC iommuAmdPprLogBBaseAddr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + *pu64Value = pThis->PprLogBBaseAddr.u64; + return VINF_SUCCESS; +} + + +/** + * Reads to the Event Log B Base Address Register. + */ +static VBOXSTRICTRC iommuAmdEvtLogBBaseAddr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + *pu64Value = pThis->EvtLogBBaseAddr.u64; + return VINF_SUCCESS; +} + + +/** + * Reads the Device Table Segment Base Address Register. + */ +static VBOXSTRICTRC iommuAmdDevTabSegBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns); + + /* Figure out which segment is being written. */ + uint8_t const offSegment = (offReg - IOMMU_MMIO_OFF_DEV_TAB_SEG_FIRST) >> 3; + uint8_t const idxSegment = offSegment + 1; + Assert(idxSegment < RT_ELEMENTS(pThis->aDevTabBaseAddrs)); + + *pu64Value = pThis->aDevTabBaseAddrs[idxSegment].u64; + return VINF_SUCCESS; +} + + +/** + * Reads the Device Specific Feature Extension (DSFX) Register. + */ +static VBOXSTRICTRC iommuAmdDevSpecificFeat_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + *pu64Value = pThis->DevSpecificFeat.u64; + return VINF_SUCCESS; +} + +/** + * Reads the Device Specific Control Extension (DSCX) Register. + */ +static VBOXSTRICTRC iommuAmdDevSpecificCtrl_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + *pu64Value = pThis->DevSpecificCtrl.u64; + return VINF_SUCCESS; +} + + +/** + * Reads the Device Specific Status Extension (DSSX) Register. + */ +static VBOXSTRICTRC iommuAmdDevSpecificStatus_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + *pu64Value = pThis->DevSpecificStatus.u64; + return VINF_SUCCESS; +} + + +/** + * Reads the MSI Vector Register 0 (32-bit) and the MSI Vector Register 1 (32-bit). + */ +static VBOXSTRICTRC iommuAmdDevMsiVector_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + uint32_t const uLo = pThis->MiscInfo.au32[0]; + uint32_t const uHi = pThis->MiscInfo.au32[1]; + *pu64Value = RT_MAKE_U64(uLo, uHi); + return VINF_SUCCESS; +} + + +/** + * Reads the MSI Capability Header Register (32-bit) and the MSI Address (Lo) + * Register (32-bit). + */ +static VBOXSTRICTRC iommuAmdMsiCapHdrAndAddrLo_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pThis, offReg); + PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0]; + PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev); + uint32_t const uLo = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR); + uint32_t const uHi = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_LO); + *pu64Value = RT_MAKE_U64(uLo, uHi); + return VINF_SUCCESS; +} + + +/** + * Reads the MSI Address (Hi) Register (32-bit) and the MSI data register (32-bit). + */ +static VBOXSTRICTRC iommuAmdMsiAddrHiAndData_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pThis, offReg); + PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0]; + PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev); + uint32_t const uLo = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_HI); + uint32_t const uHi = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_DATA); + *pu64Value = RT_MAKE_U64(uLo, uHi); + return VINF_SUCCESS; +} + + +/** + * Reads the Command Buffer Head Pointer Register. + */ +static VBOXSTRICTRC iommuAmdCmdBufHeadPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + *pu64Value = pThis->CmdBufHeadPtr.u64; + return VINF_SUCCESS; +} + + +/** + * Reads the Command Buffer Tail Pointer Register. + */ +static VBOXSTRICTRC iommuAmdCmdBufTailPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + *pu64Value = pThis->CmdBufTailPtr.u64; + return VINF_SUCCESS; +} + + +/** + * Reads the Event Log Head Pointer Register. + */ +static VBOXSTRICTRC iommuAmdEvtLogHeadPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + *pu64Value = pThis->EvtLogHeadPtr.u64; + return VINF_SUCCESS; +} + + +/** + * Reads the Event Log Tail Pointer Register. + */ +static VBOXSTRICTRC iommuAmdEvtLogTailPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + *pu64Value = pThis->EvtLogTailPtr.u64; + return VINF_SUCCESS; +} + + +/** + * Reads the Status Register. + */ +static VBOXSTRICTRC iommuAmdStatus_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value) +{ + RT_NOREF(pDevIns, offReg); + *pu64Value = pThis->Status.u64; + return VINF_SUCCESS; +} + + +/** + * Writes the Device Table Base Address Register. + */ +static VBOXSTRICTRC iommuAmdDevTabBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) +{ + RT_NOREF(pDevIns, offReg); + + /* Mask out all unrecognized bits. */ + u64Value &= IOMMU_DEV_TAB_BAR_VALID_MASK; + + /* Update the register. */ + pThis->aDevTabBaseAddrs[0].u64 = u64Value; + + /* Paranoia. */ + Assert(pThis->aDevTabBaseAddrs[0].n.u9Size <= g_auDevTabSegMaxSizes[0]); + return VINF_SUCCESS; +} + + +/** + * Writes the Command Buffer Base Address Register. + */ +static VBOXSTRICTRC iommuAmdCmdBufBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) +{ + RT_NOREF(pDevIns, offReg); + + /* + * While this is not explicitly specified like the event log base address register, + * the AMD IOMMU spec. does specify "CmdBufRun must be 0b to modify the command buffer registers properly". + * Inconsistent specs :/ + */ + if (pThis->Status.n.u1CmdBufRunning) + { + LogFunc(("Setting CmdBufBar (%#RX64) when command buffer is running -> Ignored\n", u64Value)); + return VINF_SUCCESS; + } + + /* Mask out all unrecognized bits. */ + CMD_BUF_BAR_T CmdBufBaseAddr; + CmdBufBaseAddr.u64 = u64Value & IOMMU_CMD_BUF_BAR_VALID_MASK; + + /* Validate the length. */ + if (CmdBufBaseAddr.n.u4Len >= 8) + { + /* Update the register. */ + pThis->CmdBufBaseAddr.u64 = CmdBufBaseAddr.u64; + + /* + * Writing the command buffer base address, clears the command buffer head and tail pointers. + * See AMD IOMMU spec. 2.4 "Commands". + */ + pThis->CmdBufHeadPtr.u64 = 0; + pThis->CmdBufTailPtr.u64 = 0; + } + else + LogFunc(("Command buffer length (%#x) invalid -> Ignored\n", CmdBufBaseAddr.n.u4Len)); + + return VINF_SUCCESS; +} + + +/** + * Writes the Event Log Base Address Register. + */ +static VBOXSTRICTRC iommuAmdEvtLogBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) +{ + RT_NOREF(pDevIns, offReg); + + /* + * IOMMU behavior is undefined when software writes this register when event logging is running. + * In our emulation, we ignore the write entirely. + * See AMD IOMMU spec. "Event Log Base Address Register". + */ + if (pThis->Status.n.u1EvtLogRunning) + { + LogFunc(("Setting EvtLogBar (%#RX64) when event logging is running -> Ignored\n", u64Value)); + return VINF_SUCCESS; + } + + /* Mask out all unrecognized bits. */ + u64Value &= IOMMU_EVT_LOG_BAR_VALID_MASK; + EVT_LOG_BAR_T EvtLogBaseAddr; + EvtLogBaseAddr.u64 = u64Value; + + /* Validate the length. */ + if (EvtLogBaseAddr.n.u4Len >= 8) + { + /* Update the register. */ + pThis->EvtLogBaseAddr.u64 = EvtLogBaseAddr.u64; + + /* + * Writing the event log base address, clears the event log head and tail pointers. + * See AMD IOMMU spec. 2.5 "Event Logging". + */ + pThis->EvtLogHeadPtr.u64 = 0; + pThis->EvtLogTailPtr.u64 = 0; + } + else + LogFunc(("Event log length (%#x) invalid -> Ignored\n", EvtLogBaseAddr.n.u4Len)); + + return VINF_SUCCESS; +} + + +/** + * Writes the Control Register. + */ +static VBOXSTRICTRC iommuAmdCtrl_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) +{ + RT_NOREF(pDevIns, offReg); + + /* Mask out all unrecognized bits. */ + u64Value &= IOMMU_CTRL_VALID_MASK; + IOMMU_CTRL_T NewCtrl; + NewCtrl.u64 = u64Value; + + /* Ensure the device table segments are within limits. */ + if (NewCtrl.n.u3DevTabSegEn <= pThis->ExtFeat.n.u2DevTabSegSup) + { + IOMMU_CTRL_T const OldCtrl = pThis->Ctrl; + + /* Update the register. */ + ASMAtomicWriteU64(&pThis->Ctrl.u64, NewCtrl.u64); + + bool const fNewIommuEn = NewCtrl.n.u1IommuEn; + bool const fOldIommuEn = OldCtrl.n.u1IommuEn; + + /* Enable or disable event logging when the bit transitions. */ + bool const fOldEvtLogEn = OldCtrl.n.u1EvtLogEn; + bool const fNewEvtLogEn = NewCtrl.n.u1EvtLogEn; + if ( fOldEvtLogEn != fNewEvtLogEn + || fOldIommuEn != fNewIommuEn) + { + if ( fNewIommuEn + && fNewEvtLogEn) + { + ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_EVT_LOG_OVERFLOW); + ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_EVT_LOG_RUNNING); + } + else + ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_EVT_LOG_RUNNING); + } + + /* Enable or disable command buffer processing when the bit transitions. */ + bool const fOldCmdBufEn = OldCtrl.n.u1CmdBufEn; + bool const fNewCmdBufEn = NewCtrl.n.u1CmdBufEn; + if ( fOldCmdBufEn != fNewCmdBufEn + || fOldIommuEn != fNewIommuEn) + { + if ( fNewCmdBufEn + && fNewIommuEn) + { + ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_CMD_BUF_RUNNING); + LogFunc(("Command buffer enabled\n")); + + /* Wake up the command thread to start processing commands if any. */ + iommuAmdCmdThreadWakeUpIfNeeded(pDevIns); + } + else + { + ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_CMD_BUF_RUNNING); + LogFunc(("Command buffer disabled\n")); + } + } + } + else + { + LogFunc(("Invalid number of device table segments enabled, exceeds %#x (%#RX64) -> Ignored!\n", + pThis->ExtFeat.n.u2DevTabSegSup, NewCtrl.u64)); + } + + return VINF_SUCCESS; +} + + +/** + * Writes to the Exclusion Range Base Address Register. + */ +static VBOXSTRICTRC iommuAmdExclRangeBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) +{ + RT_NOREF(pDevIns, offReg); + pThis->ExclRangeBaseAddr.u64 = u64Value & IOMMU_EXCL_RANGE_BAR_VALID_MASK; + return VINF_SUCCESS; +} + + +/** + * Writes to the Exclusion Range Limit Register. + */ +static VBOXSTRICTRC iommuAmdExclRangeLimit_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) +{ + RT_NOREF(pDevIns, offReg); + u64Value &= IOMMU_EXCL_RANGE_LIMIT_VALID_MASK; + u64Value |= UINT64_C(0xfff); + pThis->ExclRangeLimit.u64 = u64Value; + return VINF_SUCCESS; +} + + +/** + * Writes the Hardware Event Register (Hi). + */ +static VBOXSTRICTRC iommuAmdHwEvtHi_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) +{ + /** @todo IOMMU: Why the heck is this marked read/write by the AMD IOMMU spec? */ + RT_NOREF(pDevIns, offReg); + LogFlowFunc(("Writing %#RX64 to hardware event (Hi) register!\n", u64Value)); + pThis->HwEvtHi.u64 = u64Value; + return VINF_SUCCESS; +} + + +/** + * Writes the Hardware Event Register (Lo). + */ +static VBOXSTRICTRC iommuAmdHwEvtLo_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) +{ + /** @todo IOMMU: Why the heck is this marked read/write by the AMD IOMMU spec? */ + RT_NOREF(pDevIns, offReg); + LogFlowFunc(("Writing %#RX64 to hardware event (Lo) register!\n", u64Value)); + pThis->HwEvtLo = u64Value; + return VINF_SUCCESS; +} + + +/** + * Writes the Hardware Event Status Register. + */ +static VBOXSTRICTRC iommuAmdHwEvtStatus_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) +{ + RT_NOREF(pDevIns, offReg); + + /* Mask out all unrecognized bits. */ + u64Value &= IOMMU_HW_EVT_STATUS_VALID_MASK; + + /* + * The two bits (HEO and HEV) are RW1C (Read/Write 1-to-Clear; writing 0 has no effect). + * If the current status bits or the bits being written are both 0, we've nothing to do. + * The Overflow bit (bit 1) is only valid when the Valid bit (bit 0) is 1. + */ + uint64_t HwStatus = pThis->HwEvtStatus.u64; + if (!(HwStatus & RT_BIT(0))) + return VINF_SUCCESS; + if (u64Value & HwStatus & RT_BIT_64(0)) + HwStatus &= ~RT_BIT_64(0); + if (u64Value & HwStatus & RT_BIT_64(1)) + HwStatus &= ~RT_BIT_64(1); + + /* Update the register. */ + pThis->HwEvtStatus.u64 = HwStatus; + return VINF_SUCCESS; +} + + +/** + * Writes the Device Table Segment Base Address Register. + */ +static VBOXSTRICTRC iommuAmdDevTabSegBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) +{ + RT_NOREF(pDevIns); + + /* Figure out which segment is being written. */ + uint8_t const offSegment = (offReg - IOMMU_MMIO_OFF_DEV_TAB_SEG_FIRST) >> 3; + uint8_t const idxSegment = offSegment + 1; + Assert(idxSegment < RT_ELEMENTS(pThis->aDevTabBaseAddrs)); + + /* Mask out all unrecognized bits. */ + u64Value &= IOMMU_DEV_TAB_SEG_BAR_VALID_MASK; + DEV_TAB_BAR_T DevTabSegBar; + DevTabSegBar.u64 = u64Value; + + /* Validate the size. */ + uint16_t const uSegSize = DevTabSegBar.n.u9Size; + uint16_t const uMaxSegSize = g_auDevTabSegMaxSizes[idxSegment]; + if (uSegSize <= uMaxSegSize) + { + /* Update the register. */ + pThis->aDevTabBaseAddrs[idxSegment].u64 = u64Value; + } + else + LogFunc(("Device table segment (%u) size invalid (%#RX32) -> Ignored\n", idxSegment, uSegSize)); + + return VINF_SUCCESS; +} + + +/** + * Writes the MSI Vector Register 0 (32-bit) and the MSI Vector Register 1 (32-bit). + */ +static VBOXSTRICTRC iommuAmdDevMsiVector_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) +{ + RT_NOREF(pDevIns, offReg); + + /* MSI Vector Register 0 is read-only. */ + /* MSI Vector Register 1. */ + uint32_t const uReg = u64Value >> 32; + pThis->MiscInfo.au32[1] = uReg & IOMMU_MSI_VECTOR_1_VALID_MASK; + return VINF_SUCCESS; +} + + +/** + * Writes the MSI Capability Header Register (32-bit) or the MSI Address (Lo) + * Register (32-bit). + */ +static VBOXSTRICTRC iommuAmdMsiCapHdrAndAddrLo_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) +{ + RT_NOREF(pThis, offReg); + PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0]; + PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev); + + /* MSI capability header. */ + { + uint32_t const uReg = u64Value; + MSI_CAP_HDR_T MsiCapHdr; + MsiCapHdr.u32 = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR); + MsiCapHdr.n.u1MsiEnable = RT_BOOL(uReg & IOMMU_MSI_CAP_HDR_MSI_EN_MASK); + PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR, MsiCapHdr.u32); + } + + /* MSI Address Lo. */ + { + uint32_t const uReg = u64Value >> 32; + uint32_t const uMsiAddrLo = uReg & VBOX_MSI_ADDR_VALID_MASK; + PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_LO, uMsiAddrLo); + } + + return VINF_SUCCESS; +} + + +/** + * Writes the MSI Address (Hi) Register (32-bit) or the MSI data register (32-bit). + */ +static VBOXSTRICTRC iommuAmdMsiAddrHiAndData_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) +{ + RT_NOREF(pThis, offReg); + PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0]; + PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev); + + /* MSI Address Hi. */ + { + uint32_t const uReg = u64Value; + PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_HI, uReg); + } + + /* MSI Data. */ + { + uint32_t const uReg = u64Value >> 32; + uint32_t const uMsiData = uReg & VBOX_MSI_DATA_VALID_MASK; + PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_DATA, uMsiData); + } + + return VINF_SUCCESS; +} + + +/** + * Writes the Command Buffer Head Pointer Register. + */ +static VBOXSTRICTRC iommuAmdCmdBufHeadPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) +{ + RT_NOREF(pDevIns, offReg); + + /* + * IOMMU behavior is undefined when software writes this register when the command buffer is running. + * In our emulation, we ignore the write entirely. + * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers". + */ + if (pThis->Status.n.u1CmdBufRunning) + { + LogFunc(("Setting CmdBufHeadPtr (%#RX64) when command buffer is running -> Ignored\n", u64Value)); + return VINF_SUCCESS; + } + + /* + * IOMMU behavior is undefined when software writes a value outside the buffer length. + * In our emulation, we ignore the write entirely. + */ + uint32_t const offBuf = u64Value & IOMMU_CMD_BUF_HEAD_PTR_VALID_MASK; + uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len); + Assert(cbBuf <= _512K); + if (offBuf >= cbBuf) + { + LogFunc(("Setting CmdBufHeadPtr (%#RX32) to a value that exceeds buffer length (%#RX23) -> Ignored\n", offBuf, cbBuf)); + return VINF_SUCCESS; + } + + /* Update the register. */ + pThis->CmdBufHeadPtr.au32[0] = offBuf; + + iommuAmdCmdThreadWakeUpIfNeeded(pDevIns); + + Log4Func(("Set CmdBufHeadPtr to %#RX32\n", offBuf)); + return VINF_SUCCESS; +} + + +/** + * Writes the Command Buffer Tail Pointer Register. + */ +static VBOXSTRICTRC iommuAmdCmdBufTailPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) +{ + RT_NOREF(pDevIns, offReg); + + /* + * IOMMU behavior is undefined when software writes a value outside the buffer length. + * In our emulation, we ignore the write entirely. + * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers". + */ + uint32_t const offBuf = u64Value & IOMMU_CMD_BUF_TAIL_PTR_VALID_MASK; + uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len); + Assert(cbBuf <= _512K); + if (offBuf >= cbBuf) + { + LogFunc(("Setting CmdBufTailPtr (%#RX32) to a value that exceeds buffer length (%#RX32) -> Ignored\n", offBuf, cbBuf)); + return VINF_SUCCESS; + } + + /* + * IOMMU behavior is undefined if software advances the tail pointer equal to or beyond the + * head pointer after adding one or more commands to the buffer. + * + * However, we cannot enforce this strictly because it's legal for software to shrink the + * command queue (by reducing the offset) as well as wrap around the pointer (when head isn't + * at 0). Software might even make the queue empty by making head and tail equal which is + * allowed. I don't think we can or should try too hard to prevent software shooting itself + * in the foot here. As long as we make sure the offset value is within the circular buffer + * bounds (which we do by masking bits above) it should be sufficient. + */ + pThis->CmdBufTailPtr.au32[0] = offBuf; + + iommuAmdCmdThreadWakeUpIfNeeded(pDevIns); + + Log4Func(("Set CmdBufTailPtr to %#RX32\n", offBuf)); + return VINF_SUCCESS; +} + + +/** + * Writes the Event Log Head Pointer Register. + */ +static VBOXSTRICTRC iommuAmdEvtLogHeadPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) +{ + RT_NOREF(pDevIns, offReg); + + /* + * IOMMU behavior is undefined when software writes a value outside the buffer length. + * In our emulation, we ignore the write entirely. + * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers". + */ + uint32_t const offBuf = u64Value & IOMMU_EVT_LOG_HEAD_PTR_VALID_MASK; + uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len); + Assert(cbBuf <= _512K); + if (offBuf >= cbBuf) + { + LogFunc(("Setting EvtLogHeadPtr (%#RX32) to a value that exceeds buffer length (%#RX32) -> Ignored\n", offBuf, cbBuf)); + return VINF_SUCCESS; + } + + /* Update the register. */ + pThis->EvtLogHeadPtr.au32[0] = offBuf; + + Log4Func(("Set EvtLogHeadPtr to %#RX32\n", offBuf)); + return VINF_SUCCESS; +} + + +/** + * Writes the Event Log Tail Pointer Register. + */ +static VBOXSTRICTRC iommuAmdEvtLogTailPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) +{ + RT_NOREF(pDevIns, offReg); + NOREF(pThis); + + /* + * IOMMU behavior is undefined when software writes this register when the event log is running. + * In our emulation, we ignore the write entirely. + * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers". + */ + if (pThis->Status.n.u1EvtLogRunning) + { + LogFunc(("Setting EvtLogTailPtr (%#RX64) when event log is running -> Ignored\n", u64Value)); + return VINF_SUCCESS; + } + + /* + * IOMMU behavior is undefined when software writes a value outside the buffer length. + * In our emulation, we ignore the write entirely. + */ + uint32_t const offBuf = u64Value & IOMMU_EVT_LOG_TAIL_PTR_VALID_MASK; + uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len); + Assert(cbBuf <= _512K); + if (offBuf >= cbBuf) + { + LogFunc(("Setting EvtLogTailPtr (%#RX32) to a value that exceeds buffer length (%#RX32) -> Ignored\n", offBuf, cbBuf)); + return VINF_SUCCESS; + } + + /* Update the register. */ + pThis->EvtLogTailPtr.au32[0] = offBuf; + + Log4Func(("Set EvtLogTailPtr to %#RX32\n", offBuf)); + return VINF_SUCCESS; +} + + +/** + * Writes the Status Register. + */ +static VBOXSTRICTRC iommuAmdStatus_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) +{ + RT_NOREF(pDevIns, offReg); + + /* Mask out all unrecognized bits. */ + u64Value &= IOMMU_STATUS_VALID_MASK; + + /* + * Compute RW1C (read-only, write-1-to-clear) bits and preserve the rest (which are read-only). + * Writing 0 to an RW1C bit has no effect. Writing 1 to an RW1C bit, clears the bit if it's already 1. + */ + IOMMU_STATUS_T const OldStatus = pThis->Status; + uint64_t const fOldRw1cBits = (OldStatus.u64 & IOMMU_STATUS_RW1C_MASK); + uint64_t const fOldRoBits = (OldStatus.u64 & ~IOMMU_STATUS_RW1C_MASK); + uint64_t const fNewRw1cBits = (u64Value & IOMMU_STATUS_RW1C_MASK); + + uint64_t const uNewStatus = (fOldRw1cBits & ~fNewRw1cBits) | fOldRoBits; + + /* Update the register. */ + ASMAtomicWriteU64(&pThis->Status.u64, uNewStatus); + return VINF_SUCCESS; +} + + +/** + * Register access table 0. + * The MMIO offset of each entry must be a multiple of 8! + */ +static const IOMMUREGACC g_aRegAccess0[] = +{ + /* MMIO off. Register name Read function Write function */ + { /* 0x00 */ "DEV_TAB_BAR", iommuAmdDevTabBar_r, iommuAmdDevTabBar_w }, + { /* 0x08 */ "CMD_BUF_BAR", iommuAmdCmdBufBar_r, iommuAmdCmdBufBar_w }, + { /* 0x10 */ "EVT_LOG_BAR", iommuAmdEvtLogBar_r, iommuAmdEvtLogBar_w }, + { /* 0x18 */ "CTRL", iommuAmdCtrl_r, iommuAmdCtrl_w }, + { /* 0x20 */ "EXCL_BAR", iommuAmdExclRangeBar_r, iommuAmdExclRangeBar_w }, + { /* 0x28 */ "EXCL_RANGE_LIMIT", iommuAmdExclRangeLimit_r, iommuAmdExclRangeLimit_w }, + { /* 0x30 */ "EXT_FEAT", iommuAmdExtFeat_r, NULL }, + { /* 0x38 */ "PPR_LOG_BAR", iommuAmdPprLogBar_r, NULL }, + { /* 0x40 */ "HW_EVT_HI", iommuAmdHwEvtHi_r, iommuAmdHwEvtHi_w }, + { /* 0x48 */ "HW_EVT_LO", iommuAmdHwEvtLo_r, iommuAmdHwEvtLo_w }, + { /* 0x50 */ "HW_EVT_STATUS", iommuAmdHwEvtStatus_r, iommuAmdHwEvtStatus_w }, + { /* 0x58 */ NULL, NULL, NULL }, + + { /* 0x60 */ "SMI_FLT_0", NULL, NULL }, + { /* 0x68 */ "SMI_FLT_1", NULL, NULL }, + { /* 0x70 */ "SMI_FLT_2", NULL, NULL }, + { /* 0x78 */ "SMI_FLT_3", NULL, NULL }, + { /* 0x80 */ "SMI_FLT_4", NULL, NULL }, + { /* 0x88 */ "SMI_FLT_5", NULL, NULL }, + { /* 0x90 */ "SMI_FLT_6", NULL, NULL }, + { /* 0x98 */ "SMI_FLT_7", NULL, NULL }, + { /* 0xa0 */ "SMI_FLT_8", NULL, NULL }, + { /* 0xa8 */ "SMI_FLT_9", NULL, NULL }, + { /* 0xb0 */ "SMI_FLT_10", NULL, NULL }, + { /* 0xb8 */ "SMI_FLT_11", NULL, NULL }, + { /* 0xc0 */ "SMI_FLT_12", NULL, NULL }, + { /* 0xc8 */ "SMI_FLT_13", NULL, NULL }, + { /* 0xd0 */ "SMI_FLT_14", NULL, NULL }, + { /* 0xd8 */ "SMI_FLT_15", NULL, NULL }, + + { /* 0xe0 */ "GALOG_BAR", iommuAmdGALogBar_r, NULL }, + { /* 0xe8 */ "GALOG_TAIL_ADDR", NULL, NULL }, + { /* 0xf0 */ "PPR_LOG_B_BAR", iommuAmdPprLogBBaseAddr_r, NULL }, + { /* 0xf8 */ "PPR_EVT_B_BAR", iommuAmdEvtLogBBaseAddr_r, NULL }, + + { /* 0x100 */ "DEV_TAB_SEG_1", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w }, + { /* 0x108 */ "DEV_TAB_SEG_2", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w }, + { /* 0x110 */ "DEV_TAB_SEG_3", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w }, + { /* 0x118 */ "DEV_TAB_SEG_4", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w }, + { /* 0x120 */ "DEV_TAB_SEG_5", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w }, + { /* 0x128 */ "DEV_TAB_SEG_6", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w }, + { /* 0x130 */ "DEV_TAB_SEG_7", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w }, + + { /* 0x138 */ "DEV_SPECIFIC_FEAT", iommuAmdDevSpecificFeat_r, NULL }, + { /* 0x140 */ "DEV_SPECIFIC_CTRL", iommuAmdDevSpecificCtrl_r, NULL }, + { /* 0x148 */ "DEV_SPECIFIC_STATUS", iommuAmdDevSpecificStatus_r, NULL }, + + { /* 0x150 */ "MSI_VECTOR_0 or MSI_VECTOR_1", iommuAmdDevMsiVector_r, iommuAmdDevMsiVector_w }, + { /* 0x158 */ "MSI_CAP_HDR or MSI_ADDR_LO", iommuAmdMsiCapHdrAndAddrLo_r, iommuAmdMsiCapHdrAndAddrLo_w }, + { /* 0x160 */ "MSI_ADDR_HI or MSI_DATA", iommuAmdMsiAddrHiAndData_r, iommuAmdMsiAddrHiAndData_w }, + { /* 0x168 */ "MSI_MAPPING_CAP_HDR or PERF_OPT_CTRL", NULL, NULL }, + + { /* 0x170 */ "XT_GEN_INTR_CTRL", NULL, NULL }, + { /* 0x178 */ "XT_PPR_INTR_CTRL", NULL, NULL }, + { /* 0x180 */ "XT_GALOG_INT_CTRL", NULL, NULL }, +}; +AssertCompile(RT_ELEMENTS(g_aRegAccess0) == (IOMMU_MMIO_OFF_QWORD_TABLE_0_END - IOMMU_MMIO_OFF_QWORD_TABLE_0_START) / 8); + +/** + * Register access table 1. + * The MMIO offset of each entry must be a multiple of 8! + */ +static const IOMMUREGACC g_aRegAccess1[] = +{ + /* MMIO offset Register name Read function Write function */ + { /* 0x200 */ "MARC_APER_BAR_0", NULL, NULL }, + { /* 0x208 */ "MARC_APER_RELOC_0", NULL, NULL }, + { /* 0x210 */ "MARC_APER_LEN_0", NULL, NULL }, + { /* 0x218 */ "MARC_APER_BAR_1", NULL, NULL }, + { /* 0x220 */ "MARC_APER_RELOC_1", NULL, NULL }, + { /* 0x228 */ "MARC_APER_LEN_1", NULL, NULL }, + { /* 0x230 */ "MARC_APER_BAR_2", NULL, NULL }, + { /* 0x238 */ "MARC_APER_RELOC_2", NULL, NULL }, + { /* 0x240 */ "MARC_APER_LEN_2", NULL, NULL }, + { /* 0x248 */ "MARC_APER_BAR_3", NULL, NULL }, + { /* 0x250 */ "MARC_APER_RELOC_3", NULL, NULL }, + { /* 0x258 */ "MARC_APER_LEN_3", NULL, NULL } +}; +AssertCompile(RT_ELEMENTS(g_aRegAccess1) == (IOMMU_MMIO_OFF_QWORD_TABLE_1_END - IOMMU_MMIO_OFF_QWORD_TABLE_1_START) / 8); + +/** + * Register access table 2. + * The MMIO offset of each entry must be a multiple of 8! + */ +static const IOMMUREGACC g_aRegAccess2[] = +{ + /* MMIO offset Register name Read Function Write function */ + { /* 0x1ff8 */ "RSVD_REG", NULL, NULL }, + + { /* 0x2000 */ "CMD_BUF_HEAD_PTR", iommuAmdCmdBufHeadPtr_r, iommuAmdCmdBufHeadPtr_w }, + { /* 0x2008 */ "CMD_BUF_TAIL_PTR", iommuAmdCmdBufTailPtr_r , iommuAmdCmdBufTailPtr_w }, + { /* 0x2010 */ "EVT_LOG_HEAD_PTR", iommuAmdEvtLogHeadPtr_r, iommuAmdEvtLogHeadPtr_w }, + { /* 0x2018 */ "EVT_LOG_TAIL_PTR", iommuAmdEvtLogTailPtr_r, iommuAmdEvtLogTailPtr_w }, + + { /* 0x2020 */ "STATUS", iommuAmdStatus_r, iommuAmdStatus_w }, + { /* 0x2028 */ NULL, NULL, NULL }, + + { /* 0x2030 */ "PPR_LOG_HEAD_PTR", NULL, NULL }, + { /* 0x2038 */ "PPR_LOG_TAIL_PTR", NULL, NULL }, + + { /* 0x2040 */ "GALOG_HEAD_PTR", NULL, NULL }, + { /* 0x2048 */ "GALOG_TAIL_PTR", NULL, NULL }, + + { /* 0x2050 */ "PPR_LOG_B_HEAD_PTR", NULL, NULL }, + { /* 0x2058 */ "PPR_LOG_B_TAIL_PTR", NULL, NULL }, + + { /* 0x2060 */ NULL, NULL, NULL }, + { /* 0x2068 */ NULL, NULL, NULL }, + + { /* 0x2070 */ "EVT_LOG_B_HEAD_PTR", NULL, NULL }, + { /* 0x2078 */ "EVT_LOG_B_TAIL_PTR", NULL, NULL }, + + { /* 0x2080 */ "PPR_LOG_AUTO_RESP", NULL, NULL }, + { /* 0x2088 */ "PPR_LOG_OVERFLOW_EARLY", NULL, NULL }, + { /* 0x2090 */ "PPR_LOG_B_OVERFLOW_EARLY", NULL, NULL } +}; +AssertCompile(RT_ELEMENTS(g_aRegAccess2) == (IOMMU_MMIO_OFF_QWORD_TABLE_2_END - IOMMU_MMIO_OFF_QWORD_TABLE_2_START) / 8); + + +/** + * Gets the register access structure given its MMIO offset. + * + * @returns The register access structure, or NULL if the offset is invalid. + * @param off The MMIO offset of the register being accessed. + */ +static PCIOMMUREGACC iommuAmdGetRegAccess(uint32_t off) +{ + /* Figure out which table the register belongs to and validate its index. */ + PCIOMMUREGACC pReg; + if (off < IOMMU_MMIO_OFF_QWORD_TABLE_0_END) + { + uint32_t const idxReg = off >> 3; + Assert(idxReg < RT_ELEMENTS(g_aRegAccess0)); + pReg = &g_aRegAccess0[idxReg]; + } + else if ( off < IOMMU_MMIO_OFF_QWORD_TABLE_1_END + && off >= IOMMU_MMIO_OFF_QWORD_TABLE_1_START) + { + uint32_t const idxReg = (off - IOMMU_MMIO_OFF_QWORD_TABLE_1_START) >> 3; + Assert(idxReg < RT_ELEMENTS(g_aRegAccess1)); + pReg = &g_aRegAccess1[idxReg]; + } + else if ( off < IOMMU_MMIO_OFF_QWORD_TABLE_2_END + && off >= IOMMU_MMIO_OFF_QWORD_TABLE_2_START) + { + uint32_t const idxReg = (off - IOMMU_MMIO_OFF_QWORD_TABLE_2_START) >> 3; + Assert(idxReg < RT_ELEMENTS(g_aRegAccess2)); + pReg = &g_aRegAccess2[idxReg]; + } + else + pReg = NULL; + return pReg; +} + + +/** + * Writes an IOMMU register (32-bit and 64-bit). + * + * @returns Strict VBox status code. + * @param pDevIns The IOMMU device instance. + * @param off MMIO byte offset to the register. + * @param cb The size of the write access. + * @param uValue The value being written. + * + * @thread EMT. + */ +static VBOXSTRICTRC iommuAmdRegisterWrite(PPDMDEVINS pDevIns, uint32_t off, uint8_t cb, uint64_t uValue) +{ + /* + * Validate the access in case of IOM bug or incorrect assumption. + */ + Assert(off < IOMMU_MMIO_REGION_SIZE); + AssertMsgReturn(cb == 4 || cb == 8, ("Invalid access size %u\n", cb), VINF_SUCCESS); + AssertMsgReturn(!(off & 3), ("Invalid offset %#x\n", off), VINF_SUCCESS); + + Log4Func(("off=%#x cb=%u uValue=%#RX64\n", off, cb, uValue)); + + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC); + PCIOMMUREGACC pReg = iommuAmdGetRegAccess(off); + if (pReg) + { /* likely */ } + else + { + LogFunc(("Writing unknown register %#x with %#RX64 -> Ignored\n", off, uValue)); + return VINF_SUCCESS; + } + + /* If a write handler doesn't exist, it's either a reserved or read-only register. */ + if (pReg->pfnWrite) + { /* likely */ } + else + { + LogFunc(("Writing reserved or read-only register off=%#x (cb=%u) with %#RX64 -> Ignored\n", off, cb, uValue)); + return VINF_SUCCESS; + } + + /* + * If the write access is 64-bits and aligned on a 64-bit boundary, dispatch right away. + * This handles writes to 64-bit registers as well as aligned, 64-bit writes to two + * consecutive 32-bit registers. + */ + if (cb == 8) + { + if (!(off & 7)) + { + IOMMU_LOCK_RET(pDevIns, pThisCC, VINF_IOM_R3_MMIO_WRITE); + VBOXSTRICTRC rcStrict = pReg->pfnWrite(pDevIns, pThis, off, uValue); + IOMMU_UNLOCK(pDevIns, pThisCC); + return rcStrict; + } + + LogFunc(("Misaligned access while writing register at off=%#x (cb=%u) with %#RX64 -> Ignored\n", off, cb, uValue)); + return VINF_SUCCESS; + } + + /* We shouldn't get sizes other than 32 bits here as we've specified so with IOM. */ + Assert(cb == 4); + if (!(off & 7)) + { + VBOXSTRICTRC rcStrict; + IOMMU_LOCK_RET(pDevIns, pThisCC, VINF_IOM_R3_MMIO_WRITE); + + /* + * Lower 32 bits of a 64-bit register or a 32-bit register is being written. + * Merge with higher 32 bits (after reading the full 64-bits) and perform a 64-bit write. + */ + uint64_t u64Read; + if (pReg->pfnRead) + rcStrict = pReg->pfnRead(pDevIns, pThis, off, &u64Read); + else + { + rcStrict = VINF_SUCCESS; + u64Read = 0; + } + + if (RT_SUCCESS(rcStrict)) + { + uValue = (u64Read & UINT64_C(0xffffffff00000000)) | uValue; + rcStrict = pReg->pfnWrite(pDevIns, pThis, off, uValue); + } + else + LogFunc(("Reading off %#x during split write failed! rc=%Rrc\n -> Ignored", off, VBOXSTRICTRC_VAL(rcStrict))); + + IOMMU_UNLOCK(pDevIns, pThisCC); + return rcStrict; + } + + /* + * Higher 32 bits of a 64-bit register or a 32-bit register at a 32-bit boundary is being written. + * Merge with lower 32 bits (after reading the full 64-bits) and perform a 64-bit write. + */ + VBOXSTRICTRC rcStrict; + Assert(!(off & 3)); + Assert(off & 7); + Assert(off >= 4); + uint64_t u64Read; + IOMMU_LOCK_RET(pDevIns, pThisCC, VINF_IOM_R3_MMIO_WRITE); + if (pReg->pfnRead) + rcStrict = pReg->pfnRead(pDevIns, pThis, off - 4, &u64Read); + else + { + rcStrict = VINF_SUCCESS; + u64Read = 0; + } + + if (RT_SUCCESS(rcStrict)) + { + uValue = (uValue << 32) | (u64Read & UINT64_C(0xffffffff)); + rcStrict = pReg->pfnWrite(pDevIns, pThis, off - 4, uValue); + } + else + LogFunc(("Reading off %#x during split write failed! rc=%Rrc\n -> Ignored", off, VBOXSTRICTRC_VAL(rcStrict))); + + IOMMU_UNLOCK(pDevIns, pThisCC); + return rcStrict; +} + + +/** + * Reads an IOMMU register (64-bit) given its MMIO offset. + * + * All reads are 64-bit but reads to 32-bit registers that are aligned on an 8-byte + * boundary include the lower half of the subsequent register. + * + * This is because most registers are 64-bit and aligned on 8-byte boundaries but + * some are really 32-bit registers aligned on an 8-byte boundary. We cannot assume + * software will only perform 32-bit reads on those 32-bit registers that are + * aligned on 8-byte boundaries. + * + * @returns Strict VBox status code. + * @param pDevIns The IOMMU device instance. + * @param off The MMIO offset of the register in bytes. + * @param puResult Where to store the value being read. + * + * @thread EMT. + */ +static VBOXSTRICTRC iommuAmdRegisterRead(PPDMDEVINS pDevIns, uint32_t off, uint64_t *puResult) +{ + Assert(off < IOMMU_MMIO_REGION_SIZE); + Assert(!(off & 7) || !(off & 3)); + + Log4Func(("off=%#x\n", off)); + + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC); + PCPDMPCIDEV pPciDev = pDevIns->apPciDevs[0]; + PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev); NOREF(pPciDev); + + PCIOMMUREGACC pReg = iommuAmdGetRegAccess(off); + if (pReg) + { /* likely */ } + else + { + LogFunc(("Reading unknown register %#x -> Ignored\n", off)); + return VINF_IOM_MMIO_UNUSED_FF; + } + + /* If a read handler doesn't exist, it's a reserved or unknown register. */ + if (pReg->pfnRead) + { /* likely */ } + else + { + LogFunc(("Reading reserved or unknown register off=%#x -> returning 0s\n", off)); + return VINF_IOM_MMIO_UNUSED_00; + } + + /* + * If the read access is aligned on a 64-bit boundary, read the full 64-bits and return. + * The caller takes care of truncating upper 32 bits for 32-bit reads. + */ + if (!(off & 7)) + { + IOMMU_LOCK_RET(pDevIns, pThisCC, VINF_IOM_R3_MMIO_READ); + VBOXSTRICTRC rcStrict = pReg->pfnRead(pDevIns, pThis, off, puResult); + IOMMU_UNLOCK(pDevIns, pThisCC); + return rcStrict; + } + + /* + * High 32 bits of a 64-bit register or a 32-bit register at a non 64-bit boundary is being read. + * Read full 64 bits at the previous 64-bit boundary but return only the high 32 bits. + */ + Assert(!(off & 3)); + Assert(off & 7); + Assert(off >= 4); + IOMMU_LOCK_RET(pDevIns, pThisCC, VINF_IOM_R3_MMIO_READ); + VBOXSTRICTRC rcStrict = pReg->pfnRead(pDevIns, pThis, off - 4, puResult); + IOMMU_UNLOCK(pDevIns, pThisCC); + if (RT_SUCCESS(rcStrict)) + *puResult >>= 32; + else + { + *puResult = 0; + LogFunc(("Reading off %#x during split read failed! rc=%Rrc\n -> Ignored", off, VBOXSTRICTRC_VAL(rcStrict))); + } + + return rcStrict; +} + + +/** + * Raises the MSI interrupt for the IOMMU device. + * + * @param pDevIns The IOMMU device instance. + * + * @thread Any. + * @remarks The IOMMU lock may or may not be held. + */ +static void iommuAmdMsiInterruptRaise(PPDMDEVINS pDevIns) +{ + LogFlowFunc(("\n")); + if (iommuAmdIsMsiEnabled(pDevIns)) + { + LogFunc(("Raising MSI\n")); + PDMDevHlpPCISetIrq(pDevIns, 0, PDM_IRQ_LEVEL_HIGH); + } +} + +#if 0 +/** + * Clears the MSI interrupt for the IOMMU device. + * + * @param pDevIns The IOMMU device instance. + * + * @thread Any. + * @remarks The IOMMU lock may or may not be held. + */ +static void iommuAmdMsiInterruptClear(PPDMDEVINS pDevIns) +{ + if (iommuAmdIsMsiEnabled(pDevIns)) + PDMDevHlpPCISetIrq(pDevIns, 0, PDM_IRQ_LEVEL_LOW); +} +#endif + +/** + * Writes an entry to the event log in memory. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param pEvent The event to log. + * + * @thread Any. + * @remarks The IOMMU lock must be held while calling this function. + */ +static int iommuAmdEvtLogEntryWrite(PPDMDEVINS pDevIns, PCEVT_GENERIC_T pEvent) +{ + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC); + + IOMMU_LOCK(pDevIns, pThisCC); + + /* Check if event logging is active and the log has not overflowed. */ + IOMMU_STATUS_T const Status = pThis->Status; + if ( Status.n.u1EvtLogRunning + && !Status.n.u1EvtOverflow) + { + uint32_t const cbEvt = sizeof(*pEvent); + + /* Get the offset we need to write the event to in memory (circular buffer offset). */ + uint32_t const offEvt = pThis->EvtLogTailPtr.n.off; + Assert(!(offEvt & ~IOMMU_EVT_LOG_TAIL_PTR_VALID_MASK)); + + /* Ensure we have space in the event log. */ + uint32_t const cMaxEvts = iommuAmdGetBufMaxEntries(pThis->EvtLogBaseAddr.n.u4Len); + uint32_t const cEvts = iommuAmdGetEvtLogEntryCount(pThis); + if (cEvts + 1 < cMaxEvts) + { + /* Write the event log entry to memory. */ + RTGCPHYS const GCPhysEvtLog = pThis->EvtLogBaseAddr.n.u40Base << X86_PAGE_4K_SHIFT; + RTGCPHYS const GCPhysEvtLogEntry = GCPhysEvtLog + offEvt; + int rc = PDMDevHlpPCIPhysWrite(pDevIns, GCPhysEvtLogEntry, pEvent, cbEvt); + if (RT_FAILURE(rc)) + LogFunc(("Failed to write event log entry at %#RGp. rc=%Rrc\n", GCPhysEvtLogEntry, rc)); + + /* Increment the event log tail pointer. */ + uint32_t const cbEvtLog = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len); + pThis->EvtLogTailPtr.n.off = (offEvt + cbEvt) % cbEvtLog; + + /* Indicate that an event log entry was written. */ + ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_EVT_LOG_INTR); + + /* Check and signal an interrupt if software wants to receive one when an event log entry is written. */ + if (pThis->Ctrl.n.u1EvtIntrEn) + iommuAmdMsiInterruptRaise(pDevIns); + } + else + { + /* Indicate that the event log has overflowed. */ + ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_EVT_LOG_OVERFLOW); + + /* Check and signal an interrupt if software wants to receive one when the event log has overflowed. */ + if (pThis->Ctrl.n.u1EvtIntrEn) + iommuAmdMsiInterruptRaise(pDevIns); + } + } + + IOMMU_UNLOCK(pDevIns, pThisCC); + + return VINF_SUCCESS; +} + + +/** + * Sets an event in the hardware error registers. + * + * @param pDevIns The IOMMU device instance. + * @param pEvent The event. + * + * @thread Any. + */ +static void iommuAmdHwErrorSet(PPDMDEVINS pDevIns, PCEVT_GENERIC_T pEvent) +{ + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + if (pThis->ExtFeat.n.u1HwErrorSup) + { + if (pThis->HwEvtStatus.n.u1Valid) + pThis->HwEvtStatus.n.u1Overflow = 1; + pThis->HwEvtStatus.n.u1Valid = 1; + pThis->HwEvtHi.u64 = RT_MAKE_U64(pEvent->au32[0], pEvent->au32[1]); + pThis->HwEvtLo = RT_MAKE_U64(pEvent->au32[2], pEvent->au32[3]); + Assert( pThis->HwEvtHi.n.u4EvtCode == IOMMU_EVT_DEV_TAB_HW_ERROR + || pThis->HwEvtHi.n.u4EvtCode == IOMMU_EVT_PAGE_TAB_HW_ERROR + || pThis->HwEvtHi.n.u4EvtCode == IOMMU_EVT_COMMAND_HW_ERROR); + } +} + + +/** + * Initializes a PAGE_TAB_HARDWARE_ERROR event. + * + * @param idDevice The device ID (bus, device, function). + * @param idDomain The domain ID. + * @param GCPhysPtEntity The system physical address of the page table + * entity. + * @param enmOp The IOMMU operation being performed. + * @param pEvtPageTabHwErr Where to store the initialized event. + */ +static void iommuAmdPageTabHwErrorEventInit(uint16_t idDevice, uint16_t idDomain, RTGCPHYS GCPhysPtEntity, IOMMUOP enmOp, + PEVT_PAGE_TAB_HW_ERR_T pEvtPageTabHwErr) +{ + memset(pEvtPageTabHwErr, 0, sizeof(*pEvtPageTabHwErr)); + pEvtPageTabHwErr->n.u16DevId = idDevice; + pEvtPageTabHwErr->n.u16DomainOrPasidLo = idDomain; + pEvtPageTabHwErr->n.u1GuestOrNested = 0; + pEvtPageTabHwErr->n.u1Interrupt = RT_BOOL(enmOp == IOMMUOP_INTR_REQ); + pEvtPageTabHwErr->n.u1ReadWrite = RT_BOOL(enmOp == IOMMUOP_MEM_WRITE); + pEvtPageTabHwErr->n.u1Translation = RT_BOOL(enmOp == IOMMUOP_TRANSLATE_REQ); + pEvtPageTabHwErr->n.u2Type = enmOp == IOMMUOP_CMD ? HWEVTTYPE_DATA_ERROR : HWEVTTYPE_TARGET_ABORT; + pEvtPageTabHwErr->n.u4EvtCode = IOMMU_EVT_PAGE_TAB_HW_ERROR; + pEvtPageTabHwErr->n.u64Addr = GCPhysPtEntity; +} + + +/** + * Raises a PAGE_TAB_HARDWARE_ERROR event. + * + * @param pDevIns The IOMMU device instance. + * @param enmOp The IOMMU operation being performed. + * @param pEvtPageTabHwErr The page table hardware error event. + * + * @thread Any. + */ +static void iommuAmdPageTabHwErrorEventRaise(PPDMDEVINS pDevIns, IOMMUOP enmOp, PEVT_PAGE_TAB_HW_ERR_T pEvtPageTabHwErr) +{ + AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_PAGE_TAB_HW_ERR_T)); + PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtPageTabHwErr; + + PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC); + IOMMU_LOCK(pDevIns, pThisCC); + + iommuAmdHwErrorSet(pDevIns, (PCEVT_GENERIC_T)pEvent); + iommuAmdEvtLogEntryWrite(pDevIns, (PCEVT_GENERIC_T)pEvent); + if (enmOp != IOMMUOP_CMD) + iommuAmdSetPciTargetAbort(pDevIns); + + IOMMU_UNLOCK(pDevIns, pThisCC); + + LogFunc(("Raised PAGE_TAB_HARDWARE_ERROR. idDevice=%#x idDomain=%#x GCPhysPtEntity=%#RGp enmOp=%u u2Type=%u\n", + pEvtPageTabHwErr->n.u16DevId, pEvtPageTabHwErr->n.u16DomainOrPasidLo, pEvtPageTabHwErr->n.u64Addr, enmOp, + pEvtPageTabHwErr->n.u2Type)); +} + + +#ifdef IN_RING3 +/** + * Initializes a COMMAND_HARDWARE_ERROR event. + * + * @param GCPhysAddr The system physical address the IOMMU attempted to access. + * @param pEvtCmdHwErr Where to store the initialized event. + */ +static void iommuAmdCmdHwErrorEventInit(RTGCPHYS GCPhysAddr, PEVT_CMD_HW_ERR_T pEvtCmdHwErr) +{ + memset(pEvtCmdHwErr, 0, sizeof(*pEvtCmdHwErr)); + pEvtCmdHwErr->n.u2Type = HWEVTTYPE_DATA_ERROR; + pEvtCmdHwErr->n.u4EvtCode = IOMMU_EVT_COMMAND_HW_ERROR; + pEvtCmdHwErr->n.u64Addr = GCPhysAddr; +} + + +/** + * Raises a COMMAND_HARDWARE_ERROR event. + * + * @param pDevIns The IOMMU device instance. + * @param pEvtCmdHwErr The command hardware error event. + * + * @thread Any. + */ +static void iommuAmdCmdHwErrorEventRaise(PPDMDEVINS pDevIns, PCEVT_CMD_HW_ERR_T pEvtCmdHwErr) +{ + AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_CMD_HW_ERR_T)); + PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtCmdHwErr; + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + + PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC); + IOMMU_LOCK(pDevIns, pThisCC); + + iommuAmdHwErrorSet(pDevIns, (PCEVT_GENERIC_T)pEvent); + iommuAmdEvtLogEntryWrite(pDevIns, (PCEVT_GENERIC_T)pEvent); + ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_CMD_BUF_RUNNING); + + IOMMU_UNLOCK(pDevIns, pThisCC); + + LogFunc(("Raised COMMAND_HARDWARE_ERROR. GCPhysCmd=%#RGp u2Type=%u\n", pEvtCmdHwErr->n.u64Addr, pEvtCmdHwErr->n.u2Type)); +} +#endif /* IN_RING3 */ + + +/** + * Initializes a DEV_TAB_HARDWARE_ERROR event. + * + * @param idDevice The device ID (bus, device, function). + * @param GCPhysDte The system physical address of the failed device table + * access. + * @param enmOp The IOMMU operation being performed. + * @param pEvtDevTabHwErr Where to store the initialized event. + */ +static void iommuAmdDevTabHwErrorEventInit(uint16_t idDevice, RTGCPHYS GCPhysDte, IOMMUOP enmOp, + PEVT_DEV_TAB_HW_ERROR_T pEvtDevTabHwErr) +{ + memset(pEvtDevTabHwErr, 0, sizeof(*pEvtDevTabHwErr)); + pEvtDevTabHwErr->n.u16DevId = idDevice; + pEvtDevTabHwErr->n.u1Intr = RT_BOOL(enmOp == IOMMUOP_INTR_REQ); + /** @todo IOMMU: Any other transaction type that can set read/write bit? */ + pEvtDevTabHwErr->n.u1ReadWrite = RT_BOOL(enmOp == IOMMUOP_MEM_WRITE); + pEvtDevTabHwErr->n.u1Translation = RT_BOOL(enmOp == IOMMUOP_TRANSLATE_REQ); + pEvtDevTabHwErr->n.u2Type = enmOp == IOMMUOP_CMD ? HWEVTTYPE_DATA_ERROR : HWEVTTYPE_TARGET_ABORT; + pEvtDevTabHwErr->n.u4EvtCode = IOMMU_EVT_DEV_TAB_HW_ERROR; + pEvtDevTabHwErr->n.u64Addr = GCPhysDte; +} + + +/** + * Raises a DEV_TAB_HARDWARE_ERROR event. + * + * @param pDevIns The IOMMU device instance. + * @param enmOp The IOMMU operation being performed. + * @param pEvtDevTabHwErr The device table hardware error event. + * + * @thread Any. + */ +static void iommuAmdDevTabHwErrorEventRaise(PPDMDEVINS pDevIns, IOMMUOP enmOp, PEVT_DEV_TAB_HW_ERROR_T pEvtDevTabHwErr) +{ + AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_DEV_TAB_HW_ERROR_T)); + PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtDevTabHwErr; + + PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC); + IOMMU_LOCK(pDevIns, pThisCC); + + iommuAmdHwErrorSet(pDevIns, (PCEVT_GENERIC_T)pEvent); + iommuAmdEvtLogEntryWrite(pDevIns, (PCEVT_GENERIC_T)pEvent); + if (enmOp != IOMMUOP_CMD) + iommuAmdSetPciTargetAbort(pDevIns); + + IOMMU_UNLOCK(pDevIns, pThisCC); + + LogFunc(("Raised DEV_TAB_HARDWARE_ERROR. idDevice=%#x GCPhysDte=%#RGp enmOp=%u u2Type=%u\n", pEvtDevTabHwErr->n.u16DevId, + pEvtDevTabHwErr->n.u64Addr, enmOp, pEvtDevTabHwErr->n.u2Type)); +} + + +#ifdef IN_RING3 +/** + * Initializes an ILLEGAL_COMMAND_ERROR event. + * + * @param GCPhysCmd The system physical address of the failed command + * access. + * @param pEvtIllegalCmd Where to store the initialized event. + */ +static void iommuAmdIllegalCmdEventInit(RTGCPHYS GCPhysCmd, PEVT_ILLEGAL_CMD_ERR_T pEvtIllegalCmd) +{ + Assert(!(GCPhysCmd & UINT64_C(0xf))); + memset(pEvtIllegalCmd, 0, sizeof(*pEvtIllegalCmd)); + pEvtIllegalCmd->n.u4EvtCode = IOMMU_EVT_ILLEGAL_CMD_ERROR; + pEvtIllegalCmd->n.u64Addr = GCPhysCmd; +} + + +/** + * Raises an ILLEGAL_COMMAND_ERROR event. + * + * @param pDevIns The IOMMU device instance. + * @param pEvtIllegalCmd The illegal command error event. + */ +static void iommuAmdIllegalCmdEventRaise(PPDMDEVINS pDevIns, PCEVT_ILLEGAL_CMD_ERR_T pEvtIllegalCmd) +{ + AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_ILLEGAL_DTE_T)); + PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtIllegalCmd; + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + + iommuAmdEvtLogEntryWrite(pDevIns, pEvent); + ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_CMD_BUF_RUNNING); + + LogFunc(("Raised ILLEGAL_COMMAND_ERROR. Addr=%#RGp\n", pEvtIllegalCmd->n.u64Addr)); +} +#endif /* IN_RING3 */ + + +/** + * Initializes an ILLEGAL_DEV_TABLE_ENTRY event. + * + * @param idDevice The device ID (bus, device, function). + * @param uIova The I/O virtual address. + * @param fRsvdNotZero Whether reserved bits are not zero. Pass @c false if the + * event was caused by an invalid level encoding in the + * DTE. + * @param enmOp The IOMMU operation being performed. + * @param pEvtIllegalDte Where to store the initialized event. + */ +static void iommuAmdIllegalDteEventInit(uint16_t idDevice, uint64_t uIova, bool fRsvdNotZero, IOMMUOP enmOp, + PEVT_ILLEGAL_DTE_T pEvtIllegalDte) +{ + memset(pEvtIllegalDte, 0, sizeof(*pEvtIllegalDte)); + pEvtIllegalDte->n.u16DevId = idDevice; + pEvtIllegalDte->n.u1Interrupt = RT_BOOL(enmOp == IOMMUOP_INTR_REQ); + pEvtIllegalDte->n.u1ReadWrite = RT_BOOL(enmOp == IOMMUOP_MEM_WRITE); + pEvtIllegalDte->n.u1RsvdNotZero = fRsvdNotZero; + pEvtIllegalDte->n.u1Translation = RT_BOOL(enmOp == IOMMUOP_TRANSLATE_REQ); + pEvtIllegalDte->n.u4EvtCode = IOMMU_EVT_ILLEGAL_DEV_TAB_ENTRY; + pEvtIllegalDte->n.u64Addr = uIova & ~UINT64_C(0x3); + /** @todo r=ramshankar: Not sure why the last 2 bits are marked as reserved by the + * IOMMU spec here but not for this field for I/O page fault event. */ + Assert(!(uIova & UINT64_C(0x3))); +} + + +/** + * Raises an ILLEGAL_DEV_TABLE_ENTRY event. + * + * @param pDevIns The IOMMU instance data. + * @param enmOp The IOMMU operation being performed. + * @param pEvtIllegalDte The illegal device table entry event. + * @param enmEvtType The illegal device table entry event type. + * + * @thread Any. + */ +static void iommuAmdIllegalDteEventRaise(PPDMDEVINS pDevIns, IOMMUOP enmOp, PCEVT_ILLEGAL_DTE_T pEvtIllegalDte, + EVT_ILLEGAL_DTE_TYPE_T enmEvtType) +{ + AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_ILLEGAL_DTE_T)); + PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtIllegalDte; + + iommuAmdEvtLogEntryWrite(pDevIns, pEvent); + if (enmOp != IOMMUOP_CMD) + iommuAmdSetPciTargetAbort(pDevIns); + + LogFunc(("Raised ILLEGAL_DTE_EVENT. idDevice=%#x uIova=%#RX64 enmOp=%u enmEvtType=%u\n", pEvtIllegalDte->n.u16DevId, + pEvtIllegalDte->n.u64Addr, enmOp, enmEvtType)); + NOREF(enmEvtType); +} + + +/** + * Initializes an IO_PAGE_FAULT event. + * + * @param idDevice The device ID (bus, device, function). + * @param idDomain The domain ID. + * @param uIova The I/O virtual address being accessed. + * @param fPresent Transaction to a page marked as present (including + * DTE.V=1) or interrupt marked as remapped + * (IRTE.RemapEn=1). + * @param fRsvdNotZero Whether reserved bits are not zero. Pass @c false if + * the I/O page fault was caused by invalid level + * encoding. + * @param fPermDenied Permission denied for the address being accessed. + * @param enmOp The IOMMU operation being performed. + * @param pEvtIoPageFault Where to store the initialized event. + */ +static void iommuAmdIoPageFaultEventInit(uint16_t idDevice, uint16_t idDomain, uint64_t uIova, bool fPresent, bool fRsvdNotZero, + bool fPermDenied, IOMMUOP enmOp, PEVT_IO_PAGE_FAULT_T pEvtIoPageFault) +{ + Assert(!fPermDenied || fPresent); + memset(pEvtIoPageFault, 0, sizeof(*pEvtIoPageFault)); + pEvtIoPageFault->n.u16DevId = idDevice; + //pEvtIoPageFault->n.u4PasidHi = 0; + pEvtIoPageFault->n.u16DomainOrPasidLo = idDomain; + //pEvtIoPageFault->n.u1GuestOrNested = 0; + //pEvtIoPageFault->n.u1NoExecute = 0; + //pEvtIoPageFault->n.u1User = 0; + pEvtIoPageFault->n.u1Interrupt = RT_BOOL(enmOp == IOMMUOP_INTR_REQ); + pEvtIoPageFault->n.u1Present = fPresent; + pEvtIoPageFault->n.u1ReadWrite = RT_BOOL(enmOp == IOMMUOP_MEM_WRITE); + pEvtIoPageFault->n.u1PermDenied = fPermDenied; + pEvtIoPageFault->n.u1RsvdNotZero = fRsvdNotZero; + pEvtIoPageFault->n.u1Translation = RT_BOOL(enmOp == IOMMUOP_TRANSLATE_REQ); + pEvtIoPageFault->n.u4EvtCode = IOMMU_EVT_IO_PAGE_FAULT; + pEvtIoPageFault->n.u64Addr = uIova; +} + + +/** + * Raises an IO_PAGE_FAULT event. + * + * @param pDevIns The IOMMU instance data. + * @param fIoDevFlags The I/O device flags, see IOMMU_DTE_CACHE_F_XXX. + * @param pIrte The interrupt remapping table entry, can be NULL. + * @param enmOp The IOMMU operation being performed. + * @param pEvtIoPageFault The I/O page fault event. + * @param enmEvtType The I/O page fault event type. + * + * @thread Any. + */ +static void iommuAmdIoPageFaultEventRaise(PPDMDEVINS pDevIns, uint16_t fIoDevFlags, PCIRTE_T pIrte, IOMMUOP enmOp, + PCEVT_IO_PAGE_FAULT_T pEvtIoPageFault, EVT_IO_PAGE_FAULT_TYPE_T enmEvtType) +{ + AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_IO_PAGE_FAULT_T)); + PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtIoPageFault; + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + STAM_COUNTER_INC(&pThis->StatIopfs); NOREF(pThis); + +#ifdef IOMMU_WITH_DTE_CACHE +# define IOMMU_DTE_CACHE_SET_PF_RAISED(a_pDevIns, a_DevId) iommuAmdDteCacheUpdateFlags((a_pDevIns), (a_DevId), \ + IOMMU_DTE_CACHE_F_IO_PAGE_FAULT_RAISED, \ + 0 /* fAndMask */) +#else +# define IOMMU_DTE_CACHE_SET_PF_RAISED(a_pDevIns, a_DevId) do { } while (0) +#endif + + bool fSuppressEvtLogging = false; + if ( enmOp == IOMMUOP_MEM_READ + || enmOp == IOMMUOP_MEM_WRITE) + { + uint16_t const fSuppressIopf = IOMMU_DTE_CACHE_F_VALID + | IOMMU_DTE_CACHE_F_SUPPRESS_IOPF | IOMMU_DTE_CACHE_F_IO_PAGE_FAULT_RAISED; + uint16_t const fSuppressAllIopf = IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_SUPPRESS_ALL_IOPF; + if ( (fIoDevFlags & fSuppressAllIopf) == fSuppressAllIopf + || (fIoDevFlags & fSuppressIopf) == fSuppressIopf) + { + fSuppressEvtLogging = true; + } + } + else if (enmOp == IOMMUOP_INTR_REQ) + { + uint16_t const fSuppressIopf = IOMMU_DTE_CACHE_F_INTR_MAP_VALID | IOMMU_DTE_CACHE_F_IGNORE_UNMAPPED_INTR; + if ((fIoDevFlags & fSuppressIopf) == fSuppressIopf) + fSuppressEvtLogging = true; + else if (pIrte) /** @todo Make this compulsary and assert if it isn't provided. */ + fSuppressEvtLogging = pIrte->n.u1SuppressIoPf; + } + /* else: Events are never suppressed for commands. */ + + switch (enmEvtType) + { + case kIoPageFaultType_PermDenied: + { + /* Cannot be triggered by a command. */ + Assert(enmOp != IOMMUOP_CMD); + RT_FALL_THRU(); + } + case kIoPageFaultType_DteRsvdPagingMode: + case kIoPageFaultType_PteInvalidPageSize: + case kIoPageFaultType_PteInvalidLvlEncoding: + case kIoPageFaultType_SkippedLevelIovaNotZero: + case kIoPageFaultType_PteRsvdNotZero: + case kIoPageFaultType_PteValidNotSet: + case kIoPageFaultType_DteTranslationDisabled: + case kIoPageFaultType_PasidInvalidRange: + { + /* + * For a translation request, the IOMMU doesn't signal an I/O page fault nor does it + * create an event log entry. See AMD IOMMU spec. 2.1.3.2 "I/O Page Faults". + */ + if (enmOp != IOMMUOP_TRANSLATE_REQ) + { + if (!fSuppressEvtLogging) + { + iommuAmdEvtLogEntryWrite(pDevIns, pEvent); + IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId); + } + if (enmOp != IOMMUOP_CMD) + iommuAmdSetPciTargetAbort(pDevIns); + } + break; + } + + case kIoPageFaultType_UserSupervisor: + { + /* Access is blocked and only creates an event log entry. */ + if (!fSuppressEvtLogging) + { + iommuAmdEvtLogEntryWrite(pDevIns, pEvent); + IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId); + } + break; + } + + case kIoPageFaultType_IrteAddrInvalid: + case kIoPageFaultType_IrteRsvdNotZero: + case kIoPageFaultType_IrteRemapEn: + case kIoPageFaultType_IrteRsvdIntType: + case kIoPageFaultType_IntrReqAborted: + case kIoPageFaultType_IntrWithPasid: + { + /* Only trigerred by interrupt requests. */ + Assert(enmOp == IOMMUOP_INTR_REQ); + if (!fSuppressEvtLogging) + { + iommuAmdEvtLogEntryWrite(pDevIns, pEvent); + IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId); + } + iommuAmdSetPciTargetAbort(pDevIns); + break; + } + + case kIoPageFaultType_SmiFilterMismatch: + { + /* Not supported and probably will never be, assert. */ + AssertMsgFailed(("kIoPageFaultType_SmiFilterMismatch - Upstream SMI requests not supported/implemented.")); + break; + } + + case kIoPageFaultType_DevId_Invalid: + { + /* Cannot be triggered by a command. */ + Assert(enmOp != IOMMUOP_CMD); + Assert(enmOp != IOMMUOP_TRANSLATE_REQ); /** @todo IOMMU: We don't support translation requests yet. */ + if (!fSuppressEvtLogging) + { + iommuAmdEvtLogEntryWrite(pDevIns, pEvent); + IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId); + } + if ( enmOp == IOMMUOP_MEM_READ + || enmOp == IOMMUOP_MEM_WRITE) + iommuAmdSetPciTargetAbort(pDevIns); + break; + } + } + +#undef IOMMU_DTE_CACHE_SET_PF_RAISED +} + + +/** + * Raises an IO_PAGE_FAULT event given the DTE. + * + * @param pDevIns The IOMMU instance data. + * @param pDte The device table entry. + * @param pIrte The interrupt remapping table entry, can be NULL. + * @param enmOp The IOMMU operation being performed. + * @param pEvtIoPageFault The I/O page fault event. + * @param enmEvtType The I/O page fault event type. + * + * @thread Any. + */ +static void iommuAmdIoPageFaultEventRaiseWithDte(PPDMDEVINS pDevIns, PCDTE_T pDte, PCIRTE_T pIrte, IOMMUOP enmOp, + PCEVT_IO_PAGE_FAULT_T pEvtIoPageFault, EVT_IO_PAGE_FAULT_TYPE_T enmEvtType) +{ + Assert(pDte); + uint16_t const fIoDevFlags = iommuAmdGetBasicDevFlags(pDte); + return iommuAmdIoPageFaultEventRaise(pDevIns, fIoDevFlags, pIrte, enmOp, pEvtIoPageFault, enmEvtType); +} + + +/** + * Reads a device table entry for the given the device ID. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param idDevice The device ID (bus, device, function). + * @param enmOp The IOMMU operation being performed. + * @param pDte Where to store the device table entry. + * + * @thread Any. + */ +static int iommuAmdDteRead(PPDMDEVINS pDevIns, uint16_t idDevice, IOMMUOP enmOp, PDTE_T pDte) +{ + PCIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC); + + IOMMU_LOCK(pDevIns, pThisCC); + + /* Figure out which device table segment is being accessed. */ + uint8_t const idxSegsEn = pThis->Ctrl.n.u3DevTabSegEn; + Assert(idxSegsEn < RT_ELEMENTS(g_auDevTabSegShifts)); + + uint8_t const idxSeg = (idDevice & g_auDevTabSegMasks[idxSegsEn]) >> g_auDevTabSegShifts[idxSegsEn]; + Assert(idxSeg < RT_ELEMENTS(pThis->aDevTabBaseAddrs)); + AssertCompile(RT_ELEMENTS(g_auDevTabSegShifts) == RT_ELEMENTS(g_auDevTabSegMasks)); + + RTGCPHYS const GCPhysDevTab = pThis->aDevTabBaseAddrs[idxSeg].n.u40Base << X86_PAGE_4K_SHIFT; + uint32_t const offDte = (idDevice & ~g_auDevTabSegMasks[idxSegsEn]) * sizeof(DTE_T); + RTGCPHYS const GCPhysDte = GCPhysDevTab + offDte; + + /* Ensure the DTE falls completely within the device table segment. */ + uint32_t const cbDevTabSeg = (pThis->aDevTabBaseAddrs[idxSeg].n.u9Size + 1) << X86_PAGE_4K_SHIFT; + + IOMMU_UNLOCK(pDevIns, pThisCC); + + if (offDte + sizeof(DTE_T) <= cbDevTabSeg) + { + /* Read the device table entry from guest memory. */ + Assert(!(GCPhysDevTab & X86_PAGE_4K_OFFSET_MASK)); + int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysDte, pDte, sizeof(*pDte)); + if (RT_SUCCESS(rc)) + return VINF_SUCCESS; + + /* Raise a device table hardware error. */ + LogFunc(("Failed to read device table entry at %#RGp. rc=%Rrc -> DevTabHwError\n", GCPhysDte, rc)); + + EVT_DEV_TAB_HW_ERROR_T EvtDevTabHwErr; + iommuAmdDevTabHwErrorEventInit(idDevice, GCPhysDte, enmOp, &EvtDevTabHwErr); + iommuAmdDevTabHwErrorEventRaise(pDevIns, enmOp, &EvtDevTabHwErr); + return VERR_IOMMU_DTE_READ_FAILED; + } + + /* Raise an I/O page fault for out-of-bounds acccess. */ + LogFunc(("Out-of-bounds device table entry. idDevice=%#x offDte=%u cbDevTabSeg=%u -> IOPF\n", idDevice, offDte, cbDevTabSeg)); + EVT_IO_PAGE_FAULT_T EvtIoPageFault; + iommuAmdIoPageFaultEventInit(idDevice, 0 /* idDomain */, 0 /* uIova */, false /* fPresent */, false /* fRsvdNotZero */, + false /* fPermDenied */, enmOp, &EvtIoPageFault); + iommuAmdIoPageFaultEventRaise(pDevIns, 0 /* fIoDevFlags */, NULL /* pIrte */, enmOp, &EvtIoPageFault, + kIoPageFaultType_DevId_Invalid); + return VERR_IOMMU_DTE_BAD_OFFSET; +} + + +/** + * Performs pre-translation checks for the given device table entry. + * + * @returns VBox status code. + * @retval VINF_SUCCESS if the DTE is valid and supports address translation. + * @retval VINF_IOMMU_ADDR_TRANSLATION_DISABLED if the DTE is valid but address + * translation is disabled. + * @retval VERR_IOMMU_ADDR_TRANSLATION_FAILED if an error occurred and any + * corresponding event was raised. + * @retval VERR_IOMMU_ADDR_ACCESS_DENIED if the DTE denies the requested + * permissions. + * + * @param pDevIns The IOMMU device instance. + * @param uIova The I/O virtual address to translate. + * @param idDevice The device ID (bus, device, function). + * @param fPerm The I/O permissions for this access, see + * IOMMU_IO_PERM_XXX. + * @param pDte The device table entry. + * @param enmOp The IOMMU operation being performed. + * + * @thread Any. + */ +static int iommuAmdPreTranslateChecks(PPDMDEVINS pDevIns, uint16_t idDevice, uint64_t uIova, uint8_t fPerm, PCDTE_T pDte, + IOMMUOP enmOp) +{ + /* + * Check if the translation is valid, otherwise raise an I/O page fault. + */ + if (pDte->n.u1TranslationValid) + { /* likely */ } + else + { + /** @todo r=ramshankar: The AMD IOMMU spec. says page walk is terminated but + * doesn't explicitly say whether an I/O page fault is raised. From other + * places in the spec. it seems early page walk terminations (starting with + * the DTE) return the state computed so far and raises an I/O page fault. So + * returning an invalid translation rather than skipping translation. */ + LogFunc(("Translation valid bit not set -> IOPF\n")); + EVT_IO_PAGE_FAULT_T EvtIoPageFault; + iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, false /* fPresent */, false /* fRsvdNotZero */, + false /* fPermDenied */, enmOp, &EvtIoPageFault); + iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault, + kIoPageFaultType_DteTranslationDisabled); + return VERR_IOMMU_ADDR_TRANSLATION_FAILED; + } + + /* + * Check permissions bits in the DTE. + * Note: This MUST be checked prior to checking the root page table level below! + */ + uint8_t const fDtePerm = (pDte->au64[0] >> IOMMU_IO_PERM_SHIFT) & IOMMU_IO_PERM_MASK; + if ((fPerm & fDtePerm) == fPerm) + { /* likely */ } + else + { + LogFunc(("Permission denied by DTE (fPerm=%#x fDtePerm=%#x) -> IOPF\n", fPerm, fDtePerm)); + EVT_IO_PAGE_FAULT_T EvtIoPageFault; + iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */, + true /* fPermDenied */, enmOp, &EvtIoPageFault); + iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault, + kIoPageFaultType_PermDenied); + return VERR_IOMMU_ADDR_ACCESS_DENIED; + } + + /* + * If the root page table level is 0, translation is disabled and GPA=SPA and + * the DTE.IR and DTE.IW bits control permissions (verified above). + */ + uint8_t const uMaxLevel = pDte->n.u3Mode; + if (uMaxLevel != 0) + { /* likely */ } + else + { + Assert((fPerm & fDtePerm) == fPerm); /* Verify we've checked permissions. */ + return VINF_IOMMU_ADDR_TRANSLATION_DISABLED; + } + + /* + * If the root page table level exceeds the allowed host-address translation level, + * page walk is terminated and translation fails. + */ + if (uMaxLevel <= IOMMU_MAX_HOST_PT_LEVEL) + { /* likely */ } + else + { + /** @todo r=ramshankar: I cannot make out from the AMD IOMMU spec. if I should be + * raising an ILLEGAL_DEV_TABLE_ENTRY event or an IO_PAGE_FAULT event here. + * I'm just going with I/O page fault. */ + LogFunc(("Invalid root page table level %#x (idDevice=%#x) -> IOPF\n", uMaxLevel, idDevice)); + EVT_IO_PAGE_FAULT_T EvtIoPageFault; + iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */, + false /* fPermDenied */, enmOp, &EvtIoPageFault); + iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault, + kIoPageFaultType_PteInvalidLvlEncoding); + return VERR_IOMMU_ADDR_TRANSLATION_FAILED; + } + + /* The DTE allows translations for this device. */ + return VINF_SUCCESS; +} + + +/** + * Walks the I/O page table to translate the I/O virtual address to a system + * physical address. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param uIova The I/O virtual address to translate. Must be 4K aligned. + * @param fPerm The I/O permissions for this access, see + * IOMMU_IO_PERM_XXX. + * @param idDevice The device ID (bus, device, function). + * @param pDte The device table entry. + * @param enmOp The IOMMU operation being performed. + * @param pPageLookup Where to store the results of the I/O page lookup. This + * is only updated when VINF_SUCCESS is returned. + * + * @thread Any. + */ +static int iommuAmdIoPageTableWalk(PPDMDEVINS pDevIns, uint64_t uIova, uint8_t fPerm, uint16_t idDevice, PCDTE_T pDte, + IOMMUOP enmOp, PIOPAGELOOKUP pPageLookup) +{ + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + Assert(pDte->n.u1Valid); + Assert(!(uIova & X86_PAGE_4K_OFFSET_MASK)); + + /* The virtual address bits indexing table. */ + static uint8_t const s_acIovaLevelShifts[] = { 0, 12, 21, 30, 39, 48, 57, 0 }; + AssertCompile(RT_ELEMENTS(s_acIovaLevelShifts) > IOMMU_MAX_HOST_PT_LEVEL); + + /* + * Traverse the I/O page table starting with the page directory in the DTE. + * + * The Valid (Present bit), Translation Valid and Mode (Next-Level bits) in + * the DTE have been validated already, see iommuAmdPreTranslateChecks. + */ + IOPTENTITY_T PtEntity; + PtEntity.u64 = pDte->au64[0]; + for (;;) + { + uint8_t const uLevel = PtEntity.n.u3NextLevel; + + /* Read the page table entity at the current level. */ + { + Assert(uLevel > 0 && uLevel < RT_ELEMENTS(s_acIovaLevelShifts)); + Assert(uLevel <= IOMMU_MAX_HOST_PT_LEVEL); + uint16_t const idxPte = (uIova >> s_acIovaLevelShifts[uLevel]) & UINT64_C(0x1ff); + uint64_t const offPte = idxPte << 3; + RTGCPHYS const GCPhysPtEntity = (PtEntity.u64 & IOMMU_PTENTITY_ADDR_MASK) + offPte; + int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysPtEntity, &PtEntity.u64, sizeof(PtEntity)); + if (RT_FAILURE(rc)) + { + LogFunc(("Failed to read page table entry at %#RGp. rc=%Rrc -> PageTabHwError\n", GCPhysPtEntity, rc)); + EVT_PAGE_TAB_HW_ERR_T EvtPageTabHwErr; + iommuAmdPageTabHwErrorEventInit(idDevice, pDte->n.u16DomainId, GCPhysPtEntity, enmOp, &EvtPageTabHwErr); + iommuAmdPageTabHwErrorEventRaise(pDevIns, enmOp, &EvtPageTabHwErr); + return VERR_IOMMU_IPE_2; + } + } + + /* Check present bit. */ + if (PtEntity.n.u1Present) + { /* likely */ } + else + { + LogFunc(("Page table entry not present. idDevice=%#x uIova=%#RX64 -> IOPF\n", idDevice, uIova)); + EVT_IO_PAGE_FAULT_T EvtIoPageFault; + iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, false /* fPresent */, false /* fRsvdNotZero */, + false /* fPermDenied */, enmOp, &EvtIoPageFault); + iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault, + kIoPageFaultType_PermDenied); + return VERR_IOMMU_ADDR_TRANSLATION_FAILED; + } + + /* Validate the encoding of the next level. */ + uint8_t const uNextLevel = PtEntity.n.u3NextLevel; +#if IOMMU_MAX_HOST_PT_LEVEL < 6 + if (uNextLevel <= IOMMU_MAX_HOST_PT_LEVEL) + { /* likely */ } + else + { + LogFunc(("Next-level/paging-mode field of the paging entity invalid. uNextLevel=%#x -> IOPF\n", uNextLevel)); + EVT_IO_PAGE_FAULT_T EvtIoPageFault; + iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, true /* fRsvdNotZero */, + false /* fPermDenied */, enmOp, &EvtIoPageFault); + iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault, + kIoPageFaultType_PteInvalidLvlEncoding); + return VERR_IOMMU_ADDR_TRANSLATION_FAILED; + } +#endif + + /* Check reserved bits. */ + uint64_t const fRsvdMask = uNextLevel == 0 || uNextLevel == 7 ? IOMMU_PTE_RSVD_MASK : IOMMU_PDE_RSVD_MASK; + if (!(PtEntity.u64 & fRsvdMask)) + { /* likely */ } + else + { + LogFunc(("Page table entity (%#RX64 level=%u) reserved bits set -> IOPF\n", PtEntity.u64, uNextLevel)); + EVT_IO_PAGE_FAULT_T EvtIoPageFault; + iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, true /* fRsvdNotZero */, + false /* fPermDenied */, enmOp, &EvtIoPageFault); + iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault, + kIoPageFaultType_PteRsvdNotZero); + return VERR_IOMMU_ADDR_TRANSLATION_FAILED; + } + + /* Check permission bits. */ + uint8_t const fPtePerm = (PtEntity.u64 >> IOMMU_IO_PERM_SHIFT) & IOMMU_IO_PERM_MASK; + if ((fPerm & fPtePerm) == fPerm) + { /* likely */ } + else + { + LogFunc(("Page table entry access denied. idDevice=%#x fPerm=%#x fPtePerm=%#x -> IOPF\n", idDevice, fPerm, fPtePerm)); + EVT_IO_PAGE_FAULT_T EvtIoPageFault; + iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */, + true /* fPermDenied */, enmOp, &EvtIoPageFault); + iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault, + kIoPageFaultType_PermDenied); + return VERR_IOMMU_ADDR_ACCESS_DENIED; + } + + /* If the next level is 0 or 7, this is the final level PTE. */ + if (uNextLevel == 0) + { + /* The page size of the translation is the default size for the level. */ + uint8_t const cShift = s_acIovaLevelShifts[uLevel]; + RTGCPHYS const GCPhysPte = PtEntity.u64 & IOMMU_PTENTITY_ADDR_MASK; + pPageLookup->GCPhysSpa = GCPhysPte & X86_GET_PAGE_BASE_MASK(cShift); + pPageLookup->cShift = cShift; + pPageLookup->fPerm = fPtePerm; + return VINF_SUCCESS; + } + if (uNextLevel == 7) + { + /* The default page size of the translation is overridden. */ + uint8_t cShift = X86_PAGE_4K_SHIFT; + RTGCPHYS const GCPhysPte = PtEntity.u64 & IOMMU_PTENTITY_ADDR_MASK; + while (GCPhysPte & RT_BIT_64(cShift++)) + ; + + /* The page size must be larger than the default size and lower than the default size of the higher level. */ + if ( cShift > s_acIovaLevelShifts[uLevel] + && cShift < s_acIovaLevelShifts[uLevel + 1]) + { + pPageLookup->GCPhysSpa = GCPhysPte & X86_GET_PAGE_BASE_MASK(cShift); + pPageLookup->cShift = cShift; + pPageLookup->fPerm = fPtePerm; + STAM_COUNTER_INC(&pThis->StatNonStdPageSize); NOREF(pThis); + return VINF_SUCCESS; + } + + LogFunc(("Page size invalid. idDevice=%#x cShift=%u -> IOPF\n", idDevice, cShift)); + EVT_IO_PAGE_FAULT_T EvtIoPageFault; + iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */, + false /* fPermDenied */, enmOp, &EvtIoPageFault); + iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault, + kIoPageFaultType_PteInvalidPageSize); + return VERR_IOMMU_ADDR_TRANSLATION_FAILED; + } + + /* Validate level transition. */ + if (uNextLevel < uLevel) + { /* likely */ } + else + { + LogFunc(("Next level (%#x) must be less than the current level (%#x) -> IOPF\n", uNextLevel, uLevel)); + EVT_IO_PAGE_FAULT_T EvtIoPageFault; + iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */, + false /* fPermDenied */, enmOp, &EvtIoPageFault); + iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault, + kIoPageFaultType_PteInvalidLvlEncoding); + return VERR_IOMMU_ADDR_TRANSLATION_FAILED; + } + + /* Ensure IOVA bits of skipped levels (if any) are zero. */ + uint64_t const fIovaSkipMask = IOMMU_GET_MAX_VALID_IOVA(uLevel - 1) - IOMMU_GET_MAX_VALID_IOVA(uNextLevel); + if (!(uIova & fIovaSkipMask)) + { /* likely */ } + else + { + LogFunc(("IOVA of skipped levels are not zero. uIova=%#RX64 fSkipMask=%#RX64 -> IOPF\n", uIova, fIovaSkipMask)); + EVT_IO_PAGE_FAULT_T EvtIoPageFault; + iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */, + false /* fPermDenied */, enmOp, &EvtIoPageFault); + iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault, + kIoPageFaultType_SkippedLevelIovaNotZero); + return VERR_IOMMU_ADDR_TRANSLATION_FAILED; + } + + /* Traverse to the next level. */ + } +} + + +/** + * Page lookup callback for finding an I/O page from guest memory. + * + * @returns VBox status code. + * @retval VINF_SUCCESS when the page is found and has the right permissions. + * @retval VERR_IOMMU_ADDR_TRANSLATION_FAILED when address translation fails. + * @retval VERR_IOMMU_ADDR_ACCESS_DENIED when the page is found but permissions are + * insufficient to what is requested. + * + * @param pDevIns The IOMMU instance data. + * @param uIovaPage The I/O virtual address to lookup in the cache (must be + * 4K aligned). + * @param fPerm The I/O permissions for this access, see + * IOMMU_IO_PERM_XXX. + * @param pAux The auxiliary information required during lookup. + * @param pPageLookup Where to store the looked up I/O page. + */ +static DECLCALLBACK(int) iommuAmdDteLookupPage(PPDMDEVINS pDevIns, uint64_t uIovaPage, uint8_t fPerm, PCIOMMUOPAUX pAux, + PIOPAGELOOKUP pPageLookup) +{ + AssertPtr(pAux); + AssertPtr(pPageLookup); + Assert(!(uIovaPage & X86_PAGE_4K_OFFSET_MASK)); + + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + STAM_PROFILE_ADV_START(&pThis->StatProfDteLookup, a); + int rc = iommuAmdIoPageTableWalk(pDevIns, uIovaPage, fPerm, pAux->idDevice, pAux->pDte, pAux->enmOp, pPageLookup); + STAM_PROFILE_ADV_STOP(&pThis->StatProfDteLookup, a); NOREF(pThis); + return rc; +} + + +/** + * Looks up a range of I/O virtual addresses. + * + * @returns VBox status code. + * @param pDevIns The IOMMU instance data. + * @param pfnIoPageLookup The lookup function to use. + * @param pAddrIn The I/O address range to lookup. + * @param pAux The auxiliary information required by the lookup + * function. + * @param pAddrOut Where to store the translated I/O address page + * lookup. + * @param pcbContiguous Where to store the size of the access. + */ +static int iommuAmdLookupIoAddrRange(PPDMDEVINS pDevIns, PFNIOPAGELOOKUP pfnIoPageLookup, PCIOADDRRANGE pAddrIn, + PCIOMMUOPAUX pAux, PIOPAGELOOKUP pAddrOut, size_t *pcbContiguous) +{ + int rc; + size_t const cbIova = pAddrIn->cb; + uint8_t const fPerm = pAddrIn->fPerm; + uint64_t const uIova = pAddrIn->uAddr; + RTGCPHYS GCPhysSpa = NIL_RTGCPHYS; + size_t cbRemaining = cbIova; + uint64_t uIovaPage = pAddrIn->uAddr & X86_PAGE_4K_BASE_MASK; + uint64_t offIova = pAddrIn->uAddr & X86_PAGE_4K_OFFSET_MASK; + size_t const cbPage = X86_PAGE_4K_SIZE; + + IOPAGELOOKUP PageLookupPrev; + RT_ZERO(PageLookupPrev); + for (;;) + { + /* Lookup the physical page corresponding to the I/O virtual address. */ + IOPAGELOOKUP PageLookup; + rc = pfnIoPageLookup(pDevIns, uIovaPage, fPerm, pAux, &PageLookup); + if (RT_SUCCESS(rc)) + { + /* + * Validate results of the translation. + */ + /* The IOTLB cache preserves the original page sizes even though the IOVAs are split into 4K pages. */ + Assert(PageLookup.cShift >= X86_PAGE_4K_SHIFT && PageLookup.cShift <= 51); + Assert( pfnIoPageLookup != iommuAmdDteLookupPage + || !(PageLookup.GCPhysSpa & X86_GET_PAGE_OFFSET_MASK(PageLookup.cShift))); + Assert((PageLookup.fPerm & fPerm) == fPerm); + + /* Store the translated address before continuing to access more pages. */ + if (cbRemaining == cbIova) + { + uint64_t const offSpa = uIova & X86_GET_PAGE_OFFSET_MASK(PageLookup.cShift); + GCPhysSpa = PageLookup.GCPhysSpa | offSpa; + } + /* + * Check if translated address results in a physically contiguous region. + * + * Also ensure that the permissions for all pages in this range are identical + * because we specify a common permission while adding pages in this range + * to the IOTLB cache. + * + * The page size must also be identical since we need to know how many offset + * bits to copy into the final translated address (while retrieving 4K sized + * pages from the IOTLB cache). + */ + else if ( PageLookup.GCPhysSpa == PageLookupPrev.GCPhysSpa + cbPage + && PageLookup.fPerm == PageLookupPrev.fPerm + && PageLookup.cShift == PageLookupPrev.cShift) + { /* likely */ } + else + { + Assert(cbRemaining > 0); + rc = VERR_OUT_OF_RANGE; + break; + } + + /* Store the page lookup result from the first/previous page. */ + PageLookupPrev = PageLookup; + + /* Check if we need to access more pages. */ + if (cbRemaining > cbPage - offIova) + { + cbRemaining -= (cbPage - offIova); /* Calculate how much more we need to access. */ + uIovaPage += cbPage; /* Update address of the next access. */ + offIova = 0; /* After the first page, remaining pages are accessed from offset 0. */ + } + else + { + /* Caller (PDM) doesn't expect more data accessed than what was requested. */ + cbRemaining = 0; + break; + } + } + else + break; + } + + pAddrOut->GCPhysSpa = GCPhysSpa; /* Update the translated address. */ + pAddrOut->cShift = PageLookupPrev.cShift; /* Update the page size of the lookup. */ + pAddrOut->fPerm = PageLookupPrev.fPerm; /* Update the allowed permissions for this access. */ + *pcbContiguous = cbIova - cbRemaining; /* Update the size of the contiguous memory region. */ + return rc; +} + + +/** + * Looks up an I/O virtual address from the device table. + * + * @returns VBox status code. + * @param pDevIns The IOMMU instance data. + * @param idDevice The device ID (bus, device, function). + * @param uIova The I/O virtual address to lookup. + * @param cbIova The size of the access. + * @param fPerm The I/O permissions for this access, see + * IOMMU_IO_PERM_XXX. + * @param enmOp The IOMMU operation being performed. + * @param pGCPhysSpa Where to store the translated system physical address. + * @param pcbContiguous Where to store the number of contiguous bytes translated + * and permission-checked. + * + * @thread Any. + */ +static int iommuAmdDteLookup(PPDMDEVINS pDevIns, uint16_t idDevice, uint64_t uIova, size_t cbIova, uint8_t fPerm, IOMMUOP enmOp, + PRTGCPHYS pGCPhysSpa, size_t *pcbContiguous) +{ + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + RTGCPHYS GCPhysSpa = NIL_RTGCPHYS; + size_t cbContiguous = 0; + + /* Read the device table entry from memory. */ + DTE_T Dte; + int rc = iommuAmdDteRead(pDevIns, idDevice, enmOp, &Dte); + if (RT_SUCCESS(rc)) + { + if (Dte.n.u1Valid) + { + /* Validate bits 127:0 of the device table entry when DTE.V is 1. */ + uint64_t const fRsvd0 = Dte.au64[0] & ~(IOMMU_DTE_QWORD_0_VALID_MASK & ~IOMMU_DTE_QWORD_0_FEAT_MASK); + uint64_t const fRsvd1 = Dte.au64[1] & ~(IOMMU_DTE_QWORD_1_VALID_MASK & ~IOMMU_DTE_QWORD_1_FEAT_MASK); + if (RT_LIKELY(!fRsvd0 && !fRsvd1)) + { + /* + * Check if the DTE is configured for translating addresses. + * Note: Addresses cannot be subject to exclusion as we do -not- support remote IOTLBs, + * so there's no need to check the address exclusion base/limit here. + */ + rc = iommuAmdPreTranslateChecks(pDevIns, idDevice, uIova, fPerm, &Dte, enmOp); + if (rc == VINF_SUCCESS) + { + IOADDRRANGE AddrIn; + AddrIn.uAddr = uIova; + AddrIn.cb = cbIova; + AddrIn.fPerm = fPerm; + + IOMMUOPAUX Aux; + Aux.enmOp = enmOp; + Aux.pDte = &Dte; + Aux.idDevice = idDevice; + Aux.idDomain = Dte.n.u16DomainId; + + /* Lookup the address from the DTE and I/O page tables.*/ + IOPAGELOOKUP AddrOut; + rc = iommuAmdLookupIoAddrRange(pDevIns, iommuAmdDteLookupPage, &AddrIn, &Aux, &AddrOut, &cbContiguous); + GCPhysSpa = AddrOut.GCPhysSpa; + + /* + * If we stopped since translation resulted in non-contiguous physical addresses + * or permissions aren't identical for all pages in the access, what we translated + * thus far is still valid. + */ + if (rc == VERR_OUT_OF_RANGE) + { + Assert(cbContiguous > 0 && cbContiguous < cbIova); + rc = VINF_SUCCESS; + STAM_COUNTER_INC(&pThis->StatAccessDteNonContig); NOREF(pThis); + } + else if (rc == VERR_IOMMU_ADDR_ACCESS_DENIED) + STAM_COUNTER_INC(&pThis->StatAccessDtePermDenied); + +#ifdef IOMMU_WITH_IOTLBE_CACHE + if (RT_SUCCESS(rc)) + { + /* Update that addresses requires translation (cumulative permissions of DTE and I/O page tables). */ + iommuAmdDteCacheAddOrUpdateFlags(pDevIns, &Dte, idDevice, IOMMU_DTE_CACHE_F_ADDR_TRANSLATE, + 0 /* fAndMask */); + /* Update IOTLB for the contiguous range of I/O virtual addresses. */ + iommuAmdIotlbAddRange(pDevIns, Aux.idDomain, uIova & X86_PAGE_4K_BASE_MASK, cbContiguous, &AddrOut); + } +#endif + } + else if (rc == VINF_IOMMU_ADDR_TRANSLATION_DISABLED) + { + /* + * Translation is disabled for this device (root paging mode is 0). + * GPA=SPA, but the permission bits are important and controls accesses. + */ + GCPhysSpa = uIova; + cbContiguous = cbIova; + rc = VINF_SUCCESS; + +#ifdef IOMMU_WITH_IOTLBE_CACHE + /* Update that addresses permissions of DTE apply (but omit address translation). */ + iommuAmdDteCacheAddOrUpdateFlags(pDevIns, &Dte, idDevice, IOMMU_DTE_CACHE_F_IO_PERM, + IOMMU_DTE_CACHE_F_ADDR_TRANSLATE); +#endif + } + else + { + /* Address translation failed or access is denied. */ + Assert(rc == VERR_IOMMU_ADDR_ACCESS_DENIED || rc == VERR_IOMMU_ADDR_TRANSLATION_FAILED); + GCPhysSpa = NIL_RTGCPHYS; + cbContiguous = 0; + STAM_COUNTER_INC(&pThis->StatAccessDtePermDenied); + } + } + else + { + /* Invalid reserved bits in the DTE, raise an error event. */ + LogFunc(("Invalid DTE reserved bits (u64[0]=%#RX64 u64[1]=%#RX64) -> Illegal DTE\n", fRsvd0, fRsvd1)); + EVT_ILLEGAL_DTE_T Event; + iommuAmdIllegalDteEventInit(idDevice, uIova, true /* fRsvdNotZero */, enmOp, &Event); + iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdNotZero); + rc = VERR_IOMMU_ADDR_TRANSLATION_FAILED; + } + } + else + { + /* + * The DTE is not valid, forward addresses untranslated. + * See AMD IOMMU spec. "Table 5: Feature Enablement for Address Translation". + */ + GCPhysSpa = uIova; + cbContiguous = cbIova; + } + } + else + { + LogFunc(("Failed to read device table entry. idDevice=%#x rc=%Rrc\n", idDevice, rc)); + rc = VERR_IOMMU_ADDR_TRANSLATION_FAILED; + } + + *pGCPhysSpa = GCPhysSpa; + *pcbContiguous = cbContiguous; + AssertMsg(rc != VINF_SUCCESS || cbContiguous > 0, ("cbContiguous=%zu\n", cbContiguous)); + return rc; +} + + +#ifdef IOMMU_WITH_IOTLBE_CACHE +/** + * I/O page lookup callback for finding an I/O page from the IOTLB. + * + * @returns VBox status code. + * @retval VINF_SUCCESS when the page is found and has the right permissions. + * @retval VERR_NOT_FOUND when the page is not found. + * @retval VERR_IOMMU_ADDR_ACCESS_DENIED when the page is found but permissions are + * insufficient to what is requested. + * + * @param pDevIns The IOMMU instance data. + * @param uIovaPage The I/O virtual address to lookup in the cache (must be + * 4K aligned). + * @param fPerm The I/O permissions for this access, see + * IOMMU_IO_PERM_XXX. + * @param pAux The auxiliary information required during lookup. + * @param pPageLookup Where to store the looked up I/O page. + */ +static DECLCALLBACK(int) iommuAmdCacheLookupPage(PPDMDEVINS pDevIns, uint64_t uIovaPage, uint8_t fPerm, PCIOMMUOPAUX pAux, + PIOPAGELOOKUP pPageLookup) +{ + Assert(pAux); + Assert(pPageLookup); + Assert(!(uIovaPage & X86_PAGE_4K_OFFSET_MASK)); + + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3); + + STAM_PROFILE_ADV_START(&pThis->StatProfIotlbeLookup, a); + PCIOTLBE pIotlbe = iommuAmdIotlbLookup(pThis, pThisR3, pAux->idDomain, uIovaPage); + STAM_PROFILE_ADV_STOP(&pThis->StatProfIotlbeLookup, a); + if (pIotlbe) + { + *pPageLookup = pIotlbe->PageLookup; + if ((pPageLookup->fPerm & fPerm) == fPerm) + { + STAM_COUNTER_INC(&pThis->StatAccessCacheHit); + return VINF_SUCCESS; + } + return VERR_IOMMU_ADDR_ACCESS_DENIED; + } + return VERR_NOT_FOUND; +} + + +/** + * Lookups a memory access from the IOTLB cache. + * + * @returns VBox status code. + * @retval VINF_SUCCESS if the access was cached and permissions are verified. + * @retval VERR_OUT_OF_RANGE if the access resulted in a non-contiguous physical + * address region. + * @retval VERR_NOT_FOUND if the access was not cached. + * @retval VERR_IOMMU_ADDR_ACCESS_DENIED if the access was cached but permissions + * are insufficient. + * + * @param pDevIns The IOMMU instance data. + * @param idDevice The device ID (bus, device, function). + * @param uIova The I/O virtual address to lookup. + * @param cbIova The size of the access. + * @param fPerm The I/O permissions for this access, see + * IOMMU_IO_PERM_XXX. + * @param enmOp The IOMMU operation being performed. + * @param pGCPhysSpa Where to store the translated system physical address. + * @param pcbContiguous Where to store the number of contiguous bytes translated + * and permission-checked. + */ +static int iommuAmdIotlbCacheLookup(PPDMDEVINS pDevIns, uint16_t idDevice, uint64_t uIova, size_t cbIova, uint8_t fPerm, + IOMMUOP enmOp, PRTGCPHYS pGCPhysSpa, size_t *pcbContiguous) +{ + int rc; + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + +#define IOMMU_IOTLB_LOOKUP_FAILED(a_rc) \ + do { \ + *pGCPhysSpa = NIL_RTGCPHYS; \ + *pcbContiguous = 0; \ + rc = (a_rc); \ + } while (0) + + /* + * We hold the cache lock across both the DTE and the IOTLB lookups (if any) because + * we don't want the DTE cache to be invalidate while we perform IOTBL lookups. + */ + IOMMU_CACHE_LOCK(pDevIns, pThis); + + /* Lookup the DTE cache entry. */ + uint16_t const idxDteCache = iommuAmdDteCacheEntryLookup(pThis, idDevice); + if (idxDteCache < RT_ELEMENTS(pThis->aDteCache)) + { + PCDTECACHE pDteCache = &pThis->aDteCache[idxDteCache]; + if ((pDteCache->fFlags & (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_ADDR_TRANSLATE)) + == (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_ADDR_TRANSLATE)) + { + /* Lookup IOTLB entries. */ + IOADDRRANGE AddrIn; + AddrIn.uAddr = uIova; + AddrIn.cb = cbIova; + AddrIn.fPerm = fPerm; + + IOMMUOPAUX Aux; + Aux.enmOp = enmOp; + Aux.pDte = NULL; + Aux.idDevice = idDevice; + Aux.idDomain = pDteCache->idDomain; + + IOPAGELOOKUP AddrOut; + rc = iommuAmdLookupIoAddrRange(pDevIns, iommuAmdCacheLookupPage, &AddrIn, &Aux, &AddrOut, pcbContiguous); + *pGCPhysSpa = AddrOut.GCPhysSpa; + Assert(*pcbContiguous <= cbIova); + } + else if ((pDteCache->fFlags & (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_IO_PERM)) + == (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_IO_PERM)) + { + /* Address translation is disabled, but DTE permissions apply. */ + Assert(!(pDteCache->fFlags & IOMMU_DTE_CACHE_F_ADDR_TRANSLATE)); + uint8_t const fDtePerm = (pDteCache->fFlags >> IOMMU_DTE_CACHE_F_IO_PERM_SHIFT) & IOMMU_DTE_CACHE_F_IO_PERM_MASK; + if ((fDtePerm & fPerm) == fPerm) + { + *pGCPhysSpa = uIova; + *pcbContiguous = cbIova; + rc = VINF_SUCCESS; + } + else + IOMMU_IOTLB_LOOKUP_FAILED(VERR_IOMMU_ADDR_ACCESS_DENIED); + } + else if (pDteCache->fFlags & IOMMU_DTE_CACHE_F_PRESENT) + { + /* Forward addresses untranslated, without checking permissions. */ + *pGCPhysSpa = uIova; + *pcbContiguous = cbIova; + rc = VINF_SUCCESS; + } + else + IOMMU_IOTLB_LOOKUP_FAILED(VERR_NOT_FOUND); + } + else + IOMMU_IOTLB_LOOKUP_FAILED(VERR_NOT_FOUND); + + IOMMU_CACHE_UNLOCK(pDevIns, pThis); + + return rc; + +#undef IOMMU_IOTLB_LOOKUP_FAILED +} +#endif /* IOMMU_WITH_IOTLBE_CACHE */ + + +/** + * Gets the I/O permission and IOMMU operation type for the given access flags. + * + * @param pThis The shared IOMMU device state. + * @param fFlags The PDM IOMMU flags, PDMIOMMU_MEM_F_XXX. + * @param penmOp Where to store the IOMMU operation. + * @param pfPerm Where to store the IOMMU I/O permission. + * @param fBulk Whether this is a bulk read or write. + */ +DECLINLINE(void) iommuAmdMemAccessGetPermAndOp(PIOMMU pThis, uint32_t fFlags, PIOMMUOP penmOp, uint8_t *pfPerm, bool fBulk) +{ + if (fFlags & PDMIOMMU_MEM_F_WRITE) + { + *penmOp = IOMMUOP_MEM_WRITE; + *pfPerm = IOMMU_IO_PERM_WRITE; +#ifdef VBOX_WITH_STATISTICS + if (!fBulk) + STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMemWrite)); + else + STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMemBulkWrite)); +#else + RT_NOREF2(pThis, fBulk); +#endif + } + else + { + Assert(fFlags & PDMIOMMU_MEM_F_READ); + *penmOp = IOMMUOP_MEM_READ; + *pfPerm = IOMMU_IO_PERM_READ; +#ifdef VBOX_WITH_STATISTICS + if (!fBulk) + STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMemRead)); + else + STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMemBulkRead)); +#else + RT_NOREF2(pThis, fBulk); +#endif + } +} + + +/** + * Memory access transaction from a device. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param idDevice The device ID (bus, device, function). + * @param uIova The I/O virtual address being accessed. + * @param cbIova The size of the access. + * @param fFlags The access flags, see PDMIOMMU_MEM_F_XXX. + * @param pGCPhysSpa Where to store the translated system physical address. + * @param pcbContiguous Where to store the number of contiguous bytes translated + * and permission-checked. + * + * @thread Any. + */ +static DECLCALLBACK(int) iommuAmdMemAccess(PPDMDEVINS pDevIns, uint16_t idDevice, uint64_t uIova, size_t cbIova, + uint32_t fFlags, PRTGCPHYS pGCPhysSpa, size_t *pcbContiguous) +{ + /* Validate. */ + AssertPtr(pDevIns); + AssertPtr(pGCPhysSpa); + Assert(cbIova > 0); + Assert(!(fFlags & ~PDMIOMMU_MEM_F_VALID_MASK)); + + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + IOMMU_CTRL_T const Ctrl = iommuAmdGetCtrlUnlocked(pThis); + if (Ctrl.n.u1IommuEn) + { + IOMMUOP enmOp; + uint8_t fPerm; + iommuAmdMemAccessGetPermAndOp(pThis, fFlags, &enmOp, &fPerm, false /* fBulk */); + LogFlowFunc(("%s: idDevice=%#x uIova=%#RX64 cb=%zu\n", iommuAmdMemAccessGetPermName(fPerm), idDevice, uIova, cbIova)); + + int rc; +#ifdef IOMMU_WITH_IOTLBE_CACHE + /* Lookup the IOVA from the cache. */ + rc = iommuAmdIotlbCacheLookup(pDevIns, idDevice, uIova, cbIova, fPerm, enmOp, pGCPhysSpa, pcbContiguous); + if (rc == VINF_SUCCESS) + { + /* All pages in the access were found in the cache with sufficient permissions. */ + Assert(*pcbContiguous == cbIova); + Assert(*pGCPhysSpa != NIL_RTGCPHYS); + STAM_COUNTER_INC(&pThis->StatAccessCacheHitFull); + return VINF_SUCCESS; + } + if (rc != VERR_OUT_OF_RANGE) + { /* likely */ } + else + { + /* Access stopped since translations resulted in non-contiguous memory, let caller resume access. */ + Assert(*pcbContiguous > 0 && *pcbContiguous < cbIova); + STAM_COUNTER_INC(&pThis->StatAccessCacheNonContig); + return VINF_SUCCESS; + } + + /* + * Access incomplete as not all pages were in the cache. + * Or permissions were denied for the access (which typically doesn't happen) + * so go through the slower path and raise the required event. + */ + AssertMsg(*pcbContiguous < cbIova, ("Invalid size: cbContiguous=%zu cbIova=%zu\n", *pcbContiguous, cbIova)); + uIova += *pcbContiguous; + cbIova -= *pcbContiguous; + /* We currently are including any permission denied pages as cache misses too.*/ + STAM_COUNTER_INC(&pThis->StatAccessCacheMiss); +#endif + + /* Lookup the IOVA from the device table. */ + rc = iommuAmdDteLookup(pDevIns, idDevice, uIova, cbIova, fPerm, enmOp, pGCPhysSpa, pcbContiguous); + if (RT_SUCCESS(rc)) + { /* likely */ } + else + { + Assert(rc != VERR_OUT_OF_RANGE); + LogFunc(("DTE lookup failed! idDevice=%#x uIova=%#RX64 fPerm=%u cbIova=%zu rc=%#Rrc\n", idDevice, uIova, fPerm, + cbIova, rc)); + } + + return rc; + } + + /* Addresses are forwarded without translation when the IOMMU is disabled. */ + *pGCPhysSpa = uIova; + *pcbContiguous = cbIova; + return VINF_SUCCESS; +} + + +/** + * Memory access bulk (one or more 4K pages) request from a device. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param idDevice The device ID (bus, device, function). + * @param cIovas The number of addresses being accessed. + * @param pauIovas The I/O virtual addresses for each page being accessed. + * @param fFlags The access flags, see PDMIOMMU_MEM_F_XXX. + * @param paGCPhysSpa Where to store the translated physical addresses. + * + * @thread Any. + */ +static DECLCALLBACK(int) iommuAmdMemBulkAccess(PPDMDEVINS pDevIns, uint16_t idDevice, size_t cIovas, uint64_t const *pauIovas, + uint32_t fFlags, PRTGCPHYS paGCPhysSpa) +{ + /* Validate. */ + AssertPtr(pDevIns); + Assert(cIovas > 0); + AssertPtr(pauIovas); + AssertPtr(paGCPhysSpa); + Assert(!(fFlags & ~PDMIOMMU_MEM_F_VALID_MASK)); + + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + IOMMU_CTRL_T const Ctrl = iommuAmdGetCtrlUnlocked(pThis); + if (Ctrl.n.u1IommuEn) + { + IOMMUOP enmOp; + uint8_t fPerm; + iommuAmdMemAccessGetPermAndOp(pThis, fFlags, &enmOp, &fPerm, true /* fBulk */); + LogFlowFunc(("%s: idDevice=%#x cIovas=%zu\n", iommuAmdMemAccessGetPermName(fPerm), idDevice, cIovas)); + + for (size_t i = 0; i < cIovas; i++) + { + int rc; + size_t cbContig; + +#ifdef IOMMU_WITH_IOTLBE_CACHE + /* Lookup the IOVA from the IOTLB cache. */ + rc = iommuAmdIotlbCacheLookup(pDevIns, idDevice, pauIovas[i], X86_PAGE_SIZE, fPerm, enmOp, &paGCPhysSpa[i], + &cbContig); + if (rc == VINF_SUCCESS) + { + Assert(cbContig == X86_PAGE_SIZE); + Assert(paGCPhysSpa[i] != NIL_RTGCPHYS); + STAM_COUNTER_INC(&pThis->StatAccessCacheHitFull); + continue; + } + Assert(rc == VERR_NOT_FOUND || rc == VERR_IOMMU_ADDR_ACCESS_DENIED); + STAM_COUNTER_INC(&pThis->StatAccessCacheMiss); +#endif + + /* Lookup the IOVA from the device table. */ + rc = iommuAmdDteLookup(pDevIns, idDevice, pauIovas[i], X86_PAGE_SIZE, fPerm, enmOp, &paGCPhysSpa[i], &cbContig); + if (RT_SUCCESS(rc)) + { /* likely */ } + else + { + LogFunc(("Failed! idDevice=%#x uIova=%#RX64 fPerm=%u rc=%Rrc\n", idDevice, pauIovas[i], fPerm, rc)); + return rc; + } + Assert(cbContig == X86_PAGE_SIZE); + } + } + else + { + /* Addresses are forwarded without translation when the IOMMU is disabled. */ + for (size_t i = 0; i < cIovas; i++) + paGCPhysSpa[i] = pauIovas[i]; + } + + return VINF_SUCCESS; +} + + +/** + * Reads an interrupt remapping table entry from guest memory given its DTE. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param idDevice The device ID (bus, device, function). + * @param pDte The device table entry. + * @param GCPhysIn The source MSI address (used for reporting errors). + * @param uDataIn The source MSI data. + * @param enmOp The IOMMU operation being performed. + * @param pIrte Where to store the interrupt remapping table entry. + * + * @thread Any. + */ +static int iommuAmdIrteRead(PPDMDEVINS pDevIns, uint16_t idDevice, PCDTE_T pDte, RTGCPHYS GCPhysIn, uint32_t uDataIn, + IOMMUOP enmOp, PIRTE_T pIrte) +{ + /* Ensure the IRTE length is valid. */ + Assert(pDte->n.u4IntrTableLength < IOMMU_DTE_INTR_TAB_LEN_MAX); + + RTGCPHYS const GCPhysIntrTable = pDte->au64[2] & IOMMU_DTE_IRTE_ROOT_PTR_MASK; + uint16_t const cbIntrTable = IOMMU_DTE_GET_INTR_TAB_LEN(pDte); + uint16_t const offIrte = IOMMU_GET_IRTE_OFF(uDataIn); + RTGCPHYS const GCPhysIrte = GCPhysIntrTable + offIrte; + + /* Ensure the IRTE falls completely within the interrupt table. */ + if (offIrte + sizeof(IRTE_T) <= cbIntrTable) + { /* likely */ } + else + { + LogFunc(("IRTE exceeds table length (GCPhysIntrTable=%#RGp cbIntrTable=%u offIrte=%#x uDataIn=%#x) -> IOPF\n", + GCPhysIntrTable, cbIntrTable, offIrte, uDataIn)); + + EVT_IO_PAGE_FAULT_T EvtIoPageFault; + iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, GCPhysIn, false /* fPresent */, false /* fRsvdNotZero */, + false /* fPermDenied */, enmOp, &EvtIoPageFault); + iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault, + kIoPageFaultType_IrteAddrInvalid); + return VERR_IOMMU_ADDR_TRANSLATION_FAILED; + } + + /* Read the IRTE from memory. */ + Assert(!(GCPhysIrte & 3)); + int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysIrte, pIrte, sizeof(*pIrte)); + if (RT_SUCCESS(rc)) + return VINF_SUCCESS; + + /** @todo The IOMMU spec. does not tell what kind of error is reported in this + * situation. Is it an I/O page fault or a device table hardware error? + * There's no interrupt table hardware error event, but it's unclear what + * we should do here. */ + LogFunc(("Failed to read interrupt table entry at %#RGp. rc=%Rrc -> ???\n", GCPhysIrte, rc)); + return VERR_IOMMU_IPE_4; +} + + +/** + * Remaps the interrupt using the interrupt remapping table. + * + * @returns VBox status code. + * @param pDevIns The IOMMU instance data. + * @param idDevice The device ID (bus, device, function). + * @param pDte The device table entry. + * @param enmOp The IOMMU operation being performed. + * @param pMsiIn The source MSI. + * @param pMsiOut Where to store the remapped MSI. + * + * @thread Any. + */ +static int iommuAmdIntrRemap(PPDMDEVINS pDevIns, uint16_t idDevice, PCDTE_T pDte, IOMMUOP enmOp, PCMSIMSG pMsiIn, + PMSIMSG pMsiOut) +{ + Assert(pDte->n.u2IntrCtrl == IOMMU_INTR_CTRL_REMAP); + + IRTE_T Irte; + uint32_t const uMsiInData = pMsiIn->Data.u32; + int rc = iommuAmdIrteRead(pDevIns, idDevice, pDte, pMsiIn->Addr.u64, uMsiInData, enmOp, &Irte); + if (RT_SUCCESS(rc)) + { + if (Irte.n.u1RemapEnable) + { + if (!Irte.n.u1GuestMode) + { + if (Irte.n.u3IntrType <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO) + { + iommuAmdIrteRemapMsi(pMsiIn, pMsiOut, &Irte); +#ifdef IOMMU_WITH_IRTE_CACHE + iommuAmdIrteCacheAdd(pDevIns, idDevice, IOMMU_GET_IRTE_OFF(uMsiInData), &Irte); +#endif + return VINF_SUCCESS; + } + + LogFunc(("Interrupt type (%#x) invalid -> IOPF\n", Irte.n.u3IntrType)); + EVT_IO_PAGE_FAULT_T EvtIoPageFault; + iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, pMsiIn->Addr.u64, Irte.n.u1RemapEnable, + true /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault); + iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, &Irte, enmOp, &EvtIoPageFault, + kIoPageFaultType_IrteRsvdIntType); + return VERR_IOMMU_ADDR_TRANSLATION_FAILED; + } + + LogFunc(("Guest mode not supported -> IOPF\n")); + EVT_IO_PAGE_FAULT_T EvtIoPageFault; + iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, pMsiIn->Addr.u64, Irte.n.u1RemapEnable, + true /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault); + iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, &Irte, enmOp, &EvtIoPageFault, kIoPageFaultType_IrteRsvdNotZero); + return VERR_IOMMU_ADDR_TRANSLATION_FAILED; + } + + LogFunc(("Remapping disabled -> IOPF\n")); + EVT_IO_PAGE_FAULT_T EvtIoPageFault; + iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, pMsiIn->Addr.u64, Irte.n.u1RemapEnable, + false /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault); + iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, &Irte, enmOp, &EvtIoPageFault, kIoPageFaultType_IrteRemapEn); + return VERR_IOMMU_ADDR_TRANSLATION_FAILED; + } + + return rc; +} + + +/** + * Looks up an MSI interrupt from the interrupt remapping table. + * + * @returns VBox status code. + * @param pDevIns The IOMMU instance data. + * @param idDevice The device ID (bus, device, function). + * @param enmOp The IOMMU operation being performed. + * @param pMsiIn The source MSI. + * @param pMsiOut Where to store the remapped MSI. + * + * @thread Any. + */ +static int iommuAmdIntrTableLookup(PPDMDEVINS pDevIns, uint16_t idDevice, IOMMUOP enmOp, PCMSIMSG pMsiIn, PMSIMSG pMsiOut) +{ + LogFlowFunc(("idDevice=%#x (%#x:%#x:%#x) enmOp=%u\n", idDevice, ((idDevice >> VBOX_PCI_BUS_SHIFT) & VBOX_PCI_BUS_MASK), + ((idDevice >> VBOX_PCI_DEVFN_DEV_SHIFT) & VBOX_PCI_DEVFN_DEV_MASK), (idDevice & VBOX_PCI_DEVFN_FUN_MASK), + enmOp)); + + /* Read the device table entry from memory. */ + DTE_T Dte; + int rc = iommuAmdDteRead(pDevIns, idDevice, enmOp, &Dte); + if (RT_SUCCESS(rc)) + { +#ifdef IOMMU_WITH_IRTE_CACHE + iommuAmdDteCacheAdd(pDevIns, idDevice, &Dte); +#endif + /* If the DTE is not valid, all interrupts are forwarded without remapping. */ + if (Dte.n.u1IntrMapValid) + { + /* Validate bits 255:128 of the device table entry when DTE.IV is 1. */ + uint64_t const fRsvd0 = Dte.au64[2] & ~IOMMU_DTE_QWORD_2_VALID_MASK; + uint64_t const fRsvd1 = Dte.au64[3] & ~IOMMU_DTE_QWORD_3_VALID_MASK; + if (RT_LIKELY(!fRsvd0 && !fRsvd1)) + { /* likely */ } + else + { + LogFunc(("Invalid reserved bits in DTE (u64[2]=%#RX64 u64[3]=%#RX64) -> Illegal DTE\n", fRsvd0, fRsvd1)); + EVT_ILLEGAL_DTE_T Event; + iommuAmdIllegalDteEventInit(idDevice, pMsiIn->Addr.u64, true /* fRsvdNotZero */, enmOp, &Event); + iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdNotZero); + return VERR_IOMMU_INTR_REMAP_FAILED; + } + + /* + * LINT0/LINT1 pins cannot be driven by PCI(e) devices. Perhaps for a Southbridge + * that's connected through HyperTransport it might be possible; but for us, it + * doesn't seem we need to specially handle these pins. + */ + + /* + * Validate the MSI source address. + * + * 64-bit MSIs are supported by the PCI and AMD IOMMU spec. However as far as the + * CPU is concerned, the MSI region is fixed and we must ensure no other device + * claims the region as I/O space. + * + * See PCI spec. 6.1.4. "Message Signaled Interrupt (MSI) Support". + * See AMD IOMMU spec. 2.8 "IOMMU Interrupt Support". + * See Intel spec. 10.11.1 "Message Address Register Format". + */ + if ((pMsiIn->Addr.u64 & VBOX_MSI_ADDR_ADDR_MASK) == VBOX_MSI_ADDR_BASE) + { + /* + * The IOMMU remaps fixed and arbitrated interrupts using the IRTE. + * See AMD IOMMU spec. "2.2.5.1 Interrupt Remapping Tables, Guest Virtual APIC Not Enabled". + */ + uint8_t const u8DeliveryMode = pMsiIn->Data.n.u3DeliveryMode; + bool fPassThru = false; + switch (u8DeliveryMode) + { + case VBOX_MSI_DELIVERY_MODE_FIXED: + case VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO: + { + uint8_t const uIntrCtrl = Dte.n.u2IntrCtrl; + if (uIntrCtrl == IOMMU_INTR_CTRL_REMAP) + { + /* Validate the encoded interrupt table length when IntCtl specifies remapping. */ + uint8_t const uIntrTabLen = Dte.n.u4IntrTableLength; + if (uIntrTabLen < IOMMU_DTE_INTR_TAB_LEN_MAX) + { + /* + * We don't support guest interrupt remapping yet. When we do, we'll need to + * check Ctrl.u1GstVirtApicEn and use the guest Virtual APIC Table Root Pointer + * in the DTE rather than the Interrupt Root Table Pointer. Since the caller + * already reads the control register, add that as a parameter when we eventually + * support guest interrupt remapping. For now, just assert. + */ + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + Assert(!pThis->ExtFeat.n.u1GstVirtApicSup); + NOREF(pThis); + + return iommuAmdIntrRemap(pDevIns, idDevice, &Dte, enmOp, pMsiIn, pMsiOut); + } + + LogFunc(("Invalid interrupt table length %#x -> Illegal DTE\n", uIntrTabLen)); + EVT_ILLEGAL_DTE_T Event; + iommuAmdIllegalDteEventInit(idDevice, pMsiIn->Addr.u64, false /* fRsvdNotZero */, enmOp, &Event); + iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdIntTabLen); + return VERR_IOMMU_INTR_REMAP_FAILED; + } + + if (uIntrCtrl == IOMMU_INTR_CTRL_FWD_UNMAPPED) + { + fPassThru = true; + break; + } + + if (uIntrCtrl == IOMMU_INTR_CTRL_TARGET_ABORT) + { + LogRelMax(10, ("%s: Remapping disallowed for fixed/arbitrated interrupt %#x -> Target abort\n", + IOMMU_LOG_PFX, pMsiIn->Data.n.u8Vector)); + iommuAmdSetPciTargetAbort(pDevIns); + return VERR_IOMMU_INTR_REMAP_DENIED; + } + + Assert(uIntrCtrl == IOMMU_INTR_CTRL_RSVD); /* Paranoia. */ + LogRelMax(10, ("%s: IntCtl mode invalid %#x -> Illegal DTE\n", IOMMU_LOG_PFX, uIntrCtrl)); + EVT_ILLEGAL_DTE_T Event; + iommuAmdIllegalDteEventInit(idDevice, pMsiIn->Addr.u64, true /* fRsvdNotZero */, enmOp, &Event); + iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdIntCtl); + return VERR_IOMMU_INTR_REMAP_FAILED; + } + + /* SMIs are passed through unmapped. We don't implement SMI filters. */ + case VBOX_MSI_DELIVERY_MODE_SMI: fPassThru = true; break; + case VBOX_MSI_DELIVERY_MODE_NMI: fPassThru = Dte.n.u1NmiPassthru; break; + case VBOX_MSI_DELIVERY_MODE_INIT: fPassThru = Dte.n.u1InitPassthru; break; + case VBOX_MSI_DELIVERY_MODE_EXT_INT: fPassThru = Dte.n.u1ExtIntPassthru; break; + default: + { + LogRelMax(10, ("%s: MSI data delivery mode invalid %#x -> Target abort\n", IOMMU_LOG_PFX, + u8DeliveryMode)); + iommuAmdSetPciTargetAbort(pDevIns); + return VERR_IOMMU_INTR_REMAP_FAILED; + } + } + + /* + * For those other than fixed and arbitrated interrupts, destination mode must be 0 (physical). + * See AMD IOMMU spec. The note below Table 19: "IOMMU Controls and Actions for Upstream Interrupts". + */ + if ( u8DeliveryMode <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO + || !pMsiIn->Addr.n.u1DestMode) + { + if (fPassThru) + { + *pMsiOut = *pMsiIn; + return VINF_SUCCESS; + } + LogRelMax(10, ("%s: Remapping/passthru disallowed for interrupt %#x -> Target abort\n", IOMMU_LOG_PFX, + pMsiIn->Data.n.u8Vector)); + } + else + LogRelMax(10, ("%s: Logical destination mode invalid for delivery mode %#x\n -> Target abort\n", + IOMMU_LOG_PFX, u8DeliveryMode)); + + iommuAmdSetPciTargetAbort(pDevIns); + return VERR_IOMMU_INTR_REMAP_DENIED; + } + else + { + /** @todo should be cause a PCI target abort here? */ + LogRelMax(10, ("%s: MSI address region invalid %#RX64\n", IOMMU_LOG_PFX, pMsiIn->Addr.u64)); + return VERR_IOMMU_INTR_REMAP_FAILED; + } + } + else + { + LogFlowFunc(("DTE interrupt map not valid\n")); + *pMsiOut = *pMsiIn; + return VINF_SUCCESS; + } + } + + LogFunc(("Failed to read device table entry. idDevice=%#x rc=%Rrc\n", idDevice, rc)); + return VERR_IOMMU_INTR_REMAP_FAILED; +} + + +/** + * Interrupt remap request from a device. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param idDevice The device ID (bus, device, function). + * @param pMsiIn The source MSI. + * @param pMsiOut Where to store the remapped MSI. + */ +static DECLCALLBACK(int) iommuAmdMsiRemap(PPDMDEVINS pDevIns, uint16_t idDevice, PCMSIMSG pMsiIn, PMSIMSG pMsiOut) +{ + /* Validate. */ + Assert(pDevIns); + Assert(pMsiIn); + Assert(pMsiOut); + + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + + /* If this MSI was generated by the IOMMU itself, it's not subject to remapping, see @bugref{9654#c104}. */ + if (idDevice == pThis->uPciAddress) + return VERR_IOMMU_CANNOT_CALL_SELF; + + /* Interrupts are forwarded with remapping when the IOMMU is disabled. */ + IOMMU_CTRL_T const Ctrl = iommuAmdGetCtrlUnlocked(pThis); + if (Ctrl.n.u1IommuEn) + { + STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMsiRemap)); + + int rc; +#ifdef IOMMU_WITH_IRTE_CACHE + STAM_PROFILE_ADV_START(&pThis->StatProfIrteCacheLookup, a); + rc = iommuAmdIrteCacheLookup(pDevIns, idDevice, IOMMUOP_INTR_REQ, pMsiIn, pMsiOut); + STAM_PROFILE_ADV_STOP(&pThis->StatProfIrteCacheLookup, a); + if (RT_SUCCESS(rc)) + { + STAM_COUNTER_INC(&pThis->StatIntrCacheHit); + return VINF_SUCCESS; + } + STAM_COUNTER_INC(&pThis->StatIntrCacheMiss); +#endif + + STAM_PROFILE_ADV_START(&pThis->StatProfIrteLookup, a); + rc = iommuAmdIntrTableLookup(pDevIns, idDevice, IOMMUOP_INTR_REQ, pMsiIn, pMsiOut); + STAM_PROFILE_ADV_STOP(&pThis->StatProfIrteLookup, a); + return rc; + } + + *pMsiOut = *pMsiIn; + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNIOMMMIONEWWRITE} + */ +static DECLCALLBACK(VBOXSTRICTRC) iommuAmdMmioWrite(PPDMDEVINS pDevIns, void *pvUser, RTGCPHYS off, void const *pv, unsigned cb) +{ + NOREF(pvUser); + Assert(cb == 4 || cb == 8); + Assert(!(off & (cb - 1))); + + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMmioWrite)); NOREF(pThis); + + uint64_t const uValue = cb == 8 ? *(uint64_t const *)pv : *(uint32_t const *)pv; + return iommuAmdRegisterWrite(pDevIns, off, cb, uValue); +} + + +/** + * @callback_method_impl{FNIOMMMIONEWREAD} + */ +static DECLCALLBACK(VBOXSTRICTRC) iommuAmdMmioRead(PPDMDEVINS pDevIns, void *pvUser, RTGCPHYS off, void *pv, unsigned cb) +{ + NOREF(pvUser); + Assert(cb == 4 || cb == 8); + Assert(!(off & (cb - 1))); + + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMmioRead)); NOREF(pThis); + + uint64_t uResult; + VBOXSTRICTRC rcStrict = iommuAmdRegisterRead(pDevIns, off, &uResult); + if (rcStrict == VINF_SUCCESS) + { + if (cb == 8) + *(uint64_t *)pv = uResult; + else + *(uint32_t *)pv = (uint32_t)uResult; + } + + return rcStrict; +} + + +#ifdef IN_RING3 +/** + * Processes an IOMMU command. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param pCmd The command to process. + * @param GCPhysCmd The system physical address of the command. + * @param pEvtError Where to store the error event in case of failures. + * + * @thread Command thread. + */ +static int iommuAmdR3CmdProcess(PPDMDEVINS pDevIns, PCCMD_GENERIC_T pCmd, RTGCPHYS GCPhysCmd, PEVT_GENERIC_T pEvtError) +{ + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3); + + STAM_COUNTER_INC(&pThis->StatCmd); + + uint8_t const bCmd = pCmd->n.u4Opcode; + switch (bCmd) + { + case IOMMU_CMD_COMPLETION_WAIT: + { + STAM_COUNTER_INC(&pThis->StatCmdCompWait); + + PCCMD_COMWAIT_T pCmdComWait = (PCCMD_COMWAIT_T)pCmd; + AssertCompile(sizeof(*pCmdComWait) == sizeof(*pCmd)); + + /* Validate reserved bits in the command. */ + if (!(pCmdComWait->au64[0] & ~IOMMU_CMD_COM_WAIT_QWORD_0_VALID_MASK)) + { + /* If Completion Store is requested, write the StoreData to the specified address. */ + if (pCmdComWait->n.u1Store) + { + RTGCPHYS const GCPhysStore = RT_MAKE_U64(pCmdComWait->n.u29StoreAddrLo << 3, pCmdComWait->n.u20StoreAddrHi); + uint64_t const u64Data = pCmdComWait->n.u64StoreData; + int rc = PDMDevHlpPCIPhysWrite(pDevIns, GCPhysStore, &u64Data, sizeof(u64Data)); + if (RT_FAILURE(rc)) + { + LogFunc(("Cmd(%#x): Failed to write StoreData (%#RX64) to %#RGp, rc=%Rrc\n", bCmd, u64Data, + GCPhysStore, rc)); + iommuAmdCmdHwErrorEventInit(GCPhysStore, (PEVT_CMD_HW_ERR_T)pEvtError); + return VERR_IOMMU_CMD_HW_ERROR; + } + } + + /* If the command requests an interrupt and completion wait interrupts are enabled, raise it. */ + if (pCmdComWait->n.u1Interrupt) + { + IOMMU_LOCK(pDevIns, pThisR3); + ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_COMPLETION_WAIT_INTR); + bool const fRaiseInt = pThis->Ctrl.n.u1CompWaitIntrEn; + IOMMU_UNLOCK(pDevIns, pThisR3); + if (fRaiseInt) + iommuAmdMsiInterruptRaise(pDevIns); + } + return VINF_SUCCESS; + } + iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError); + return VERR_IOMMU_CMD_INVALID_FORMAT; + } + + case IOMMU_CMD_INV_DEV_TAB_ENTRY: + { + STAM_COUNTER_INC(&pThis->StatCmdInvDte); + PCCMD_INV_DTE_T pCmdInvDte = (PCCMD_INV_DTE_T)pCmd; + AssertCompile(sizeof(*pCmdInvDte) == sizeof(*pCmd)); + + /* Validate reserved bits in the command. */ + if ( !(pCmdInvDte->au64[0] & ~IOMMU_CMD_INV_DTE_QWORD_0_VALID_MASK) + && !(pCmdInvDte->au64[1] & ~IOMMU_CMD_INV_DTE_QWORD_1_VALID_MASK)) + { +#ifdef IOMMU_WITH_DTE_CACHE + iommuAmdDteCacheRemove(pDevIns, pCmdInvDte->n.u16DevId); +#endif + return VINF_SUCCESS; + } + iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError); + return VERR_IOMMU_CMD_INVALID_FORMAT; + } + + case IOMMU_CMD_INV_IOMMU_PAGES: + { + STAM_COUNTER_INC(&pThis->StatCmdInvIommuPages); + PCCMD_INV_IOMMU_PAGES_T pCmdInvPages = (PCCMD_INV_IOMMU_PAGES_T)pCmd; + AssertCompile(sizeof(*pCmdInvPages) == sizeof(*pCmd)); + + /* Validate reserved bits in the command. */ + if ( !(pCmdInvPages->au64[0] & ~IOMMU_CMD_INV_IOMMU_PAGES_QWORD_0_VALID_MASK) + && !(pCmdInvPages->au64[1] & ~IOMMU_CMD_INV_IOMMU_PAGES_QWORD_1_VALID_MASK)) + { +#ifdef IOMMU_WITH_IOTLBE_CACHE + uint64_t const uIova = RT_MAKE_U64(pCmdInvPages->n.u20AddrLo << X86_PAGE_4K_SHIFT, pCmdInvPages->n.u32AddrHi); + uint16_t const idDomain = pCmdInvPages->n.u16DomainId; + uint8_t cShift; + if (!pCmdInvPages->n.u1Size) + cShift = X86_PAGE_4K_SHIFT; + else + { + /* Find the first clear bit starting from bit 12 to 64 of the I/O virtual address. */ + unsigned const uFirstZeroBit = ASMBitLastSetU64(~(uIova >> X86_PAGE_4K_SHIFT)); + cShift = X86_PAGE_4K_SHIFT + uFirstZeroBit; + + /* + * For the address 0x7ffffffffffff000, cShift would be 76 (12+64) and the code below + * would do the right thing by clearing the entire cache for the specified domain ID. + * + * However, for the address 0xfffffffffffff000, cShift would be computed as 12. + * IOMMU behavior is undefined in this case, so it's safe to invalidate just one page. + * A debug-time assert is in place here to let us know if any software tries this. + * + * See AMD IOMMU spec. 2.4.3 "INVALIDATE_IOMMU_PAGES". + * See AMD IOMMU spec. Table 14: "Example Page Size Encodings". + */ + Assert(uIova != UINT64_C(0xfffffffffffff000)); + } + + /* + * Validate invalidation size. + * See AMD IOMMU spec. 2.2.3 "I/O Page Tables for Host Translations". + */ + if ( cShift >= 12 /* 4 KB */ + && cShift <= 51 /* 2 PB */) + { + /* Remove the range of I/O virtual addresses requesting to be invalidated. */ + size_t const cbIova = RT_BIT_64(cShift); + iommuAmdIotlbRemoveRange(pDevIns, idDomain, uIova, cbIova); + } + else + { + /* + * The guest provided size is invalid or exceeds the largest, meaningful page size. + * In such situations we must remove all ranges for the specified domain ID. + */ + iommuAmdIotlbRemoveDomainId(pDevIns, idDomain); + } +#endif + return VINF_SUCCESS; + } + iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError); + return VERR_IOMMU_CMD_INVALID_FORMAT; + } + + case IOMMU_CMD_INV_IOTLB_PAGES: + { + STAM_COUNTER_INC(&pThis->StatCmdInvIotlbPages); + + uint32_t const uCapHdr = PDMPciDevGetDWord(pDevIns->apPciDevs[0], IOMMU_PCI_OFF_CAP_HDR); + if (RT_BF_GET(uCapHdr, IOMMU_BF_CAPHDR_IOTLB_SUP)) + { + /** @todo IOMMU: Implement remote IOTLB invalidation. */ + return VERR_NOT_IMPLEMENTED; + } + iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError); + return VERR_IOMMU_CMD_NOT_SUPPORTED; + } + + case IOMMU_CMD_INV_INTR_TABLE: + { + STAM_COUNTER_INC(&pThis->StatCmdInvIntrTable); + + PCCMD_INV_INTR_TABLE_T pCmdInvIntrTable = (PCCMD_INV_INTR_TABLE_T)pCmd; + AssertCompile(sizeof(*pCmdInvIntrTable) == sizeof(*pCmd)); + + /* Validate reserved bits in the command. */ + if ( !(pCmdInvIntrTable->au64[0] & ~IOMMU_CMD_INV_INTR_TABLE_QWORD_0_VALID_MASK) + && !(pCmdInvIntrTable->au64[1] & ~IOMMU_CMD_INV_INTR_TABLE_QWORD_1_VALID_MASK)) + { +#ifdef IOMMU_WITH_IRTE_CACHE + iommuAmdIrteCacheRemove(pDevIns, pCmdInvIntrTable->u.u16DevId); +#endif + return VINF_SUCCESS; + } + iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError); + return VERR_IOMMU_CMD_INVALID_FORMAT; + } + + case IOMMU_CMD_PREFETCH_IOMMU_PAGES: + { + /* Linux doesn't use prefetching of IOMMU pages, so we don't bother for now. */ + STAM_COUNTER_INC(&pThis->StatCmdPrefIommuPages); + Assert(!pThis->ExtFeat.n.u1PrefetchSup); + iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError); + return VERR_IOMMU_CMD_NOT_SUPPORTED; + } + + case IOMMU_CMD_COMPLETE_PPR_REQ: + { + STAM_COUNTER_INC(&pThis->StatCmdCompletePprReq); + + /* We don't support PPR requests yet. */ + Assert(!pThis->ExtFeat.n.u1PprSup); + iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError); + return VERR_IOMMU_CMD_NOT_SUPPORTED; + } + + case IOMMU_CMD_INV_IOMMU_ALL: + { + STAM_COUNTER_INC(&pThis->StatCmdInvIommuAll); + if (pThis->ExtFeat.n.u1InvAllSup) + { + PCCMD_INV_IOMMU_ALL_T pCmdInvAll = (PCCMD_INV_IOMMU_ALL_T)pCmd; + AssertCompile(sizeof(*pCmdInvAll) == sizeof(*pCmd)); + + /* Validate reserved bits in the command. */ + if ( !(pCmdInvAll->au64[0] & ~IOMMU_CMD_INV_IOMMU_ALL_QWORD_0_VALID_MASK) + && !(pCmdInvAll->au64[1] & ~IOMMU_CMD_INV_IOMMU_ALL_QWORD_1_VALID_MASK)) + { +#ifdef IOMMU_WITH_DTE_CACHE + iommuAmdDteCacheRemoveAll(pDevIns); +#endif +#ifdef IOMMU_WITH_IOTLBE_CACHE + iommuAmdIotlbRemoveAll(pDevIns); +#endif + return VINF_SUCCESS; + } + iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError); + return VERR_IOMMU_CMD_INVALID_FORMAT; + } + iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError); + return VERR_IOMMU_CMD_NOT_SUPPORTED; + } + } + + STAM_COUNTER_DEC(&pThis->StatCmd); + LogFunc(("Cmd(%#x): Unrecognized\n", bCmd)); + iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError); + return VERR_IOMMU_CMD_NOT_SUPPORTED; +} + + +/** + * The IOMMU command thread. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param pThread The command thread. + */ +static DECLCALLBACK(int) iommuAmdR3CmdThread(PPDMDEVINS pDevIns, PPDMTHREAD pThread) +{ + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3); + + if (pThread->enmState == PDMTHREADSTATE_INITIALIZING) + return VINF_SUCCESS; + + /* + * Pre-allocate the maximum command buffer size supported by the IOMMU. + * This avoid trashing the heap as well as not wasting time allocating + * and freeing buffers while processing commands. + */ + size_t const cbMaxCmdBuf = sizeof(CMD_GENERIC_T) * iommuAmdGetBufMaxEntries(15); + void *pvCmds = RTMemAllocZ(cbMaxCmdBuf); + AssertPtrReturn(pvCmds, VERR_NO_MEMORY); + + while (pThread->enmState == PDMTHREADSTATE_RUNNING) + { + /* + * Sleep perpetually until we are woken up to process commands. + */ + bool const fSignaled = ASMAtomicXchgBool(&pThis->fCmdThreadSignaled, false); + if (!fSignaled) + { + int rc = PDMDevHlpSUPSemEventWaitNoResume(pDevIns, pThis->hEvtCmdThread, RT_INDEFINITE_WAIT); + AssertLogRelMsgReturn(RT_SUCCESS(rc) || rc == VERR_INTERRUPTED, ("%Rrc\n", rc), rc); + if (RT_UNLIKELY(pThread->enmState != PDMTHREADSTATE_RUNNING)) + break; + Log4Func(("Woken up with rc=%Rrc\n", rc)); + ASMAtomicWriteBool(&pThis->fCmdThreadSignaled, false); + } + + /* + * Fetch and process IOMMU commands. + */ + /** @todo r=ramshankar: We currently copy all commands from guest memory into a + * temporary host buffer before processing them as a batch. If we want to + * save on host memory a bit, we could (once PGM has the necessary APIs) + * lock the page mappings page mappings and access them directly. */ + IOMMU_LOCK(pDevIns, pThisR3); + + if (pThis->Status.n.u1CmdBufRunning) + { + /* Get the offsets we need to read commands from memory (circular buffer offset). */ + uint32_t const cbCmdBuf = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len); + uint32_t const offTail = pThis->CmdBufTailPtr.n.off; + uint32_t offHead = pThis->CmdBufHeadPtr.n.off; + + /* Validate. */ + Assert(!(offHead & ~IOMMU_CMD_BUF_HEAD_PTR_VALID_MASK)); + Assert(offHead < cbCmdBuf); + Assert(cbCmdBuf <= cbMaxCmdBuf); + + if (offHead != offTail) + { + /* Read the entire command buffer from memory (avoids multiple PGM calls). */ + RTGCPHYS const GCPhysCmdBufBase = pThis->CmdBufBaseAddr.n.u40Base << X86_PAGE_4K_SHIFT; + + IOMMU_UNLOCK(pDevIns, pThisR3); + int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysCmdBufBase, pvCmds, cbCmdBuf); + IOMMU_LOCK(pDevIns, pThisR3); + + if (RT_SUCCESS(rc)) + { + /* Indicate to software we've fetched all commands from the buffer. */ + pThis->CmdBufHeadPtr.n.off = offTail; + + /* Allow IOMMU to do other work while we process commands. */ + IOMMU_UNLOCK(pDevIns, pThisR3); + + /* Process the fetched commands. */ + EVT_GENERIC_T EvtError; + do + { + PCCMD_GENERIC_T pCmd = (PCCMD_GENERIC_T)((uintptr_t)pvCmds + offHead); + rc = iommuAmdR3CmdProcess(pDevIns, pCmd, GCPhysCmdBufBase + offHead, &EvtError); + if (RT_FAILURE(rc)) + { + if ( rc == VERR_IOMMU_CMD_NOT_SUPPORTED + || rc == VERR_IOMMU_CMD_INVALID_FORMAT) + { + Assert(EvtError.n.u4EvtCode == IOMMU_EVT_ILLEGAL_CMD_ERROR); + iommuAmdIllegalCmdEventRaise(pDevIns, (PCEVT_ILLEGAL_CMD_ERR_T)&EvtError); + } + else if (rc == VERR_IOMMU_CMD_HW_ERROR) + { + Assert(EvtError.n.u4EvtCode == IOMMU_EVT_COMMAND_HW_ERROR); + LogFunc(("Raising command hardware error. Cmd=%#x -> COMMAND_HW_ERROR\n", pCmd->n.u4Opcode)); + iommuAmdCmdHwErrorEventRaise(pDevIns, (PCEVT_CMD_HW_ERR_T)&EvtError); + } + break; + } + + /* Move to the next command in the circular buffer. */ + offHead = (offHead + sizeof(CMD_GENERIC_T)) % cbCmdBuf; + } while (offHead != offTail); + } + else + { + LogFunc(("Failed to read command at %#RGp. rc=%Rrc -> COMMAND_HW_ERROR\n", GCPhysCmdBufBase, rc)); + EVT_CMD_HW_ERR_T EvtCmdHwErr; + iommuAmdCmdHwErrorEventInit(GCPhysCmdBufBase, &EvtCmdHwErr); + iommuAmdCmdHwErrorEventRaise(pDevIns, &EvtCmdHwErr); + + IOMMU_UNLOCK(pDevIns, pThisR3); + } + } + else + IOMMU_UNLOCK(pDevIns, pThisR3); + } + else + IOMMU_UNLOCK(pDevIns, pThisR3); + } + + RTMemFree(pvCmds); + LogFlowFunc(("Command thread terminating\n")); + return VINF_SUCCESS; +} + + +/** + * Wakes up the command thread so it can respond to a state change. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param pThread The command thread. + */ +static DECLCALLBACK(int) iommuAmdR3CmdThreadWakeUp(PPDMDEVINS pDevIns, PPDMTHREAD pThread) +{ + RT_NOREF(pThread); + Log4Func(("\n")); + PCIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + return PDMDevHlpSUPSemEventSignal(pDevIns, pThis->hEvtCmdThread); +} + + +/** + * @callback_method_impl{FNPCICONFIGREAD} + */ +static DECLCALLBACK(VBOXSTRICTRC) iommuAmdR3PciConfigRead(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t uAddress, + unsigned cb, uint32_t *pu32Value) +{ + /** @todo IOMMU: PCI config read stat counter. */ + VBOXSTRICTRC rcStrict = PDMDevHlpPCIConfigRead(pDevIns, pPciDev, uAddress, cb, pu32Value); + Log3Func(("uAddress=%#x (cb=%u) -> %#x. rc=%Rrc\n", uAddress, cb, *pu32Value, VBOXSTRICTRC_VAL(rcStrict))); + return rcStrict; +} + + +/** + * Sets up the IOMMU MMIO region (usually in response to an IOMMU base address + * register write). + * + * @returns VBox status code. + * @param pDevIns The IOMMU instance data. + * + * @remarks Call this function only when the IOMMU BAR is enabled. + */ +static int iommuAmdR3MmioSetup(PPDMDEVINS pDevIns) +{ + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + Assert(pThis->IommuBar.n.u1Enable); + Assert(pThis->hMmio != NIL_IOMMMIOHANDLE); /* Paranoia. Ensure we have a valid IOM MMIO handle. */ + Assert(!pThis->ExtFeat.n.u1PerfCounterSup); /* Base is 16K aligned when performance counters aren't supported. */ + RTGCPHYS const GCPhysMmioBase = RT_MAKE_U64(pThis->IommuBar.au32[0] & 0xffffc000, pThis->IommuBar.au32[1]); + RTGCPHYS const GCPhysMmioBasePrev = PDMDevHlpMmioGetMappingAddress(pDevIns, pThis->hMmio); + + /* If the MMIO region is already mapped at the specified address, we're done. */ + Assert(GCPhysMmioBase != NIL_RTGCPHYS); + if (GCPhysMmioBasePrev == GCPhysMmioBase) + return VINF_SUCCESS; + + /* Unmap the previous MMIO region (which is at a different address). */ + if (GCPhysMmioBasePrev != NIL_RTGCPHYS) + { + LogFlowFunc(("Unmapping previous MMIO region at %#RGp\n", GCPhysMmioBasePrev)); + int rc = PDMDevHlpMmioUnmap(pDevIns, pThis->hMmio); + if (RT_FAILURE(rc)) + { + LogFunc(("Failed to unmap MMIO region at %#RGp. rc=%Rrc\n", GCPhysMmioBasePrev, rc)); + return rc; + } + } + + /* Map the newly specified MMIO region. */ + LogFlowFunc(("Mapping MMIO region at %#RGp\n", GCPhysMmioBase)); + int rc = PDMDevHlpMmioMap(pDevIns, pThis->hMmio, GCPhysMmioBase); + if (RT_FAILURE(rc)) + { + LogFunc(("Failed to unmap MMIO region at %#RGp. rc=%Rrc\n", GCPhysMmioBase, rc)); + return rc; + } + + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNPCICONFIGWRITE} + */ +static DECLCALLBACK(VBOXSTRICTRC) iommuAmdR3PciConfigWrite(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t uAddress, + unsigned cb, uint32_t u32Value) +{ + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + + /* + * Discard writes to read-only registers that are specific to the IOMMU. + * Other common PCI registers are handled by the generic code, see devpciR3IsConfigByteWritable(). + * See PCI spec. 6.1. "Configuration Space Organization". + */ + switch (uAddress) + { + case IOMMU_PCI_OFF_CAP_HDR: /* All bits are read-only. */ + case IOMMU_PCI_OFF_RANGE_REG: /* We don't have any devices integrated with the IOMMU. */ + case IOMMU_PCI_OFF_MISCINFO_REG_0: /* We don't support MSI-X. */ + case IOMMU_PCI_OFF_MISCINFO_REG_1: /* We don't support guest-address translation. */ + { + LogFunc(("PCI config write (%#RX32) to read-only register %#x -> Ignored\n", u32Value, uAddress)); + return VINF_SUCCESS; + } + } + + PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3); + IOMMU_LOCK(pDevIns, pThisR3); + + VBOXSTRICTRC rcStrict; + switch (uAddress) + { + case IOMMU_PCI_OFF_BASE_ADDR_REG_LO: + { + if (!pThis->IommuBar.n.u1Enable) + { + pThis->IommuBar.au32[0] = u32Value & IOMMU_BAR_VALID_MASK; + if (pThis->IommuBar.n.u1Enable) + rcStrict = iommuAmdR3MmioSetup(pDevIns); + else + rcStrict = VINF_SUCCESS; + } + else + { + LogFunc(("Writing Base Address (Lo) when it's already enabled -> Ignored\n")); + rcStrict = VINF_SUCCESS; + } + break; + } + + case IOMMU_PCI_OFF_BASE_ADDR_REG_HI: + { + if (!pThis->IommuBar.n.u1Enable) + { + AssertCompile((IOMMU_BAR_VALID_MASK >> 32) == 0xffffffff); + pThis->IommuBar.au32[1] = u32Value; + } + else + LogFunc(("Writing Base Address (Hi) when it's already enabled -> Ignored\n")); + rcStrict = VINF_SUCCESS; + break; + } + + case IOMMU_PCI_OFF_MSI_CAP_HDR: + { + u32Value |= RT_BIT(23); /* 64-bit MSI addressess must always be enabled for IOMMU. */ + RT_FALL_THRU(); + } + default: + { + rcStrict = PDMDevHlpPCIConfigWrite(pDevIns, pPciDev, uAddress, cb, u32Value); + break; + } + } + + IOMMU_UNLOCK(pDevIns, pThisR3); + + Log3Func(("uAddress=%#x (cb=%u) with %#x. rc=%Rrc\n", uAddress, cb, u32Value, VBOXSTRICTRC_VAL(rcStrict))); + return rcStrict; +} + + +/** + * @callback_method_impl{FNDBGFHANDLERDEV} + */ +static DECLCALLBACK(void) iommuAmdR3DbgInfo(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + PCIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + PCPDMPCIDEV pPciDev = pDevIns->apPciDevs[0]; + PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev); + + bool const fVerbose = RTStrCmp(pszArgs, "verbose") == 0; + + pHlp->pfnPrintf(pHlp, "AMD-IOMMU:\n"); + /* Device Table Base Addresses (all segments). */ + for (unsigned i = 0; i < RT_ELEMENTS(pThis->aDevTabBaseAddrs); i++) + { + DEV_TAB_BAR_T const DevTabBar = pThis->aDevTabBaseAddrs[i]; + pHlp->pfnPrintf(pHlp, " Device Table BAR %u = %#RX64\n", i, DevTabBar.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Size = %#x (%u bytes)\n", DevTabBar.n.u9Size, + IOMMU_GET_DEV_TAB_LEN(&DevTabBar)); + pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", + DevTabBar.n.u40Base << X86_PAGE_4K_SHIFT); + } + } + /* Command Buffer Base Address Register. */ + { + CMD_BUF_BAR_T const CmdBufBar = pThis->CmdBufBaseAddr; + uint8_t const uEncodedLen = CmdBufBar.n.u4Len; + uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen); + uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen); + pHlp->pfnPrintf(pHlp, " Command Buffer BAR = %#RX64\n", CmdBufBar.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", + CmdBufBar.n.u40Base << X86_PAGE_4K_SHIFT); + pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen, + cEntries, cbBuffer); + } + } + /* Event Log Base Address Register. */ + { + EVT_LOG_BAR_T const EvtLogBar = pThis->EvtLogBaseAddr; + uint8_t const uEncodedLen = EvtLogBar.n.u4Len; + uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen); + uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen); + pHlp->pfnPrintf(pHlp, " Event Log BAR = %#RX64\n", EvtLogBar.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", + EvtLogBar.n.u40Base << X86_PAGE_4K_SHIFT); + pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen, + cEntries, cbBuffer); + } + } + /* IOMMU Control Register. */ + { + IOMMU_CTRL_T const Ctrl = pThis->Ctrl; + pHlp->pfnPrintf(pHlp, " Control = %#RX64\n", Ctrl.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " IOMMU enable = %RTbool\n", Ctrl.n.u1IommuEn); + pHlp->pfnPrintf(pHlp, " HT Tunnel translation enable = %RTbool\n", Ctrl.n.u1HtTunEn); + pHlp->pfnPrintf(pHlp, " Event log enable = %RTbool\n", Ctrl.n.u1EvtLogEn); + pHlp->pfnPrintf(pHlp, " Event log interrupt enable = %RTbool\n", Ctrl.n.u1EvtIntrEn); + pHlp->pfnPrintf(pHlp, " Completion wait interrupt enable = %RTbool\n", Ctrl.n.u1EvtIntrEn); + pHlp->pfnPrintf(pHlp, " Invalidation timeout = %u\n", Ctrl.n.u3InvTimeOut); + pHlp->pfnPrintf(pHlp, " Pass posted write = %RTbool\n", Ctrl.n.u1PassPW); + pHlp->pfnPrintf(pHlp, " Respose Pass posted write = %RTbool\n", Ctrl.n.u1ResPassPW); + pHlp->pfnPrintf(pHlp, " Coherent = %RTbool\n", Ctrl.n.u1Coherent); + pHlp->pfnPrintf(pHlp, " Isochronous = %RTbool\n", Ctrl.n.u1Isoc); + pHlp->pfnPrintf(pHlp, " Command buffer enable = %RTbool\n", Ctrl.n.u1CmdBufEn); + pHlp->pfnPrintf(pHlp, " PPR log enable = %RTbool\n", Ctrl.n.u1PprLogEn); + pHlp->pfnPrintf(pHlp, " PPR interrupt enable = %RTbool\n", Ctrl.n.u1PprIntrEn); + pHlp->pfnPrintf(pHlp, " PPR enable = %RTbool\n", Ctrl.n.u1PprEn); + pHlp->pfnPrintf(pHlp, " Guest translation eanble = %RTbool\n", Ctrl.n.u1GstTranslateEn); + pHlp->pfnPrintf(pHlp, " Guest virtual-APIC enable = %RTbool\n", Ctrl.n.u1GstVirtApicEn); + pHlp->pfnPrintf(pHlp, " CRW = %#x\n", Ctrl.n.u4Crw); + pHlp->pfnPrintf(pHlp, " SMI filter enable = %RTbool\n", Ctrl.n.u1SmiFilterEn); + pHlp->pfnPrintf(pHlp, " Self-writeback disable = %RTbool\n", Ctrl.n.u1SelfWriteBackDis); + pHlp->pfnPrintf(pHlp, " SMI filter log enable = %RTbool\n", Ctrl.n.u1SmiFilterLogEn); + pHlp->pfnPrintf(pHlp, " Guest virtual-APIC mode enable = %#x\n", Ctrl.n.u3GstVirtApicModeEn); + pHlp->pfnPrintf(pHlp, " Guest virtual-APIC GA log enable = %RTbool\n", Ctrl.n.u1GstLogEn); + pHlp->pfnPrintf(pHlp, " Guest virtual-APIC interrupt enable = %RTbool\n", Ctrl.n.u1GstIntrEn); + pHlp->pfnPrintf(pHlp, " Dual PPR log enable = %#x\n", Ctrl.n.u2DualPprLogEn); + pHlp->pfnPrintf(pHlp, " Dual event log enable = %#x\n", Ctrl.n.u2DualEvtLogEn); + pHlp->pfnPrintf(pHlp, " Device table segmentation enable = %#x\n", Ctrl.n.u3DevTabSegEn); + pHlp->pfnPrintf(pHlp, " Privilege abort enable = %#x\n", Ctrl.n.u2PrivAbortEn); + pHlp->pfnPrintf(pHlp, " PPR auto response enable = %RTbool\n", Ctrl.n.u1PprAutoRespEn); + pHlp->pfnPrintf(pHlp, " MARC enable = %RTbool\n", Ctrl.n.u1MarcEn); + pHlp->pfnPrintf(pHlp, " Block StopMark enable = %RTbool\n", Ctrl.n.u1BlockStopMarkEn); + pHlp->pfnPrintf(pHlp, " PPR auto response always-on enable = %RTbool\n", Ctrl.n.u1PprAutoRespAlwaysOnEn); + pHlp->pfnPrintf(pHlp, " Domain IDPNE = %RTbool\n", Ctrl.n.u1DomainIDPNE); + pHlp->pfnPrintf(pHlp, " Enhanced PPR handling = %RTbool\n", Ctrl.n.u1EnhancedPpr); + pHlp->pfnPrintf(pHlp, " Host page table access/dirty bit update = %#x\n", Ctrl.n.u2HstAccDirtyBitUpdate); + pHlp->pfnPrintf(pHlp, " Guest page table dirty bit disable = %RTbool\n", Ctrl.n.u1GstDirtyUpdateDis); + pHlp->pfnPrintf(pHlp, " x2APIC enable = %RTbool\n", Ctrl.n.u1X2ApicEn); + pHlp->pfnPrintf(pHlp, " x2APIC interrupt enable = %RTbool\n", Ctrl.n.u1X2ApicIntrGenEn); + pHlp->pfnPrintf(pHlp, " Guest page table access bit update = %RTbool\n", Ctrl.n.u1GstAccessUpdateDis); + } + } + /* Exclusion Base Address Register. */ + { + IOMMU_EXCL_RANGE_BAR_T const ExclRangeBar = pThis->ExclRangeBaseAddr; + pHlp->pfnPrintf(pHlp, " Exclusion BAR = %#RX64\n", ExclRangeBar.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Exclusion enable = %RTbool\n", ExclRangeBar.n.u1ExclEnable); + pHlp->pfnPrintf(pHlp, " Allow all devices = %RTbool\n", ExclRangeBar.n.u1AllowAll); + pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", + ExclRangeBar.n.u40ExclRangeBase << X86_PAGE_4K_SHIFT); + } + } + /* Exclusion Range Limit Register. */ + { + IOMMU_EXCL_RANGE_LIMIT_T const ExclRangeLimit = pThis->ExclRangeLimit; + pHlp->pfnPrintf(pHlp, " Exclusion Range Limit = %#RX64\n", ExclRangeLimit.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Range limit = %#RX64\n", + (ExclRangeLimit.n.u40ExclRangeLimit << X86_PAGE_4K_SHIFT) | X86_PAGE_4K_OFFSET_MASK); + } + } + /* Extended Feature Register. */ + { + IOMMU_EXT_FEAT_T ExtFeat = pThis->ExtFeat; + pHlp->pfnPrintf(pHlp, " Extended Feature Register = %#RX64\n", ExtFeat.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Prefetch support = %RTbool\n", ExtFeat.n.u1PrefetchSup); + pHlp->pfnPrintf(pHlp, " PPR support = %RTbool\n", ExtFeat.n.u1PprSup); + pHlp->pfnPrintf(pHlp, " x2APIC support = %RTbool\n", ExtFeat.n.u1X2ApicSup); + pHlp->pfnPrintf(pHlp, " NX and privilege level support = %RTbool\n", ExtFeat.n.u1NoExecuteSup); + pHlp->pfnPrintf(pHlp, " Guest translation support = %RTbool\n", ExtFeat.n.u1GstTranslateSup); + pHlp->pfnPrintf(pHlp, " Invalidate-All command support = %RTbool\n", ExtFeat.n.u1InvAllSup); + pHlp->pfnPrintf(pHlp, " Guest virtual-APIC support = %RTbool\n", ExtFeat.n.u1GstVirtApicSup); + pHlp->pfnPrintf(pHlp, " Hardware error register support = %RTbool\n", ExtFeat.n.u1HwErrorSup); + pHlp->pfnPrintf(pHlp, " Performance counters support = %RTbool\n", ExtFeat.n.u1PerfCounterSup); + pHlp->pfnPrintf(pHlp, " Host address translation size = %#x\n", ExtFeat.n.u2HostAddrTranslateSize); + pHlp->pfnPrintf(pHlp, " Guest address translation size = %#x\n", ExtFeat.n.u2GstAddrTranslateSize); + pHlp->pfnPrintf(pHlp, " Guest CR3 root table level support = %#x\n", ExtFeat.n.u2GstCr3RootTblLevel); + pHlp->pfnPrintf(pHlp, " SMI filter register support = %#x\n", ExtFeat.n.u2SmiFilterSup); + pHlp->pfnPrintf(pHlp, " SMI filter register count = %#x\n", ExtFeat.n.u3SmiFilterCount); + pHlp->pfnPrintf(pHlp, " Guest virtual-APIC modes support = %#x\n", ExtFeat.n.u3GstVirtApicModeSup); + pHlp->pfnPrintf(pHlp, " Dual PPR log support = %#x\n", ExtFeat.n.u2DualPprLogSup); + pHlp->pfnPrintf(pHlp, " Dual event log support = %#x\n", ExtFeat.n.u2DualEvtLogSup); + pHlp->pfnPrintf(pHlp, " Maximum PASID = %#x\n", ExtFeat.n.u5MaxPasidSup); + pHlp->pfnPrintf(pHlp, " User/supervisor page protection support = %RTbool\n", ExtFeat.n.u1UserSupervisorSup); + pHlp->pfnPrintf(pHlp, " Device table segments supported = %#x (%u)\n", ExtFeat.n.u2DevTabSegSup, + g_acDevTabSegs[ExtFeat.n.u2DevTabSegSup]); + pHlp->pfnPrintf(pHlp, " PPR log overflow early warning support = %RTbool\n", ExtFeat.n.u1PprLogOverflowWarn); + pHlp->pfnPrintf(pHlp, " PPR auto response support = %RTbool\n", ExtFeat.n.u1PprAutoRespSup); + pHlp->pfnPrintf(pHlp, " MARC support = %#x\n", ExtFeat.n.u2MarcSup); + pHlp->pfnPrintf(pHlp, " Block StopMark message support = %RTbool\n", ExtFeat.n.u1BlockStopMarkSup); + pHlp->pfnPrintf(pHlp, " Performance optimization support = %RTbool\n", ExtFeat.n.u1PerfOptSup); + pHlp->pfnPrintf(pHlp, " MSI capability MMIO access support = %RTbool\n", ExtFeat.n.u1MsiCapMmioSup); + pHlp->pfnPrintf(pHlp, " Guest I/O protection support = %RTbool\n", ExtFeat.n.u1GstIoSup); + pHlp->pfnPrintf(pHlp, " Host access support = %RTbool\n", ExtFeat.n.u1HostAccessSup); + pHlp->pfnPrintf(pHlp, " Enhanced PPR handling support = %RTbool\n", ExtFeat.n.u1EnhancedPprSup); + pHlp->pfnPrintf(pHlp, " Attribute forward supported = %RTbool\n", ExtFeat.n.u1AttrForwardSup); + pHlp->pfnPrintf(pHlp, " Host dirty support = %RTbool\n", ExtFeat.n.u1HostDirtySup); + pHlp->pfnPrintf(pHlp, " Invalidate IOTLB type support = %RTbool\n", ExtFeat.n.u1InvIoTlbTypeSup); + pHlp->pfnPrintf(pHlp, " Guest page table access bit hw disable = %RTbool\n", ExtFeat.n.u1GstUpdateDisSup); + pHlp->pfnPrintf(pHlp, " Force physical dest for remapped intr. = %RTbool\n", ExtFeat.n.u1ForcePhysDstSup); + } + } + /* PPR Log Base Address Register. */ + { + PPR_LOG_BAR_T PprLogBar = pThis->PprLogBaseAddr; + uint8_t const uEncodedLen = PprLogBar.n.u4Len; + uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen); + uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen); + pHlp->pfnPrintf(pHlp, " PPR Log BAR = %#RX64\n", PprLogBar.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", + PprLogBar.n.u40Base << X86_PAGE_4K_SHIFT); + pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen, + cEntries, cbBuffer); + } + } + /* Hardware Event (Hi) Register. */ + { + IOMMU_HW_EVT_HI_T HwEvtHi = pThis->HwEvtHi; + pHlp->pfnPrintf(pHlp, " Hardware Event (Hi) = %#RX64\n", HwEvtHi.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " First operand = %#RX64\n", HwEvtHi.n.u60FirstOperand); + pHlp->pfnPrintf(pHlp, " Event code = %#RX8\n", HwEvtHi.n.u4EvtCode); + } + } + /* Hardware Event (Lo) Register. */ + pHlp->pfnPrintf(pHlp, " Hardware Event (Lo) = %#RX64\n", pThis->HwEvtLo); + /* Hardware Event Status. */ + { + IOMMU_HW_EVT_STATUS_T HwEvtStatus = pThis->HwEvtStatus; + pHlp->pfnPrintf(pHlp, " Hardware Event Status = %#RX64\n", HwEvtStatus.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Valid = %RTbool\n", HwEvtStatus.n.u1Valid); + pHlp->pfnPrintf(pHlp, " Overflow = %RTbool\n", HwEvtStatus.n.u1Overflow); + } + } + /* Guest Virtual-APIC Log Base Address Register. */ + { + GALOG_BAR_T const GALogBar = pThis->GALogBaseAddr; + uint8_t const uEncodedLen = GALogBar.n.u4Len; + uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen); + uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen); + pHlp->pfnPrintf(pHlp, " Guest Log BAR = %#RX64\n", GALogBar.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", + GALogBar.n.u40Base << X86_PAGE_4K_SHIFT); + pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen, + cEntries, cbBuffer); + } + } + /* Guest Virtual-APIC Log Tail Address Register. */ + { + GALOG_TAIL_ADDR_T GALogTail = pThis->GALogTailAddr; + pHlp->pfnPrintf(pHlp, " Guest Log Tail Address = %#RX64\n", GALogTail.u64); + if (fVerbose) + pHlp->pfnPrintf(pHlp, " Tail address = %#RX64\n", GALogTail.n.u40GALogTailAddr); + } + /* PPR Log B Base Address Register. */ + { + PPR_LOG_B_BAR_T PprLogBBar = pThis->PprLogBBaseAddr; + uint8_t const uEncodedLen = PprLogBBar.n.u4Len; + uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen); + uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen); + pHlp->pfnPrintf(pHlp, " PPR Log B BAR = %#RX64\n", PprLogBBar.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", + PprLogBBar.n.u40Base << X86_PAGE_4K_SHIFT); + pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen, + cEntries, cbBuffer); + } + } + /* Event Log B Base Address Register. */ + { + EVT_LOG_B_BAR_T EvtLogBBar = pThis->EvtLogBBaseAddr; + uint8_t const uEncodedLen = EvtLogBBar.n.u4Len; + uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen); + uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen); + pHlp->pfnPrintf(pHlp, " Event Log B BAR = %#RX64\n", EvtLogBBar.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", + EvtLogBBar.n.u40Base << X86_PAGE_4K_SHIFT); + pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen, + cEntries, cbBuffer); + } + } + /* Device-Specific Feature Extension Register. */ + { + DEV_SPECIFIC_FEAT_T const DevSpecificFeat = pThis->DevSpecificFeat; + pHlp->pfnPrintf(pHlp, " Device-specific Feature = %#RX64\n", DevSpecificFeat.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Feature = %#RX32\n", DevSpecificFeat.n.u24DevSpecFeat); + pHlp->pfnPrintf(pHlp, " Minor revision ID = %#x\n", DevSpecificFeat.n.u4RevMinor); + pHlp->pfnPrintf(pHlp, " Major revision ID = %#x\n", DevSpecificFeat.n.u4RevMajor); + } + } + /* Device-Specific Control Extension Register. */ + { + DEV_SPECIFIC_CTRL_T const DevSpecificCtrl = pThis->DevSpecificCtrl; + pHlp->pfnPrintf(pHlp, " Device-specific Control = %#RX64\n", DevSpecificCtrl.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Control = %#RX32\n", DevSpecificCtrl.n.u24DevSpecCtrl); + pHlp->pfnPrintf(pHlp, " Minor revision ID = %#x\n", DevSpecificCtrl.n.u4RevMinor); + pHlp->pfnPrintf(pHlp, " Major revision ID = %#x\n", DevSpecificCtrl.n.u4RevMajor); + } + } + /* Device-Specific Status Extension Register. */ + { + DEV_SPECIFIC_STATUS_T const DevSpecificStatus = pThis->DevSpecificStatus; + pHlp->pfnPrintf(pHlp, " Device-specific Status = %#RX64\n", DevSpecificStatus.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Status = %#RX32\n", DevSpecificStatus.n.u24DevSpecStatus); + pHlp->pfnPrintf(pHlp, " Minor revision ID = %#x\n", DevSpecificStatus.n.u4RevMinor); + pHlp->pfnPrintf(pHlp, " Major revision ID = %#x\n", DevSpecificStatus.n.u4RevMajor); + } + } + /* Miscellaneous Information Register (Lo and Hi). */ + { + MSI_MISC_INFO_T const MiscInfo = pThis->MiscInfo; + pHlp->pfnPrintf(pHlp, " Misc. Info. Register = %#RX64\n", MiscInfo.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Event Log MSI number = %#x\n", MiscInfo.n.u5MsiNumEvtLog); + pHlp->pfnPrintf(pHlp, " Guest Virtual-Address Size = %#x\n", MiscInfo.n.u3GstVirtAddrSize); + pHlp->pfnPrintf(pHlp, " Physical Address Size = %#x\n", MiscInfo.n.u7PhysAddrSize); + pHlp->pfnPrintf(pHlp, " Virtual-Address Size = %#x\n", MiscInfo.n.u7VirtAddrSize); + pHlp->pfnPrintf(pHlp, " HT Transport ATS Range Reserved = %RTbool\n", MiscInfo.n.u1HtAtsResv); + pHlp->pfnPrintf(pHlp, " PPR MSI number = %#x\n", MiscInfo.n.u5MsiNumPpr); + pHlp->pfnPrintf(pHlp, " GA Log MSI number = %#x\n", MiscInfo.n.u5MsiNumGa); + } + } + /* MSI Capability Header. */ + { + MSI_CAP_HDR_T MsiCapHdr; + MsiCapHdr.u32 = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR); + pHlp->pfnPrintf(pHlp, " MSI Capability Header = %#RX32\n", MsiCapHdr.u32); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Capability ID = %#x\n", MsiCapHdr.n.u8MsiCapId); + pHlp->pfnPrintf(pHlp, " Capability Ptr (PCI config offset) = %#x\n", MsiCapHdr.n.u8MsiCapPtr); + pHlp->pfnPrintf(pHlp, " Enable = %RTbool\n", MsiCapHdr.n.u1MsiEnable); + pHlp->pfnPrintf(pHlp, " Multi-message capability = %#x\n", MsiCapHdr.n.u3MsiMultiMessCap); + pHlp->pfnPrintf(pHlp, " Multi-message enable = %#x\n", MsiCapHdr.n.u3MsiMultiMessEn); + } + } + /* MSI Address Register (Lo and Hi). */ + { + uint32_t const uMsiAddrLo = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_LO); + uint32_t const uMsiAddrHi = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_HI); + MSIADDR MsiAddr; + MsiAddr.u64 = RT_MAKE_U64(uMsiAddrLo, uMsiAddrHi); + pHlp->pfnPrintf(pHlp, " MSI Address = %#RX64\n", MsiAddr.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Destination mode = %#x\n", MsiAddr.n.u1DestMode); + pHlp->pfnPrintf(pHlp, " Redirection hint = %#x\n", MsiAddr.n.u1RedirHint); + pHlp->pfnPrintf(pHlp, " Destination Id = %#x\n", MsiAddr.n.u8DestId); + pHlp->pfnPrintf(pHlp, " Address = %#RX32\n", MsiAddr.n.u12Addr); + pHlp->pfnPrintf(pHlp, " Address (Hi) / Rsvd? = %#RX32\n", MsiAddr.n.u32Rsvd0); + } + } + /* MSI Data. */ + { + MSIDATA MsiData; + MsiData.u32 = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_DATA); + pHlp->pfnPrintf(pHlp, " MSI Data = %#RX32\n", MsiData.u32); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Vector = %#x (%u)\n", MsiData.n.u8Vector, + MsiData.n.u8Vector); + pHlp->pfnPrintf(pHlp, " Delivery mode = %#x\n", MsiData.n.u3DeliveryMode); + pHlp->pfnPrintf(pHlp, " Level = %#x\n", MsiData.n.u1Level); + pHlp->pfnPrintf(pHlp, " Trigger mode = %s\n", MsiData.n.u1TriggerMode ? + "level" : "edge"); + } + } + /* MSI Mapping Capability Header (HyperTransport, reporting all 0s currently). */ + { + MSI_MAP_CAP_HDR_T MsiMapCapHdr; + MsiMapCapHdr.u32 = 0; + pHlp->pfnPrintf(pHlp, " MSI Mapping Capability Header = %#RX32\n", MsiMapCapHdr.u32); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Capability ID = %#x\n", MsiMapCapHdr.n.u8MsiMapCapId); + pHlp->pfnPrintf(pHlp, " Map enable = %RTbool\n", MsiMapCapHdr.n.u1MsiMapEn); + pHlp->pfnPrintf(pHlp, " Map fixed = %RTbool\n", MsiMapCapHdr.n.u1MsiMapFixed); + pHlp->pfnPrintf(pHlp, " Map capability type = %#x\n", MsiMapCapHdr.n.u5MapCapType); + } + } + /* Performance Optimization Control Register. */ + { + IOMMU_PERF_OPT_CTRL_T const PerfOptCtrl = pThis->PerfOptCtrl; + pHlp->pfnPrintf(pHlp, " Performance Optimization Control = %#RX32\n", PerfOptCtrl.u32); + if (fVerbose) + pHlp->pfnPrintf(pHlp, " Enable = %RTbool\n", PerfOptCtrl.n.u1PerfOptEn); + } + /* XT (x2APIC) General Interrupt Control Register. */ + { + IOMMU_XT_GEN_INTR_CTRL_T const XtGenIntrCtrl = pThis->XtGenIntrCtrl; + pHlp->pfnPrintf(pHlp, " XT General Interrupt Control = %#RX64\n", XtGenIntrCtrl.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Interrupt destination mode = %s\n", + !XtGenIntrCtrl.n.u1X2ApicIntrDstMode ? "physical" : "logical"); + pHlp->pfnPrintf(pHlp, " Interrupt destination = %#RX64\n", + RT_MAKE_U64(XtGenIntrCtrl.n.u24X2ApicIntrDstLo, XtGenIntrCtrl.n.u7X2ApicIntrDstHi)); + pHlp->pfnPrintf(pHlp, " Interrupt vector = %#x\n", XtGenIntrCtrl.n.u8X2ApicIntrVector); + pHlp->pfnPrintf(pHlp, " Interrupt delivery mode = %s\n", + !XtGenIntrCtrl.n.u8X2ApicIntrVector ? "fixed" : "arbitrated"); + } + } + /* XT (x2APIC) PPR Interrupt Control Register. */ + { + IOMMU_XT_PPR_INTR_CTRL_T const XtPprIntrCtrl = pThis->XtPprIntrCtrl; + pHlp->pfnPrintf(pHlp, " XT PPR Interrupt Control = %#RX64\n", XtPprIntrCtrl.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Interrupt destination mode = %s\n", + !XtPprIntrCtrl.n.u1X2ApicIntrDstMode ? "physical" : "logical"); + pHlp->pfnPrintf(pHlp, " Interrupt destination = %#RX64\n", + RT_MAKE_U64(XtPprIntrCtrl.n.u24X2ApicIntrDstLo, XtPprIntrCtrl.n.u7X2ApicIntrDstHi)); + pHlp->pfnPrintf(pHlp, " Interrupt vector = %#x\n", XtPprIntrCtrl.n.u8X2ApicIntrVector); + pHlp->pfnPrintf(pHlp, " Interrupt delivery mode = %s\n", + !XtPprIntrCtrl.n.u8X2ApicIntrVector ? "fixed" : "arbitrated"); + } + } + /* XT (X2APIC) GA Log Interrupt Control Register. */ + { + IOMMU_XT_GALOG_INTR_CTRL_T const XtGALogIntrCtrl = pThis->XtGALogIntrCtrl; + pHlp->pfnPrintf(pHlp, " XT PPR Interrupt Control = %#RX64\n", XtGALogIntrCtrl.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Interrupt destination mode = %s\n", + !XtGALogIntrCtrl.n.u1X2ApicIntrDstMode ? "physical" : "logical"); + pHlp->pfnPrintf(pHlp, " Interrupt destination = %#RX64\n", + RT_MAKE_U64(XtGALogIntrCtrl.n.u24X2ApicIntrDstLo, XtGALogIntrCtrl.n.u7X2ApicIntrDstHi)); + pHlp->pfnPrintf(pHlp, " Interrupt vector = %#x\n", XtGALogIntrCtrl.n.u8X2ApicIntrVector); + pHlp->pfnPrintf(pHlp, " Interrupt delivery mode = %s\n", + !XtGALogIntrCtrl.n.u8X2ApicIntrVector ? "fixed" : "arbitrated"); + } + } + /* MARC Registers. */ + { + for (unsigned i = 0; i < RT_ELEMENTS(pThis->aMarcApers); i++) + { + pHlp->pfnPrintf(pHlp, " MARC Aperature %u:\n", i); + MARC_APER_BAR_T const MarcAperBar = pThis->aMarcApers[i].Base; + pHlp->pfnPrintf(pHlp, " Base = %#RX64\n", MarcAperBar.n.u40MarcBaseAddr << X86_PAGE_4K_SHIFT); + + MARC_APER_RELOC_T const MarcAperReloc = pThis->aMarcApers[i].Reloc; + pHlp->pfnPrintf(pHlp, " Reloc = %#RX64 (addr: %#RX64, read-only: %RTbool, enable: %RTbool)\n", + MarcAperReloc.u64, MarcAperReloc.n.u40MarcRelocAddr << X86_PAGE_4K_SHIFT, + MarcAperReloc.n.u1ReadOnly, MarcAperReloc.n.u1RelocEn); + + MARC_APER_LEN_T const MarcAperLen = pThis->aMarcApers[i].Length; + pHlp->pfnPrintf(pHlp, " Length = %u pages\n", MarcAperLen.n.u40MarcLength); + } + } + /* Reserved Register. */ + pHlp->pfnPrintf(pHlp, " Reserved Register = %#RX64\n", pThis->RsvdReg); + /* Command Buffer Head Pointer Register. */ + { + CMD_BUF_HEAD_PTR_T const CmdBufHeadPtr = pThis->CmdBufHeadPtr; + pHlp->pfnPrintf(pHlp, " Command Buffer Head Pointer = %#RX64 (off: %#x)\n", CmdBufHeadPtr.u64, + CmdBufHeadPtr.n.off); + } + /* Command Buffer Tail Pointer Register. */ + { + CMD_BUF_HEAD_PTR_T const CmdBufTailPtr = pThis->CmdBufTailPtr; + pHlp->pfnPrintf(pHlp, " Command Buffer Tail Pointer = %#RX64 (off: %#x)\n", CmdBufTailPtr.u64, + CmdBufTailPtr.n.off); + } + /* Event Log Head Pointer Register. */ + { + EVT_LOG_HEAD_PTR_T const EvtLogHeadPtr = pThis->EvtLogHeadPtr; + pHlp->pfnPrintf(pHlp, " Event Log Head Pointer = %#RX64 (off: %#x)\n", EvtLogHeadPtr.u64, + EvtLogHeadPtr.n.off); + } + /* Event Log Tail Pointer Register. */ + { + EVT_LOG_TAIL_PTR_T const EvtLogTailPtr = pThis->EvtLogTailPtr; + pHlp->pfnPrintf(pHlp, " Event Log Head Pointer = %#RX64 (off: %#x)\n", EvtLogTailPtr.u64, + EvtLogTailPtr.n.off); + } + /* Status Register. */ + { + IOMMU_STATUS_T const Status = pThis->Status; + pHlp->pfnPrintf(pHlp, " Status Register = %#RX64\n", Status.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Event log overflow = %RTbool\n", Status.n.u1EvtOverflow); + pHlp->pfnPrintf(pHlp, " Event log interrupt = %RTbool\n", Status.n.u1EvtLogIntr); + pHlp->pfnPrintf(pHlp, " Completion wait interrupt = %RTbool\n", Status.n.u1CompWaitIntr); + pHlp->pfnPrintf(pHlp, " Event log running = %RTbool\n", Status.n.u1EvtLogRunning); + pHlp->pfnPrintf(pHlp, " Command buffer running = %RTbool\n", Status.n.u1CmdBufRunning); + pHlp->pfnPrintf(pHlp, " PPR overflow = %RTbool\n", Status.n.u1PprOverflow); + pHlp->pfnPrintf(pHlp, " PPR interrupt = %RTbool\n", Status.n.u1PprIntr); + pHlp->pfnPrintf(pHlp, " PPR log running = %RTbool\n", Status.n.u1PprLogRunning); + pHlp->pfnPrintf(pHlp, " Guest log running = %RTbool\n", Status.n.u1GstLogRunning); + pHlp->pfnPrintf(pHlp, " Guest log interrupt = %RTbool\n", Status.n.u1GstLogIntr); + pHlp->pfnPrintf(pHlp, " PPR log B overflow = %RTbool\n", Status.n.u1PprOverflowB); + pHlp->pfnPrintf(pHlp, " PPR log active = %RTbool\n", Status.n.u1PprLogActive); + pHlp->pfnPrintf(pHlp, " Event log B overflow = %RTbool\n", Status.n.u1EvtOverflowB); + pHlp->pfnPrintf(pHlp, " Event log active = %RTbool\n", Status.n.u1EvtLogActive); + pHlp->pfnPrintf(pHlp, " PPR log B overflow early warning = %RTbool\n", Status.n.u1PprOverflowEarlyB); + pHlp->pfnPrintf(pHlp, " PPR log overflow early warning = %RTbool\n", Status.n.u1PprOverflowEarly); + } + } + /* PPR Log Head Pointer. */ + { + PPR_LOG_HEAD_PTR_T const PprLogHeadPtr = pThis->PprLogHeadPtr; + pHlp->pfnPrintf(pHlp, " PPR Log Head Pointer = %#RX64 (off: %#x)\n", PprLogHeadPtr.u64, + PprLogHeadPtr.n.off); + } + /* PPR Log Tail Pointer. */ + { + PPR_LOG_TAIL_PTR_T const PprLogTailPtr = pThis->PprLogTailPtr; + pHlp->pfnPrintf(pHlp, " PPR Log Tail Pointer = %#RX64 (off: %#x)\n", PprLogTailPtr.u64, + PprLogTailPtr.n.off); + } + /* Guest Virtual-APIC Log Head Pointer. */ + { + GALOG_HEAD_PTR_T const GALogHeadPtr = pThis->GALogHeadPtr; + pHlp->pfnPrintf(pHlp, " Guest Virtual-APIC Log Head Pointer = %#RX64 (off: %#x)\n", GALogHeadPtr.u64, + GALogHeadPtr.n.u12GALogPtr); + } + /* Guest Virtual-APIC Log Tail Pointer. */ + { + GALOG_HEAD_PTR_T const GALogTailPtr = pThis->GALogTailPtr; + pHlp->pfnPrintf(pHlp, " Guest Virtual-APIC Log Tail Pointer = %#RX64 (off: %#x)\n", GALogTailPtr.u64, + GALogTailPtr.n.u12GALogPtr); + } + /* PPR Log B Head Pointer. */ + { + PPR_LOG_B_HEAD_PTR_T const PprLogBHeadPtr = pThis->PprLogBHeadPtr; + pHlp->pfnPrintf(pHlp, " PPR Log B Head Pointer = %#RX64 (off: %#x)\n", PprLogBHeadPtr.u64, + PprLogBHeadPtr.n.off); + } + /* PPR Log B Tail Pointer. */ + { + PPR_LOG_B_TAIL_PTR_T const PprLogBTailPtr = pThis->PprLogBTailPtr; + pHlp->pfnPrintf(pHlp, " PPR Log B Tail Pointer = %#RX64 (off: %#x)\n", PprLogBTailPtr.u64, + PprLogBTailPtr.n.off); + } + /* Event Log B Head Pointer. */ + { + EVT_LOG_B_HEAD_PTR_T const EvtLogBHeadPtr = pThis->EvtLogBHeadPtr; + pHlp->pfnPrintf(pHlp, " Event Log B Head Pointer = %#RX64 (off: %#x)\n", EvtLogBHeadPtr.u64, + EvtLogBHeadPtr.n.off); + } + /* Event Log B Tail Pointer. */ + { + EVT_LOG_B_TAIL_PTR_T const EvtLogBTailPtr = pThis->EvtLogBTailPtr; + pHlp->pfnPrintf(pHlp, " Event Log B Tail Pointer = %#RX64 (off: %#x)\n", EvtLogBTailPtr.u64, + EvtLogBTailPtr.n.off); + } + /* PPR Log Auto Response Register. */ + { + PPR_LOG_AUTO_RESP_T const PprLogAutoResp = pThis->PprLogAutoResp; + pHlp->pfnPrintf(pHlp, " PPR Log Auto Response Register = %#RX64\n", PprLogAutoResp.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Code = %#x\n", PprLogAutoResp.n.u4AutoRespCode); + pHlp->pfnPrintf(pHlp, " Mask Gen. = %RTbool\n", PprLogAutoResp.n.u1AutoRespMaskGen); + } + } + /* PPR Log Overflow Early Warning Indicator Register. */ + { + PPR_LOG_OVERFLOW_EARLY_T const PprLogOverflowEarly = pThis->PprLogOverflowEarly; + pHlp->pfnPrintf(pHlp, " PPR Log overflow early warning = %#RX64\n", PprLogOverflowEarly.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Threshold = %#x\n", PprLogOverflowEarly.n.u15Threshold); + pHlp->pfnPrintf(pHlp, " Interrupt enable = %RTbool\n", PprLogOverflowEarly.n.u1IntrEn); + pHlp->pfnPrintf(pHlp, " Enable = %RTbool\n", PprLogOverflowEarly.n.u1Enable); + } + } + /* PPR Log Overflow Early Warning Indicator Register. */ + { + PPR_LOG_OVERFLOW_EARLY_T const PprLogBOverflowEarly = pThis->PprLogBOverflowEarly; + pHlp->pfnPrintf(pHlp, " PPR Log B overflow early warning = %#RX64\n", PprLogBOverflowEarly.u64); + if (fVerbose) + { + pHlp->pfnPrintf(pHlp, " Threshold = %#x\n", PprLogBOverflowEarly.n.u15Threshold); + pHlp->pfnPrintf(pHlp, " Interrupt enable = %RTbool\n", PprLogBOverflowEarly.n.u1IntrEn); + pHlp->pfnPrintf(pHlp, " Enable = %RTbool\n", PprLogBOverflowEarly.n.u1Enable); + } + } +} + + +/** + * Dumps the DTE via the info callback helper. + * + * @param pHlp The info helper. + * @param pDte The device table entry. + * @param pszPrefix The string prefix. + */ +static void iommuAmdR3DbgInfoDteWorker(PCDBGFINFOHLP pHlp, PCDTE_T pDte, const char *pszPrefix) +{ + AssertReturnVoid(pHlp); + AssertReturnVoid(pDte); + AssertReturnVoid(pszPrefix); + + pHlp->pfnPrintf(pHlp, "%sValid = %RTbool\n", pszPrefix, pDte->n.u1Valid); + pHlp->pfnPrintf(pHlp, "%sTranslation Valid = %RTbool\n", pszPrefix, pDte->n.u1TranslationValid); + pHlp->pfnPrintf(pHlp, "%sHost Access Dirty = %#x\n", pszPrefix, pDte->n.u2Had); + pHlp->pfnPrintf(pHlp, "%sPaging Mode = %u\n", pszPrefix, pDte->n.u3Mode); + pHlp->pfnPrintf(pHlp, "%sPage Table Root Ptr = %#RX64 (addr=%#RGp)\n", pszPrefix, pDte->n.u40PageTableRootPtrLo, + pDte->n.u40PageTableRootPtrLo << 12); + pHlp->pfnPrintf(pHlp, "%sPPR enable = %RTbool\n", pszPrefix, pDte->n.u1Ppr); + pHlp->pfnPrintf(pHlp, "%sGuest PPR Resp w/ PASID = %RTbool\n", pszPrefix, pDte->n.u1GstPprRespPasid); + pHlp->pfnPrintf(pHlp, "%sGuest I/O Prot Valid = %RTbool\n", pszPrefix, pDte->n.u1GstIoValid); + pHlp->pfnPrintf(pHlp, "%sGuest Translation Valid = %RTbool\n", pszPrefix, pDte->n.u1GstTranslateValid); + pHlp->pfnPrintf(pHlp, "%sGuest Levels Translated = %#x\n", pszPrefix, pDte->n.u2GstMode); + pHlp->pfnPrintf(pHlp, "%sGuest Root Page Table Ptr = %#x %#x %#x (addr=%#RGp)\n", pszPrefix, + pDte->n.u3GstCr3TableRootPtrLo, pDte->n.u16GstCr3TableRootPtrMid, pDte->n.u21GstCr3TableRootPtrHi, + (pDte->n.u21GstCr3TableRootPtrHi << 31) + | (pDte->n.u16GstCr3TableRootPtrMid << 15) + | (pDte->n.u3GstCr3TableRootPtrLo << 12)); + pHlp->pfnPrintf(pHlp, "%sI/O Read = %s\n", pszPrefix, pDte->n.u1IoRead ? "allowed" : "denied"); + pHlp->pfnPrintf(pHlp, "%sI/O Write = %s\n", pszPrefix, pDte->n.u1IoWrite ? "allowed" : "denied"); + pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u1Rsvd0); + pHlp->pfnPrintf(pHlp, "%sDomain ID = %u (%#x)\n", pszPrefix, pDte->n.u16DomainId, pDte->n.u16DomainId); + pHlp->pfnPrintf(pHlp, "%sIOTLB Enable = %RTbool\n", pszPrefix, pDte->n.u1IoTlbEnable); + pHlp->pfnPrintf(pHlp, "%sSuppress I/O PFs = %RTbool\n", pszPrefix, pDte->n.u1SuppressPfEvents); + pHlp->pfnPrintf(pHlp, "%sSuppress all I/O PFs = %RTbool\n", pszPrefix, pDte->n.u1SuppressAllPfEvents); + pHlp->pfnPrintf(pHlp, "%sPort I/O Control = %#x\n", pszPrefix, pDte->n.u2IoCtl); + pHlp->pfnPrintf(pHlp, "%sIOTLB Cache Hint = %s\n", pszPrefix, pDte->n.u1Cache ? "no caching" : "cache"); + pHlp->pfnPrintf(pHlp, "%sSnoop Disable = %RTbool\n", pszPrefix, pDte->n.u1SnoopDisable); + pHlp->pfnPrintf(pHlp, "%sAllow Exclusion = %RTbool\n", pszPrefix, pDte->n.u1AllowExclusion); + pHlp->pfnPrintf(pHlp, "%sSysMgt Message Enable = %RTbool\n", pszPrefix, pDte->n.u2SysMgt); + pHlp->pfnPrintf(pHlp, "%sInterrupt Map Valid = %RTbool\n", pszPrefix, pDte->n.u1IntrMapValid); + uint8_t const uIntrTabLen = pDte->n.u4IntrTableLength; + if (uIntrTabLen < IOMMU_DTE_INTR_TAB_LEN_MAX) + { + uint16_t const cEntries = IOMMU_DTE_GET_INTR_TAB_ENTRIES(pDte); + uint16_t const cbIntrTable = IOMMU_DTE_GET_INTR_TAB_LEN(pDte); + pHlp->pfnPrintf(pHlp, "%sInterrupt Table Length = %#x (%u entries, %u bytes)\n", pszPrefix, uIntrTabLen, cEntries, + cbIntrTable); + } + else + pHlp->pfnPrintf(pHlp, "%sInterrupt Table Length = %#x (invalid!)\n", pszPrefix, uIntrTabLen); + pHlp->pfnPrintf(pHlp, "%sIgnore Unmapped Interrupts = %RTbool\n", pszPrefix, pDte->n.u1IgnoreUnmappedIntrs); + pHlp->pfnPrintf(pHlp, "%sInterrupt Table Root Ptr = %#RX64 (addr=%#RGp)\n", pszPrefix, + pDte->n.u46IntrTableRootPtr, pDte->au64[2] & IOMMU_DTE_IRTE_ROOT_PTR_MASK); + pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u4Rsvd0); + pHlp->pfnPrintf(pHlp, "%sINIT passthru = %RTbool\n", pszPrefix, pDte->n.u1InitPassthru); + pHlp->pfnPrintf(pHlp, "%sExtInt passthru = %RTbool\n", pszPrefix, pDte->n.u1ExtIntPassthru); + pHlp->pfnPrintf(pHlp, "%sNMI passthru = %RTbool\n", pszPrefix, pDte->n.u1NmiPassthru); + pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u1Rsvd2); + pHlp->pfnPrintf(pHlp, "%sInterrupt Control = %#x\n", pszPrefix, pDte->n.u2IntrCtrl); + pHlp->pfnPrintf(pHlp, "%sLINT0 passthru = %RTbool\n", pszPrefix, pDte->n.u1Lint0Passthru); + pHlp->pfnPrintf(pHlp, "%sLINT1 passthru = %RTbool\n", pszPrefix, pDte->n.u1Lint1Passthru); + pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u32Rsvd0); + pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u22Rsvd0); + pHlp->pfnPrintf(pHlp, "%sAttribute Override Valid = %RTbool\n", pszPrefix, pDte->n.u1AttrOverride); + pHlp->pfnPrintf(pHlp, "%sMode0FC = %#x\n", pszPrefix, pDte->n.u1Mode0FC); + pHlp->pfnPrintf(pHlp, "%sSnoop Attribute = %#x\n", pszPrefix, pDte->n.u8SnoopAttr); + pHlp->pfnPrintf(pHlp, "\n"); +} + + +/** + * @callback_method_impl{FNDBGFHANDLERDEV} + */ +static DECLCALLBACK(void) iommuAmdR3DbgInfoDte(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + if (pszArgs) + { + uint16_t idDevice = 0; + int rc = RTStrToUInt16Full(pszArgs, 0 /* uBase */, &idDevice); + if (RT_SUCCESS(rc)) + { + DTE_T Dte; + rc = iommuAmdDteRead(pDevIns, idDevice, IOMMUOP_TRANSLATE_REQ, &Dte); + if (RT_SUCCESS(rc)) + { + pHlp->pfnPrintf(pHlp, "DTE for device %#x\n", idDevice); + iommuAmdR3DbgInfoDteWorker(pHlp, &Dte, " "); + return; + } + pHlp->pfnPrintf(pHlp, "Failed to read DTE for device ID %u (%#x). rc=%Rrc\n", idDevice, idDevice, rc); + } + else + pHlp->pfnPrintf(pHlp, "Failed to parse a valid 16-bit device ID. rc=%Rrc\n", rc); + } + else + pHlp->pfnPrintf(pHlp, "Missing device ID.\n"); +} + + +# ifdef IOMMU_WITH_DTE_CACHE +/** + * @callback_method_impl{FNDBGFHANDLERDEV} + */ +static DECLCALLBACK(void) iommuAmdR3DbgInfoDteCache(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + RT_NOREF(pszArgs); + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + IOMMU_CACHE_LOCK(pDevIns, pThis); + + uint16_t const cDteCache = RT_ELEMENTS(pThis->aDeviceIds); + pHlp->pfnPrintf(pHlp, "DTE Cache: Capacity=%u entries\n", cDteCache); + for (uint16_t i = 0; i < cDteCache; i++) + { + uint16_t const idDevice = pThis->aDeviceIds[i]; + if (idDevice) + { + pHlp->pfnPrintf(pHlp, " Entry[%u]: Device=%#x (BDF %02x:%02x.%d)\n", i, idDevice, + (idDevice >> VBOX_PCI_BUS_SHIFT) & VBOX_PCI_BUS_MASK, + (idDevice >> VBOX_PCI_DEVFN_DEV_SHIFT) & VBOX_PCI_DEVFN_DEV_MASK, + idDevice & VBOX_PCI_DEVFN_FUN_MASK); + + PCDTECACHE pDteCache = &pThis->aDteCache[i]; + pHlp->pfnPrintf(pHlp, " Flags = %#x\n", pDteCache->fFlags); + pHlp->pfnPrintf(pHlp, " Domain Id = %u\n", pDteCache->idDomain); + pHlp->pfnPrintf(pHlp, "\n"); + } + } + IOMMU_CACHE_UNLOCK(pDevIns, pThis); +} +# endif /* IOMMU_WITH_DTE_CACHE */ + + +# ifdef IOMMU_WITH_IOTLBE_CACHE +/** + * @callback_method_impl{FNDBGFHANDLERDEV} + */ +static DECLCALLBACK(void) iommuAmdR3DbgInfoIotlb(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + if (pszArgs) + { + uint16_t idDomain = 0; + int rc = RTStrToUInt16Full(pszArgs, 0 /* uBase */, &idDomain); + if (RT_SUCCESS(rc)) + { + pHlp->pfnPrintf(pHlp, "IOTLBEs for domain %u (%#x):\n", idDomain, idDomain); + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3); + IOTLBEINFOARG Args; + Args.pIommuR3 = pThisR3; + Args.pHlp = pHlp; + Args.idDomain = idDomain; + + IOMMU_CACHE_LOCK(pDevIns, pThis); + RTAvlU64DoWithAll(&pThisR3->TreeIotlbe, true /* fFromLeft */, iommuAmdR3IotlbEntryInfo, &Args); + IOMMU_CACHE_UNLOCK(pDevIns, pThis); + } + else + pHlp->pfnPrintf(pHlp, "Failed to parse a valid 16-bit domain ID. rc=%Rrc\n", rc); + } + else + pHlp->pfnPrintf(pHlp, "Missing domain ID.\n"); +} +# endif /* IOMMU_WITH_IOTLBE_CACHE */ + + +# ifdef IOMMU_WITH_IRTE_CACHE +/** + * Gets the interrupt type name for an interrupt type in the IRTE. + * + * @returns The interrupt type name. + * @param uIntrType The interrupt type (as specified in the IRTE). + */ +static const char *iommuAmdIrteGetIntrTypeName(uint8_t uIntrType) +{ + switch (uIntrType) + { + case VBOX_MSI_DELIVERY_MODE_FIXED: return "Fixed"; + case VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO: return "Arbitrated"; + default: return "<Reserved>"; + } +} + + +/** + * @callback_method_impl{FNDBGFHANDLERDEV} + */ +static DECLCALLBACK(void) iommuAmdR3DbgInfoIrteCache(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + RT_NOREF(pszArgs); + + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + IOMMU_CACHE_LOCK(pDevIns, pThis); + + uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache); + pHlp->pfnPrintf(pHlp, "IRTE Cache: Capacity=%u entries\n", cIrteCache); + for (uint16_t idxIrte = 0; idxIrte < cIrteCache; idxIrte++) + { + PCIRTECACHE pIrteCache = &pThis->aIrteCache[idxIrte]; + uint32_t const uKey = pIrteCache->uKey; + if (uKey != IOMMU_IRTE_CACHE_KEY_NIL) + { + uint16_t const idDevice = IOMMU_IRTE_CACHE_KEY_GET_DEVICE_ID(uKey); + uint16_t const offIrte = IOMMU_IRTE_CACHE_KEY_GET_OFF(uKey); + pHlp->pfnPrintf(pHlp, " Entry[%u]: Offset=%#x Device=%#x (BDF %02x:%02x.%d)\n", + idxIrte, offIrte, idDevice, + (idDevice >> VBOX_PCI_BUS_SHIFT) & VBOX_PCI_BUS_MASK, + (idDevice >> VBOX_PCI_DEVFN_DEV_SHIFT) & VBOX_PCI_DEVFN_DEV_MASK, + idDevice & VBOX_PCI_DEVFN_FUN_MASK); + + PCIRTE_T pIrte = &pIrteCache->Irte; + pHlp->pfnPrintf(pHlp, " Remap Enable = %RTbool\n", pIrte->n.u1RemapEnable); + pHlp->pfnPrintf(pHlp, " Suppress IOPF = %RTbool\n", pIrte->n.u1SuppressIoPf); + pHlp->pfnPrintf(pHlp, " Interrupt Type = %#x (%s)\n", pIrte->n.u3IntrType, + iommuAmdIrteGetIntrTypeName(pIrte->n.u3IntrType)); + pHlp->pfnPrintf(pHlp, " Request EOI = %RTbool\n", pIrte->n.u1ReqEoi); + pHlp->pfnPrintf(pHlp, " Destination mode = %s\n", pIrte->n.u1DestMode ? "Logical" : "Physical"); + pHlp->pfnPrintf(pHlp, " Destination Id = %u\n", pIrte->n.u8Dest); + pHlp->pfnPrintf(pHlp, " Vector = %#x (%u)\n", pIrte->n.u8Vector, pIrte->n.u8Vector); + pHlp->pfnPrintf(pHlp, "\n"); + } + } + IOMMU_CACHE_UNLOCK(pDevIns, pThis); +} +# endif /* IOMMU_WITH_IRTE_CACHE */ + + +/** + * @callback_method_impl{FNDBGFHANDLERDEV} + */ +static DECLCALLBACK(void) iommuAmdR3DbgInfoDevTabs(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + RT_NOREF(pszArgs); + + PCIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + PCPDMPCIDEV pPciDev = pDevIns->apPciDevs[0]; + PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev); + NOREF(pPciDev); + + uint8_t cSegments = 0; + for (uint8_t i = 0; i < RT_ELEMENTS(pThis->aDevTabBaseAddrs); i++) + { + DEV_TAB_BAR_T const DevTabBar = pThis->aDevTabBaseAddrs[i]; + RTGCPHYS const GCPhysDevTab = DevTabBar.n.u40Base << X86_PAGE_4K_SHIFT; + if (GCPhysDevTab) + ++cSegments; + } + + pHlp->pfnPrintf(pHlp, "AMD-IOMMU device tables with address translations enabled:\n"); + pHlp->pfnPrintf(pHlp, " DTE Segments=%u\n", cSegments); + if (!cSegments) + return; + + for (uint8_t i = 0; i < RT_ELEMENTS(pThis->aDevTabBaseAddrs); i++) + { + DEV_TAB_BAR_T const DevTabBar = pThis->aDevTabBaseAddrs[i]; + RTGCPHYS const GCPhysDevTab = DevTabBar.n.u40Base << X86_PAGE_4K_SHIFT; + if (GCPhysDevTab) + { + uint32_t const cbDevTab = IOMMU_GET_DEV_TAB_LEN(&DevTabBar); + uint32_t const cDtes = cbDevTab / sizeof(DTE_T); + + void *pvDevTab = RTMemAllocZ(cbDevTab); + if (RT_LIKELY(pvDevTab)) + { + int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysDevTab, pvDevTab, cbDevTab); + if (RT_SUCCESS(rc)) + { + for (uint32_t idxDte = 0; idxDte < cDtes; idxDte++) + { + PCDTE_T pDte = (PCDTE_T)((uintptr_t)pvDevTab + idxDte * sizeof(DTE_T)); + if ( pDte->n.u1Valid + && pDte->n.u1TranslationValid + && pDte->n.u3Mode != 0) + { + pHlp->pfnPrintf(pHlp, " DTE %u (BDF %02x:%02x.%d)\n", idxDte, + (idxDte >> VBOX_PCI_BUS_SHIFT) & VBOX_PCI_BUS_MASK, + (idxDte >> VBOX_PCI_DEVFN_DEV_SHIFT) & VBOX_PCI_DEVFN_DEV_MASK, + idxDte & VBOX_PCI_DEVFN_FUN_MASK); + iommuAmdR3DbgInfoDteWorker(pHlp, pDte, " "); + pHlp->pfnPrintf(pHlp, "\n"); + } + } + pHlp->pfnPrintf(pHlp, "\n"); + } + else + { + pHlp->pfnPrintf(pHlp, " Failed to read table at %#RGp of size %zu bytes. rc=%Rrc!\n", GCPhysDevTab, + cbDevTab, rc); + } + + RTMemFree(pvDevTab); + } + else + { + pHlp->pfnPrintf(pHlp, " Allocating %zu bytes for reading the device table failed!\n", cbDevTab); + return; + } + } + } +} + + +/** + * @callback_method_impl{FNSSMDEVSAVEEXEC} + */ +static DECLCALLBACK(int) iommuAmdR3SaveExec(PPDMDEVINS pDevIns, PSSMHANDLE pSSM) +{ + PCIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + PCPDMDEVHLPR3 pHlp = pDevIns->pHlpR3; + LogFlowFunc(("\n")); + + /* First, save ExtFeat and other registers that cannot be modified by the guest. */ + pHlp->pfnSSMPutU64(pSSM, pThis->ExtFeat.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->DevSpecificFeat.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->DevSpecificCtrl.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->DevSpecificStatus.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->MiscInfo.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->RsvdReg); + + /* Next, save all registers that can be modified by the guest. */ + pHlp->pfnSSMPutU64(pSSM, pThis->IommuBar.u64); + + uint8_t const cDevTabBaseAddrs = RT_ELEMENTS(pThis->aDevTabBaseAddrs); + pHlp->pfnSSMPutU8(pSSM, cDevTabBaseAddrs); + for (uint8_t i = 0; i < cDevTabBaseAddrs; i++) + pHlp->pfnSSMPutU64(pSSM, pThis->aDevTabBaseAddrs[i].u64); + + AssertReturn(pThis->CmdBufBaseAddr.n.u4Len >= 8, VERR_IOMMU_IPE_4); + pHlp->pfnSSMPutU64(pSSM, pThis->CmdBufBaseAddr.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->EvtLogBaseAddr.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->Ctrl.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->ExclRangeBaseAddr.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->ExclRangeLimit.u64); +#if 0 + pHlp->pfnSSMPutU64(pSSM, pThis->ExtFeat.u64); /* read-only, done already (above). */ +#endif + + pHlp->pfnSSMPutU64(pSSM, pThis->PprLogBaseAddr.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->HwEvtHi.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->HwEvtLo); + pHlp->pfnSSMPutU64(pSSM, pThis->HwEvtStatus.u64); + + pHlp->pfnSSMPutU64(pSSM, pThis->GALogBaseAddr.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->GALogTailAddr.u64); + + pHlp->pfnSSMPutU64(pSSM, pThis->PprLogBBaseAddr.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->EvtLogBBaseAddr.u64); + +#if 0 + pHlp->pfnSSMPutU64(pSSM, pThis->DevSpecificFeat.u64); /* read-only, done already (above). */ + pHlp->pfnSSMPutU64(pSSM, pThis->DevSpecificCtrl.u64); /* read-only, done already (above). */ + pHlp->pfnSSMPutU64(pSSM, pThis->DevSpecificStatus.u64); /* read-only, done already (above). */ + + pHlp->pfnSSMPutU64(pSSM, pThis->MiscInfo.u64); /* read-only, done already (above). */ +#endif + pHlp->pfnSSMPutU32(pSSM, pThis->PerfOptCtrl.u32); + + pHlp->pfnSSMPutU64(pSSM, pThis->XtGenIntrCtrl.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->XtPprIntrCtrl.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->XtGALogIntrCtrl.u64); + + size_t const cMarcApers = RT_ELEMENTS(pThis->aMarcApers); + pHlp->pfnSSMPutU8(pSSM, cMarcApers); + for (size_t i = 0; i < cMarcApers; i++) + { + pHlp->pfnSSMPutU64(pSSM, pThis->aMarcApers[i].Base.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->aMarcApers[i].Reloc.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->aMarcApers[i].Length.u64); + } + +#if 0 + pHlp->pfnSSMPutU64(pSSM, pThis->RsvdReg); /* read-only, done already (above). */ +#endif + + pHlp->pfnSSMPutU64(pSSM, pThis->CmdBufHeadPtr.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->CmdBufTailPtr.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->EvtLogHeadPtr.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->EvtLogTailPtr.u64); + + pHlp->pfnSSMPutU64(pSSM, pThis->Status.u64); + + pHlp->pfnSSMPutU64(pSSM, pThis->PprLogHeadPtr.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->PprLogTailPtr.u64); + + pHlp->pfnSSMPutU64(pSSM, pThis->GALogHeadPtr.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->GALogTailPtr.u64); + + pHlp->pfnSSMPutU64(pSSM, pThis->PprLogBHeadPtr.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->PprLogBTailPtr.u64); + + pHlp->pfnSSMPutU64(pSSM, pThis->EvtLogBHeadPtr.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->EvtLogBTailPtr.u64); + + pHlp->pfnSSMPutU64(pSSM, pThis->PprLogAutoResp.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->PprLogOverflowEarly.u64); + pHlp->pfnSSMPutU64(pSSM, pThis->PprLogBOverflowEarly.u64); + + return pHlp->pfnSSMPutU32(pSSM, UINT32_MAX); +} + + +/** + * @callback_method_impl{FNSSMDEVLOADEXEC} + */ +static DECLCALLBACK(int) iommuAmdR3LoadExec(PPDMDEVINS pDevIns, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + PCPDMDEVHLPR3 pHlp = pDevIns->pHlpR3; + int const rcErr = VERR_SSM_UNEXPECTED_DATA; + LogFlowFunc(("\n")); + + /* Validate. */ + AssertReturn(uPass == SSM_PASS_FINAL, VERR_WRONG_ORDER); + if (uVersion != IOMMU_SAVED_STATE_VERSION) + { + LogRel(("%s: Invalid saved-state version %#x\n", IOMMU_LOG_PFX, uVersion)); + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + } + + /* Load ExtFeat and other read-only registers first. */ + int rc = pHlp->pfnSSMGetU64(pSSM, &pThis->ExtFeat.u64); + AssertRCReturn(rc, rc); + AssertLogRelMsgReturn(pThis->ExtFeat.n.u2HostAddrTranslateSize < 0x3, + ("ExtFeat.HATS register invalid %#RX64\n", pThis->ExtFeat.u64), rcErr); + pHlp->pfnSSMGetU64(pSSM, &pThis->DevSpecificFeat.u64); + pHlp->pfnSSMGetU64(pSSM, &pThis->DevSpecificCtrl.u64); + pHlp->pfnSSMGetU64(pSSM, &pThis->DevSpecificStatus.u64); + pHlp->pfnSSMGetU64(pSSM, &pThis->MiscInfo.u64); + pHlp->pfnSSMGetU64(pSSM, &pThis->RsvdReg); + + /* IOMMU base address register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->IommuBar.u64); + AssertRCReturn(rc, rc); + pThis->IommuBar.u64 &= IOMMU_BAR_VALID_MASK; + + /* Device table base address registers. */ + uint8_t cDevTabBaseAddrs; + rc = pHlp->pfnSSMGetU8(pSSM, &cDevTabBaseAddrs); + AssertRCReturn(rc, rc); + AssertLogRelMsgReturn(cDevTabBaseAddrs > 0 && cDevTabBaseAddrs <= RT_ELEMENTS(pThis->aDevTabBaseAddrs), + ("Device table segment count invalid %#x\n", cDevTabBaseAddrs), rcErr); + AssertCompile(RT_ELEMENTS(pThis->aDevTabBaseAddrs) == RT_ELEMENTS(g_auDevTabSegMaxSizes)); + for (uint8_t i = 0; i < cDevTabBaseAddrs; i++) + { + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->aDevTabBaseAddrs[i].u64); + AssertRCReturn(rc, rc); + pThis->aDevTabBaseAddrs[i].u64 &= IOMMU_DEV_TAB_BAR_VALID_MASK; + uint16_t const uSegSize = pThis->aDevTabBaseAddrs[i].n.u9Size; + uint16_t const uMaxSegSize = g_auDevTabSegMaxSizes[i]; + AssertLogRelMsgReturn(uSegSize <= uMaxSegSize, + ("Device table [%u] segment size invalid %u (max %u)\n", i, uSegSize, uMaxSegSize), rcErr); + } + + /* Command buffer base address register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->CmdBufBaseAddr.u64); + AssertRCReturn(rc, rc); + pThis->CmdBufBaseAddr.u64 &= IOMMU_CMD_BUF_BAR_VALID_MASK; + AssertLogRelMsgReturn(pThis->CmdBufBaseAddr.n.u4Len >= 8, + ("Command buffer base address invalid %#RX64\n", pThis->CmdBufBaseAddr.u64), rcErr); + + /* Event log base address register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->EvtLogBaseAddr.u64); + AssertRCReturn(rc, rc); + pThis->EvtLogBaseAddr.u64 &= IOMMU_EVT_LOG_BAR_VALID_MASK; + AssertLogRelMsgReturn(pThis->EvtLogBaseAddr.n.u4Len >= 8, + ("Event log base address invalid %#RX64\n", pThis->EvtLogBaseAddr.u64), rcErr); + + /* Control register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->Ctrl.u64); + AssertRCReturn(rc, rc); + pThis->Ctrl.u64 &= IOMMU_CTRL_VALID_MASK; + AssertLogRelMsgReturn(pThis->Ctrl.n.u3DevTabSegEn <= pThis->ExtFeat.n.u2DevTabSegSup, + ("Control register invalid %#RX64\n", pThis->Ctrl.u64), rcErr); + + /* Exclusion range base address register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->ExclRangeBaseAddr.u64); + AssertRCReturn(rc, rc); + pThis->ExclRangeBaseAddr.u64 &= IOMMU_EXCL_RANGE_BAR_VALID_MASK; + + /* Exclusion range limit register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->ExclRangeLimit.u64); + AssertRCReturn(rc, rc); + pThis->ExclRangeLimit.u64 &= IOMMU_EXCL_RANGE_LIMIT_VALID_MASK; + pThis->ExclRangeLimit.u64 |= UINT64_C(0xfff); + +#if 0 + pHlp->pfnSSMGetU64(pSSM, &pThis->ExtFeat.u64); /* read-only, done already (above). */ +#endif + + /* PPR log base address register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogBaseAddr.u64); + AssertRCReturn(rc, rc); + Assert(!pThis->ExtFeat.n.u1PprSup); + + /* Hardware event (Hi) register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->HwEvtHi.u64); + AssertRCReturn(rc, rc); + + /* Hardware event (Lo) register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->HwEvtLo); + AssertRCReturn(rc, rc); + + /* Hardware event status register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->HwEvtStatus.u64); + AssertRCReturn(rc, rc); + pThis->HwEvtStatus.u64 &= IOMMU_HW_EVT_STATUS_VALID_MASK; + + /* Guest Virtual-APIC log base address register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->GALogBaseAddr.u64); + AssertRCReturn(rc, rc); + Assert(!pThis->ExtFeat.n.u1GstVirtApicSup); + + /* Guest Virtual-APIC log tail address register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->GALogTailAddr.u64); + AssertRCReturn(rc, rc); + Assert(!pThis->ExtFeat.n.u1GstVirtApicSup); + + /* PPR log-B base address register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogBBaseAddr.u64); + AssertRCReturn(rc, rc); + Assert(!pThis->ExtFeat.n.u1PprSup); + + /* Event log-B base address register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->EvtLogBBaseAddr.u64); + AssertRCReturn(rc, rc); + Assert(!pThis->ExtFeat.n.u2DualPprLogSup); + +#if 0 + pHlp->pfnSSMGetU64(pSSM, &pThis->DevSpecificFeat.u64); /* read-only, done already (above). */ + pHlp->pfnSSMGetU64(pSSM, &pThis->DevSpecificCtrl.u64); /* read-only, done already (above). */ + pHlp->pfnSSMGetU64(pSSM, &pThis->DevSpecificStatus.u64); /* read-only, done already (above). */ + + pHlp->pfnSSMGetU64(pSSM, &pThis->MiscInfo.u64); /* read-only, done already (above). */ +#endif + + /* Performance optimization control register. */ + rc = pHlp->pfnSSMGetU32(pSSM, &pThis->PerfOptCtrl.u32); + AssertRCReturn(rc, rc); + Assert(!pThis->ExtFeat.n.u1PerfOptSup); + + /* x2APIC registers. */ + { + Assert(!pThis->ExtFeat.n.u1X2ApicSup); + + /* x2APIC general interrupt control register. */ + pHlp->pfnSSMGetU64(pSSM, &pThis->XtGenIntrCtrl.u64); + AssertRCReturn(rc, rc); + + /* x2APIC PPR interrupt control register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->XtPprIntrCtrl.u64); + AssertRCReturn(rc, rc); + + /* x2APIC GA log interrupt control register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->XtGALogIntrCtrl.u64); + AssertRCReturn(rc, rc); + } + + /* MARC (Memory Access and Routing) registers. */ + { + uint8_t cMarcApers; + rc = pHlp->pfnSSMGetU8(pSSM, &cMarcApers); + AssertRCReturn(rc, rc); + AssertLogRelMsgReturn(cMarcApers > 0 && cMarcApers <= RT_ELEMENTS(pThis->aMarcApers), + ("MARC register count invalid %#x\n", cMarcApers), rcErr); + for (uint8_t i = 0; i < cMarcApers; i++) + { + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->aMarcApers[i].Base.u64); + AssertRCReturn(rc, rc); + + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->aMarcApers[i].Reloc.u64); + AssertRCReturn(rc, rc); + + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->aMarcApers[i].Length.u64); + AssertRCReturn(rc, rc); + } + Assert(!pThis->ExtFeat.n.u2MarcSup); + } + +#if 0 + pHlp->pfnSSMGetU64(pSSM, &pThis->RsvdReg); /* read-only, done already (above). */ +#endif + + /* Command buffer head pointer register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->CmdBufHeadPtr.u64); + AssertRCReturn(rc, rc); + { + /* + * IOMMU behavior is undefined when software writes a value outside the buffer length. + * In our emulation, since we ignore the write entirely (see iommuAmdCmdBufHeadPtr_w) + * we shouldn't see such values in the saved state. + */ + uint32_t const offBuf = pThis->CmdBufHeadPtr.u64 & IOMMU_CMD_BUF_HEAD_PTR_VALID_MASK; + uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len); + Assert(cbBuf <= _512K); + AssertLogRelMsgReturn(offBuf < cbBuf, + ("Command buffer head pointer invalid %#x\n", pThis->CmdBufHeadPtr.u64), rcErr); + } + + /* Command buffer tail pointer register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->CmdBufTailPtr.u64); + AssertRCReturn(rc, rc); + { + uint32_t const offBuf = pThis->CmdBufTailPtr.u64 & IOMMU_CMD_BUF_TAIL_PTR_VALID_MASK; + uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len); + Assert(cbBuf <= _512K); + AssertLogRelMsgReturn(offBuf < cbBuf, + ("Command buffer tail pointer invalid %#x\n", pThis->CmdBufTailPtr.u64), rcErr); + } + + /* Event log head pointer register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->EvtLogHeadPtr.u64); + AssertRCReturn(rc, rc); + { + uint32_t const offBuf = pThis->EvtLogHeadPtr.u64 & IOMMU_EVT_LOG_HEAD_PTR_VALID_MASK; + uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len); + Assert(cbBuf <= _512K); + AssertLogRelMsgReturn(offBuf < cbBuf, + ("Event log head pointer invalid %#x\n", pThis->EvtLogHeadPtr.u64), rcErr); + } + + /* Event log tail pointer register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->EvtLogTailPtr.u64); + AssertRCReturn(rc, rc); + { + uint32_t const offBuf = pThis->EvtLogTailPtr.u64 & IOMMU_EVT_LOG_TAIL_PTR_VALID_MASK; + uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len); + Assert(cbBuf <= _512K); + AssertLogRelMsgReturn(offBuf < cbBuf, + ("Event log tail pointer invalid %#x\n", pThis->EvtLogTailPtr.u64), rcErr); + } + + /* Status register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->Status.u64); + AssertRCReturn(rc, rc); + pThis->Status.u64 &= IOMMU_STATUS_VALID_MASK; + + /* PPR log head pointer register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogHeadPtr.u64); + AssertRCReturn(rc, rc); + Assert(!pThis->ExtFeat.n.u1PprSup); + + /* PPR log tail pointer register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogTailPtr.u64); + AssertRCReturn(rc, rc); + Assert(!pThis->ExtFeat.n.u1PprSup); + + /* Guest Virtual-APIC log head pointer register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->GALogHeadPtr.u64); + AssertRCReturn(rc, rc); + Assert(!pThis->ExtFeat.n.u1GstVirtApicSup); + + /* Guest Virtual-APIC log tail pointer register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->GALogTailPtr.u64); + AssertRCReturn(rc, rc); + Assert(!pThis->ExtFeat.n.u1GstVirtApicSup); + + /* PPR log-B head pointer register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogBHeadPtr.u64); + AssertRCReturn(rc, rc); + Assert(!pThis->ExtFeat.n.u1PprSup); + + /* PPR log-B head pointer register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogBTailPtr.u64); + AssertRCReturn(rc, rc); + Assert(!pThis->ExtFeat.n.u1PprSup); + + /* Event log-B head pointer register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->EvtLogBHeadPtr.u64); + AssertRCReturn(rc, rc); + Assert(!pThis->ExtFeat.n.u2DualEvtLogSup); + + /* Event log-B tail pointer register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->EvtLogBTailPtr.u64); + AssertRCReturn(rc, rc); + Assert(!pThis->ExtFeat.n.u2DualEvtLogSup); + + /* PPR log auto response register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogAutoResp.u64); + AssertRCReturn(rc, rc); + Assert(!pThis->ExtFeat.n.u1PprAutoRespSup); + + /* PPR log overflow early indicator register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogOverflowEarly.u64); + AssertRCReturn(rc, rc); + Assert(!pThis->ExtFeat.n.u1PprLogOverflowWarn); + + /* PPR log-B overflow early indicator register. */ + rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogBOverflowEarly.u64); + AssertRCReturn(rc, rc); + Assert(!pThis->ExtFeat.n.u1PprLogOverflowWarn); + + /* End marker. */ + { + uint32_t uEndMarker; + rc = pHlp->pfnSSMGetU32(pSSM, &uEndMarker); + AssertLogRelMsgRCReturn(rc, ("Failed to read end marker. rc=%Rrc\n", rc), VERR_SSM_DATA_UNIT_FORMAT_CHANGED); + AssertLogRelMsgReturn(uEndMarker == UINT32_MAX, ("End marker invalid (%#x expected %#x)\n", uEndMarker, UINT32_MAX), + rcErr); + } + + return rc; +} + + +/** + * @callback_method_impl{FNSSMDEVLOADDONE} + */ +static DECLCALLBACK(int) iommuAmdR3LoadDone(PPDMDEVINS pDevIns, PSSMHANDLE pSSM) +{ + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3); + RT_NOREF(pSSM); + LogFlowFunc(("\n")); + + /* Sanity. */ + AssertPtrReturn(pThis, VERR_INVALID_POINTER); + AssertPtrReturn(pThisR3, VERR_INVALID_POINTER); + + int rc; + IOMMU_LOCK(pDevIns, pThisR3); + + /* Map MMIO regions if the IOMMU BAR is enabled. */ + if (pThis->IommuBar.n.u1Enable) + rc = iommuAmdR3MmioSetup(pDevIns); + else + rc = VINF_SUCCESS; + + /* Wake up the command thread if commands need processing. */ + iommuAmdCmdThreadWakeUpIfNeeded(pDevIns); + + IOMMU_UNLOCK(pDevIns, pThisR3); + + LogRel(("%s: Restored: DSFX=%u.%u DSCX=%u.%u DSSX=%u.%u ExtFeat=%#RX64\n", IOMMU_LOG_PFX, + pThis->DevSpecificFeat.n.u4RevMajor, pThis->DevSpecificFeat.n.u4RevMinor, + pThis->DevSpecificCtrl.n.u4RevMajor, pThis->DevSpecificCtrl.n.u4RevMinor, + pThis->DevSpecificStatus.n.u4RevMajor, pThis->DevSpecificStatus.n.u4RevMinor, + pThis->ExtFeat.u64)); + return rc; +} + + +/** + * @interface_method_impl{PDMDEVREG,pfnReset} + */ +static DECLCALLBACK(void) iommuAmdR3Reset(PPDMDEVINS pDevIns) +{ + /* + * Resets read-write portion of the IOMMU state. + * + * NOTE! State not initialized here is expected to be initialized during + * device construction and remain read-only through the lifetime of the VM. + */ + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3); + PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0]; + PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev); + LogFlowFunc(("\n")); + + IOMMU_LOCK(pDevIns, pThisR3); + + RT_ZERO(pThis->aDevTabBaseAddrs); + + pThis->CmdBufBaseAddr.u64 = 0; + pThis->CmdBufBaseAddr.n.u4Len = 8; + + pThis->EvtLogBaseAddr.u64 = 0; + pThis->EvtLogBaseAddr.n.u4Len = 8; + + pThis->Ctrl.u64 = 0; + pThis->Ctrl.n.u1Coherent = 1; + Assert(!pThis->ExtFeat.n.u1BlockStopMarkSup); + + pThis->ExclRangeBaseAddr.u64 = 0; + pThis->ExclRangeLimit.u64 = 0; + + pThis->PprLogBaseAddr.u64 = 0; + pThis->PprLogBaseAddr.n.u4Len = 8; + + pThis->HwEvtHi.u64 = 0; + pThis->HwEvtLo = 0; + pThis->HwEvtStatus.u64 = 0; + + pThis->GALogBaseAddr.u64 = 0; + pThis->GALogBaseAddr.n.u4Len = 8; + pThis->GALogTailAddr.u64 = 0; + + pThis->PprLogBBaseAddr.u64 = 0; + pThis->PprLogBBaseAddr.n.u4Len = 8; + + pThis->EvtLogBBaseAddr.u64 = 0; + pThis->EvtLogBBaseAddr.n.u4Len = 8; + + pThis->PerfOptCtrl.u32 = 0; + + pThis->XtGenIntrCtrl.u64 = 0; + pThis->XtPprIntrCtrl.u64 = 0; + pThis->XtGALogIntrCtrl.u64 = 0; + + RT_ZERO(pThis->aMarcApers); + + pThis->CmdBufHeadPtr.u64 = 0; + pThis->CmdBufTailPtr.u64 = 0; + pThis->EvtLogHeadPtr.u64 = 0; + pThis->EvtLogTailPtr.u64 = 0; + + pThis->Status.u64 = 0; + + pThis->PprLogHeadPtr.u64 = 0; + pThis->PprLogTailPtr.u64 = 0; + + pThis->GALogHeadPtr.u64 = 0; + pThis->GALogTailPtr.u64 = 0; + + pThis->PprLogBHeadPtr.u64 = 0; + pThis->PprLogBTailPtr.u64 = 0; + + pThis->EvtLogBHeadPtr.u64 = 0; + pThis->EvtLogBTailPtr.u64 = 0; + + pThis->PprLogAutoResp.u64 = 0; + pThis->PprLogOverflowEarly.u64 = 0; + pThis->PprLogBOverflowEarly.u64 = 0; + + pThis->IommuBar.u64 = 0; + PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_BASE_ADDR_REG_LO, 0); + PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_BASE_ADDR_REG_HI, 0); + + PDMPciDevSetCommand(pPciDev, VBOX_PCI_COMMAND_MASTER); + + IOMMU_UNLOCK(pDevIns, pThisR3); + +#ifdef IOMMU_WITH_DTE_CACHE + iommuAmdDteCacheRemoveAll(pDevIns); +#endif +#ifdef IOMMU_WITH_IOTLBE_CACHE + iommuAmdIotlbRemoveAll(pDevIns); +#endif +#ifdef IOMMU_WITH_IRTE_CACHE + iommuAmdIrteCacheRemoveAll(pDevIns); +#endif +} + + +/** + * @interface_method_impl{PDMDEVREG,pfnDestruct} + */ +static DECLCALLBACK(int) iommuAmdR3Destruct(PPDMDEVINS pDevIns) +{ + PDMDEV_CHECK_VERSIONS_RETURN_QUIET(pDevIns); + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3); + LogFlowFunc(("\n")); + + IOMMU_LOCK(pDevIns, pThisR3); + + if (pThis->hEvtCmdThread != NIL_SUPSEMEVENT) + { + PDMDevHlpSUPSemEventClose(pDevIns, pThis->hEvtCmdThread); + pThis->hEvtCmdThread = NIL_SUPSEMEVENT; + } + +#ifdef IOMMU_WITH_IOTLBE_CACHE + if (pThisR3->paIotlbes) + { + PDMDevHlpMMHeapFree(pDevIns, pThisR3->paIotlbes); + pThisR3->paIotlbes = NULL; + pThisR3->idxUnusedIotlbe = 0; + } +#endif + + IOMMU_UNLOCK(pDevIns, pThisR3); + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{PDMDEVREG,pfnConstruct} + */ +static DECLCALLBACK(int) iommuAmdR3Construct(PPDMDEVINS pDevIns, int iInstance, PCFGMNODE pCfg) +{ + PDMDEV_CHECK_VERSIONS_RETURN(pDevIns); + + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3); + PCPDMDEVHLPR3 pHlp = pDevIns->pHlpR3; + + pThis->u32Magic = IOMMU_MAGIC; + pThisR3->pDevInsR3 = pDevIns; + + LogFlowFunc(("iInstance=%d\n", iInstance)); + + /* + * Validate and read the configuration. + */ + PDMDEV_VALIDATE_CONFIG_RETURN(pDevIns, "PCIAddress", ""); + int rc = pHlp->pfnCFGMQueryU32Def(pCfg, "PCIAddress", &pThis->uPciAddress, NIL_PCIBDF); + if (RT_FAILURE(rc)) + return PDMDEV_SET_ERROR(pDevIns, rc, N_("Configuration error: Failed to query 32-bit integer \"PCIAddress\"")); + if (!PCIBDF_IS_VALID(pThis->uPciAddress)) + return PDMDEV_SET_ERROR(pDevIns, rc, N_("Configuration error: Failed \"PCIAddress\" of the AMD IOMMU cannot be invalid")); + + /* + * Register the IOMMU with PDM. + */ + PDMIOMMUREGR3 IommuReg; + RT_ZERO(IommuReg); + IommuReg.u32Version = PDM_IOMMUREGCC_VERSION; + IommuReg.pfnMemAccess = iommuAmdMemAccess; + IommuReg.pfnMemBulkAccess = iommuAmdMemBulkAccess; + IommuReg.pfnMsiRemap = iommuAmdMsiRemap; + IommuReg.u32TheEnd = PDM_IOMMUREGCC_VERSION; + rc = PDMDevHlpIommuRegister(pDevIns, &IommuReg, &pThisR3->CTX_SUFF(pIommuHlp), &pThis->idxIommu); + if (RT_FAILURE(rc)) + return PDMDEV_SET_ERROR(pDevIns, rc, N_("Failed to register ourselves as an IOMMU device")); + if (pThisR3->CTX_SUFF(pIommuHlp)->u32Version != PDM_IOMMUHLPR3_VERSION) + return PDMDevHlpVMSetError(pDevIns, VERR_VERSION_MISMATCH, RT_SRC_POS, + N_("IOMMU helper version mismatch; got %#x expected %#x"), + pThisR3->CTX_SUFF(pIommuHlp)->u32Version, PDM_IOMMUHLPR3_VERSION); + if (pThisR3->CTX_SUFF(pIommuHlp)->u32TheEnd != PDM_IOMMUHLPR3_VERSION) + return PDMDevHlpVMSetError(pDevIns, VERR_VERSION_MISMATCH, RT_SRC_POS, + N_("IOMMU helper end-version mismatch; got %#x expected %#x"), + pThisR3->CTX_SUFF(pIommuHlp)->u32TheEnd, PDM_IOMMUHLPR3_VERSION); + AssertPtr(pThisR3->pIommuHlpR3->pfnLock); + AssertPtr(pThisR3->pIommuHlpR3->pfnUnlock); + AssertPtr(pThisR3->pIommuHlpR3->pfnLockIsOwner); + AssertPtr(pThisR3->pIommuHlpR3->pfnSendMsi); + + /* + * We will use PDM's critical section (via helpers) for the IOMMU device. + */ + rc = PDMDevHlpSetDeviceCritSect(pDevIns, PDMDevHlpCritSectGetNop(pDevIns)); + AssertRCReturn(rc, rc); + + /* + * Initialize read-only PCI configuration space. + */ + PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0]; + PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev); + + /* Header. */ + PDMPciDevSetVendorId(pPciDev, IOMMU_PCI_VENDOR_ID); /* AMD */ + PDMPciDevSetDeviceId(pPciDev, IOMMU_PCI_DEVICE_ID); /* VirtualBox IOMMU device */ + PDMPciDevSetCommand(pPciDev, VBOX_PCI_COMMAND_MASTER); /* Enable bus master (as we directly access main memory) */ + PDMPciDevSetStatus(pPciDev, VBOX_PCI_STATUS_CAP_LIST); /* Capability list supported */ + PDMPciDevSetRevisionId(pPciDev, IOMMU_PCI_REVISION_ID); /* VirtualBox specific device implementation revision */ + PDMPciDevSetClassBase(pPciDev, VBOX_PCI_CLASS_SYSTEM); /* System Base Peripheral */ + PDMPciDevSetClassSub(pPciDev, VBOX_PCI_SUB_SYSTEM_IOMMU); /* IOMMU */ + PDMPciDevSetClassProg(pPciDev, 0x0); /* IOMMU Programming interface */ + PDMPciDevSetHeaderType(pPciDev, 0x0); /* Single function, type 0 */ + PDMPciDevSetSubSystemId(pPciDev, IOMMU_PCI_DEVICE_ID); /* AMD */ + PDMPciDevSetSubSystemVendorId(pPciDev, IOMMU_PCI_VENDOR_ID); /* VirtualBox IOMMU device */ + PDMPciDevSetCapabilityList(pPciDev, IOMMU_PCI_OFF_CAP_HDR); /* Offset into capability registers */ + PDMPciDevSetInterruptPin(pPciDev, 0x1); /* INTA#. */ + PDMPciDevSetInterruptLine(pPciDev, 0x0); /* For software compatibility; no effect on hardware */ + + /* Capability Header. */ + /* NOTE! Fields (e.g, EFR) must match what we expose in the ACPI tables. */ + PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_CAP_HDR, + RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_ID, 0xf) /* RO - Secure Device capability block */ + | RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_PTR, IOMMU_PCI_OFF_MSI_CAP_HDR) /* RO - Next capability offset */ + | RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_TYPE, 0x3) /* RO - IOMMU capability block */ + | RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_REV, 0x1) /* RO - IOMMU interface revision */ + | RT_BF_MAKE(IOMMU_BF_CAPHDR_IOTLB_SUP, 0x0) /* RO - Remote IOTLB support */ + | RT_BF_MAKE(IOMMU_BF_CAPHDR_HT_TUNNEL, 0x0) /* RO - HyperTransport Tunnel support */ + | RT_BF_MAKE(IOMMU_BF_CAPHDR_NP_CACHE, 0x0) /* RO - Cache NP page table entries */ + | RT_BF_MAKE(IOMMU_BF_CAPHDR_EFR_SUP, 0x1) /* RO - Extended Feature Register support */ + | RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_EXT, 0x1)); /* RO - Misc. Information Register support */ + + /* Base Address Register. */ + PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_BASE_ADDR_REG_LO, 0x0); /* RW - Base address (Lo) and enable bit */ + PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_BASE_ADDR_REG_HI, 0x0); /* RW - Base address (Hi) */ + + /* IOMMU Range Register. */ + PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_RANGE_REG, 0x0); /* RW - Range register (implemented as RO by us) */ + + /* Misc. Information Register. */ + /* NOTE! Fields (e.g, GVA size) must match what we expose in the ACPI tables. */ + uint32_t const uMiscInfoReg0 = RT_BF_MAKE(IOMMU_BF_MISCINFO_0_MSI_NUM, 0) /* RO - MSI number */ + | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_GVA_SIZE, 2) /* RO - Guest Virt. Addr size (2=48 bits) */ + | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_PA_SIZE, 48) /* RO - Physical Addr size (48 bits) */ + | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_VA_SIZE, 64) /* RO - Virt. Addr size (64 bits) */ + | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_HT_ATS_RESV, 0) /* RW - HT ATS reserved */ + | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_MSI_NUM_PPR, 0); /* RW - PPR interrupt number */ + uint32_t const uMiscInfoReg1 = 0; + PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MISCINFO_REG_0, uMiscInfoReg0); + PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MISCINFO_REG_1, uMiscInfoReg1); + + /* MSI Capability Header register. */ + PDMMSIREG MsiReg; + RT_ZERO(MsiReg); + MsiReg.cMsiVectors = 1; + MsiReg.iMsiCapOffset = IOMMU_PCI_OFF_MSI_CAP_HDR; + MsiReg.iMsiNextOffset = 0; /* IOMMU_PCI_OFF_MSI_MAP_CAP_HDR */ + MsiReg.fMsi64bit = 1; /* 64-bit addressing support is mandatory; See AMD IOMMU spec. 2.8 "IOMMU Interrupt Support". */ + + /* MSI Address (Lo, Hi) and MSI data are read-write PCI config registers handled by our generic PCI config space code. */ +#if 0 + /* MSI Address Lo. */ + PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_LO, 0); /* RW - MSI message address (Lo) */ + /* MSI Address Hi. */ + PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_HI, 0); /* RW - MSI message address (Hi) */ + /* MSI Data. */ + PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_DATA, 0); /* RW - MSI data */ +#endif + +#if 0 + /** @todo IOMMU: I don't know if we need to support this, enable later if + * required. */ + /* MSI Mapping Capability Header register. */ + PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_MAP_CAP_HDR, + RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_CAP_ID, 0x8) /* RO - Capability ID */ + | RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_CAP_PTR, 0x0) /* RO - Offset to next capability (NULL) */ + | RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_EN, 0x1) /* RO - MSI mapping capability enable */ + | RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_FIXED, 0x1) /* RO - MSI mapping range is fixed */ + | RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_CAP_TYPE, 0x15)); /* RO - MSI mapping capability */ + /* When implementing don't forget to copy this to its MMIO shadow register (MsiMapCapHdr) in iommuAmdR3Init. */ +#endif + + /* + * Register the PCI function with PDM. + */ + rc = PDMDevHlpPCIRegister(pDevIns, pPciDev); + AssertLogRelRCReturn(rc, rc); + + /* + * Register MSI support for the PCI device. + * This must be done -after- registering it as a PCI device! + */ + rc = PDMDevHlpPCIRegisterMsi(pDevIns, &MsiReg); + AssertRCReturn(rc, rc); + + /* + * Intercept PCI config. space accesses. + */ + rc = PDMDevHlpPCIInterceptConfigAccesses(pDevIns, pPciDev, iommuAmdR3PciConfigRead, iommuAmdR3PciConfigWrite); + AssertLogRelRCReturn(rc, rc); + + /* + * Create the MMIO region. + * Mapping of the region is done when software configures it via PCI config space. + */ + rc = PDMDevHlpMmioCreate(pDevIns, IOMMU_MMIO_REGION_SIZE, pPciDev, 0 /* iPciRegion */, iommuAmdMmioWrite, iommuAmdMmioRead, + NULL /* pvUser */, + IOMMMIO_FLAGS_READ_DWORD_QWORD + | IOMMMIO_FLAGS_WRITE_DWORD_QWORD_READ_MISSING + | IOMMMIO_FLAGS_DBGSTOP_ON_COMPLICATED_READ + | IOMMMIO_FLAGS_DBGSTOP_ON_COMPLICATED_WRITE, + "AMD-IOMMU", &pThis->hMmio); + AssertLogRelRCReturn(rc, rc); + + /* + * Register saved state handlers. + */ + rc = PDMDevHlpSSMRegisterEx(pDevIns, IOMMU_SAVED_STATE_VERSION, sizeof(IOMMU), NULL /* pszBefore */, + NULL /* pfnLivePrep */, NULL /* pfnLiveExec */, NULL /* pfnLiveVote */, + NULL /* pfnSavePrep */, iommuAmdR3SaveExec, NULL /* pfnSaveDone */, + NULL /* pfnLoadPrep */, iommuAmdR3LoadExec, iommuAmdR3LoadDone); + AssertLogRelRCReturn(rc, rc); + + /* + * Register debugger info items. + */ + PDMDevHlpDBGFInfoRegister(pDevIns, "iommu", "Display IOMMU state.", iommuAmdR3DbgInfo); + PDMDevHlpDBGFInfoRegister(pDevIns, "iommudte", "Display the DTE for a device (from memory). Arguments: DeviceID.", iommuAmdR3DbgInfoDte); + PDMDevHlpDBGFInfoRegister(pDevIns, "iommudevtabs", "Display I/O device tables with translation enabled.", iommuAmdR3DbgInfoDevTabs); +#ifdef IOMMU_WITH_IOTLBE_CACHE + PDMDevHlpDBGFInfoRegister(pDevIns, "iommutlb", "Display IOTLBs for a domain. Arguments: DomainID.", iommuAmdR3DbgInfoIotlb); +#endif +#ifdef IOMMU_WITH_DTE_CACHE + PDMDevHlpDBGFInfoRegister(pDevIns, "iommudtecache", "Display the DTE cache.", iommuAmdR3DbgInfoDteCache); +#endif +#ifdef IOMMU_WITH_IRTE_CACHE + PDMDevHlpDBGFInfoRegister(pDevIns, "iommuirtecache", "Display the IRTE cache.", iommuAmdR3DbgInfoIrteCache); +#endif + +# ifdef VBOX_WITH_STATISTICS + /* + * Statistics. + */ + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioReadR3, STAMTYPE_COUNTER, "R3/MmioRead", STAMUNIT_OCCURENCES, "Number of MMIO reads in R3"); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioReadRZ, STAMTYPE_COUNTER, "RZ/MmioRead", STAMUNIT_OCCURENCES, "Number of MMIO reads in RZ."); + + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioWriteR3, STAMTYPE_COUNTER, "R3/MmioWrite", STAMUNIT_OCCURENCES, "Number of MMIO writes in R3."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioWriteRZ, STAMTYPE_COUNTER, "RZ/MmioWrite", STAMUNIT_OCCURENCES, "Number of MMIO writes in RZ."); + + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMsiRemapR3, STAMTYPE_COUNTER, "R3/MsiRemap", STAMUNIT_OCCURENCES, "Number of interrupt remap requests in R3."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMsiRemapRZ, STAMTYPE_COUNTER, "RZ/MsiRemap", STAMUNIT_OCCURENCES, "Number of interrupt remap requests in RZ."); + + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemReadR3, STAMTYPE_COUNTER, "R3/MemRead", STAMUNIT_OCCURENCES, "Number of memory read translation requests in R3."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemReadRZ, STAMTYPE_COUNTER, "RZ/MemRead", STAMUNIT_OCCURENCES, "Number of memory read translation requests in RZ."); + + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemWriteR3, STAMTYPE_COUNTER, "R3/MemWrite", STAMUNIT_OCCURENCES, "Number of memory write translation requests in R3."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemWriteRZ, STAMTYPE_COUNTER, "RZ/MemWrite", STAMUNIT_OCCURENCES, "Number of memory write translation requests in RZ."); + + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkReadR3, STAMTYPE_COUNTER, "R3/MemBulkRead", STAMUNIT_OCCURENCES, "Number of memory bulk read translation requests in R3."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkReadRZ, STAMTYPE_COUNTER, "RZ/MemBulkRead", STAMUNIT_OCCURENCES, "Number of memory bulk read translation requests in RZ."); + + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkWriteR3, STAMTYPE_COUNTER, "R3/MemBulkWrite", STAMUNIT_OCCURENCES, "Number of memory bulk write translation requests in R3."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkWriteRZ, STAMTYPE_COUNTER, "RZ/MemBulkWrite", STAMUNIT_OCCURENCES, "Number of memory bulk write translation requests in RZ."); + + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmd, STAMTYPE_COUNTER, "R3/Commands", STAMUNIT_OCCURENCES, "Number of commands processed (total)."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdCompWait, STAMTYPE_COUNTER, "R3/Commands/CompWait", STAMUNIT_OCCURENCES, "Number of Completion Wait commands processed."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvDte, STAMTYPE_COUNTER, "R3/Commands/InvDte", STAMUNIT_OCCURENCES, "Number of Invalidate DTE commands processed."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvIommuPages, STAMTYPE_COUNTER, "R3/Commands/InvIommuPages", STAMUNIT_OCCURENCES, "Number of Invalidate IOMMU Pages commands processed."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvIotlbPages, STAMTYPE_COUNTER, "R3/Commands/InvIotlbPages", STAMUNIT_OCCURENCES, "Number of Invalidate IOTLB Pages commands processed."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvIntrTable, STAMTYPE_COUNTER, "R3/Commands/InvIntrTable", STAMUNIT_OCCURENCES, "Number of Invalidate Interrupt Table commands processed."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdPrefIommuPages, STAMTYPE_COUNTER, "R3/Commands/PrefIommuPages", STAMUNIT_OCCURENCES, "Number of Prefetch IOMMU Pages commands processed."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdCompletePprReq, STAMTYPE_COUNTER, "R3/Commands/CompletePprReq", STAMUNIT_OCCURENCES, "Number of Complete PPR Requests commands processed."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvIommuAll, STAMTYPE_COUNTER, "R3/Commands/InvIommuAll", STAMUNIT_OCCURENCES, "Number of Invalidate IOMMU All commands processed."); + + + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIotlbeCached, STAMTYPE_COUNTER, "IOTLB/Cached", STAMUNIT_OCCURENCES, "Number of IOTLB entries in the cache."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIotlbeLazyEvictReuse, STAMTYPE_COUNTER, "IOTLB/LazyEvictReuse", STAMUNIT_OCCURENCES, "Number of IOTLB entries reused after lazy eviction."); + + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatProfDteLookup, STAMTYPE_PROFILE, "Profile/DteLookup", STAMUNIT_TICKS_PER_CALL, "Profiling DTE lookup."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatProfIotlbeLookup, STAMTYPE_PROFILE, "Profile/IotlbeLookup", STAMUNIT_TICKS_PER_CALL, "Profiling IOTLBE lookup."); + + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatProfIrteLookup, STAMTYPE_PROFILE, "Profile/IrteLookup", STAMUNIT_TICKS_PER_CALL, "Profiling IRTE lookup."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatProfIrteCacheLookup, STAMTYPE_PROFILE, "Profile/IrteCacheLookup", STAMUNIT_TICKS_PER_CALL, "Profiling IRTE cache lookup."); + + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheHit, STAMTYPE_COUNTER, "MemAccess/CacheHit", STAMUNIT_OCCURENCES, "Number of cache hits."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheMiss, STAMTYPE_COUNTER, "MemAccess/CacheMiss", STAMUNIT_OCCURENCES, "Number of cache misses."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheHitFull, STAMTYPE_COUNTER, "MemAccess/CacheHitFull", STAMUNIT_OCCURENCES, "Number of accesses that was entirely in the cache."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheNonContig, STAMTYPE_COUNTER, "MemAccess/CacheNonContig", STAMUNIT_OCCURENCES, "Number of cache accesses that resulted in non-contiguous translated regions."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCachePermDenied, STAMTYPE_COUNTER, "MemAccess/CacheAddrDenied", STAMUNIT_OCCURENCES, "Number of cache accesses that resulted in denied permissions."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessDteNonContig, STAMTYPE_COUNTER, "MemAccess/DteNonContig", STAMUNIT_OCCURENCES, "Number of DTE accesses that resulted in non-contiguous translated regions."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessDtePermDenied, STAMTYPE_COUNTER, "MemAccess/DtePermDenied", STAMUNIT_OCCURENCES, "Number of DTE accesses that resulted in denied permissions."); + + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIntrCacheHit, STAMTYPE_COUNTER, "Interrupt/CacheHit", STAMUNIT_OCCURENCES, "Number of cache hits."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIntrCacheMiss, STAMTYPE_COUNTER, "Interrupt/CacheMiss", STAMUNIT_OCCURENCES, "Number of cache misses."); + + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatNonStdPageSize, STAMTYPE_COUNTER, "MemAccess/NonStdPageSize", STAMUNIT_OCCURENCES, "Number of non-standard page size translations."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIopfs, STAMTYPE_COUNTER, "MemAccess/IOPFs", STAMUNIT_OCCURENCES, "Number of I/O page faults."); +# endif + + /* + * Create the command thread and its event semaphore. + */ + char szDevIommu[64]; + RT_ZERO(szDevIommu); + RTStrPrintf(szDevIommu, sizeof(szDevIommu), "IOMMU-%u", iInstance); + rc = PDMDevHlpThreadCreate(pDevIns, &pThisR3->pCmdThread, pThis, iommuAmdR3CmdThread, iommuAmdR3CmdThreadWakeUp, + 0 /* cbStack */, RTTHREADTYPE_IO, szDevIommu); + AssertLogRelRCReturn(rc, rc); + + rc = PDMDevHlpSUPSemEventCreate(pDevIns, &pThis->hEvtCmdThread); + AssertLogRelRCReturn(rc, rc); + +#ifdef IOMMU_WITH_DTE_CACHE + /* + * Initialize the critsect of the cache. + */ + rc = PDMDevHlpCritSectInit(pDevIns, &pThis->CritSectCache, RT_SRC_POS, "IOMMUCache-#%u", pDevIns->iInstance); + AssertLogRelRCReturn(rc, rc); + + /* Several places in this code relies on this basic assumption - assert it! */ + AssertCompile(RT_ELEMENTS(pThis->aDeviceIds) == RT_ELEMENTS(pThis->aDteCache)); +#endif + +#ifdef IOMMU_WITH_IOTLBE_CACHE + /* + * Allocate IOTLB entries. + * This is allocated upfront since we expect a relatively small number of entries, + * is more cache-line efficient and easier to track least recently used entries for + * eviction when the cache is full. This also avoids unpredictable behavior during + * the lifetime of the VM if the hyperheap gets full. + */ + size_t const cbIotlbes = sizeof(IOTLBE) * IOMMU_IOTLBE_MAX; + pThisR3->paIotlbes = (PIOTLBE)PDMDevHlpMMHeapAllocZ(pDevIns, cbIotlbes); + if (!pThisR3->paIotlbes) + return PDMDevHlpVMSetError(pDevIns, VERR_NO_MEMORY, RT_SRC_POS, + N_("Failed to allocate %zu bytes from the hyperheap for the IOTLB cache."), cbIotlbes); + RTListInit(&pThisR3->LstLruIotlbe); + LogRel(("%s: Allocated %zu bytes from the hyperheap for the IOTLB cache\n", IOMMU_LOG_PFX, cbIotlbes)); +#endif + + /* + * Initialize read-only registers. + * NOTE! Fields here must match their corresponding field in the ACPI tables. + */ + /* Don't remove the commented lines below as it lets us see all features at a glance. */ + pThis->ExtFeat.u64 = 0; + //pThis->ExtFeat.n.u1PrefetchSup = 0; + //pThis->ExtFeat.n.u1PprSup = 0; + //pThis->ExtFeat.n.u1X2ApicSup = 0; + //pThis->ExtFeat.n.u1NoExecuteSup = 0; + //pThis->ExtFeat.n.u1GstTranslateSup = 0; + pThis->ExtFeat.n.u1InvAllSup = 1; + //pThis->ExtFeat.n.u1GstVirtApicSup = 0; + pThis->ExtFeat.n.u1HwErrorSup = 1; + //pThis->ExtFeat.n.u1PerfCounterSup = 0; + AssertCompile((IOMMU_MAX_HOST_PT_LEVEL & 0x3) < 3); + pThis->ExtFeat.n.u2HostAddrTranslateSize = (IOMMU_MAX_HOST_PT_LEVEL & 0x3); + //pThis->ExtFeat.n.u2GstAddrTranslateSize = 0; /* Requires GstTranslateSup */ + //pThis->ExtFeat.n.u2GstCr3RootTblLevel = 0; /* Requires GstTranslateSup */ + //pThis->ExtFeat.n.u2SmiFilterSup = 0; + //pThis->ExtFeat.n.u3SmiFilterCount = 0; + //pThis->ExtFeat.n.u3GstVirtApicModeSup = 0; /* Requires GstVirtApicSup */ + //pThis->ExtFeat.n.u2DualPprLogSup = 0; + //pThis->ExtFeat.n.u2DualEvtLogSup = 0; + //pThis->ExtFeat.n.u5MaxPasidSup = 0; /* Requires GstTranslateSup */ + //pThis->ExtFeat.n.u1UserSupervisorSup = 0; + AssertCompile(IOMMU_MAX_DEV_TAB_SEGMENTS <= 3); + pThis->ExtFeat.n.u2DevTabSegSup = IOMMU_MAX_DEV_TAB_SEGMENTS; + //pThis->ExtFeat.n.u1PprLogOverflowWarn = 0; + //pThis->ExtFeat.n.u1PprAutoRespSup = 0; + //pThis->ExtFeat.n.u2MarcSup = 0; + //pThis->ExtFeat.n.u1BlockStopMarkSup = 0; + //pThis->ExtFeat.n.u1PerfOptSup = 0; + pThis->ExtFeat.n.u1MsiCapMmioSup = 1; + //pThis->ExtFeat.n.u1GstIoSup = 0; + //pThis->ExtFeat.n.u1HostAccessSup = 0; + //pThis->ExtFeat.n.u1EnhancedPprSup = 0; + //pThis->ExtFeat.n.u1AttrForwardSup = 0; + //pThis->ExtFeat.n.u1HostDirtySup = 0; + //pThis->ExtFeat.n.u1InvIoTlbTypeSup = 0; + //pThis->ExtFeat.n.u1GstUpdateDisSup = 0; + //pThis->ExtFeat.n.u1ForcePhysDstSup = 0; + + pThis->DevSpecificFeat.u64 = 0; + pThis->DevSpecificFeat.n.u4RevMajor = IOMMU_DEVSPEC_FEAT_MAJOR_VERSION; + pThis->DevSpecificFeat.n.u4RevMinor = IOMMU_DEVSPEC_FEAT_MINOR_VERSION; + + pThis->DevSpecificCtrl.u64 = 0; + pThis->DevSpecificCtrl.n.u4RevMajor = IOMMU_DEVSPEC_CTRL_MAJOR_VERSION; + pThis->DevSpecificCtrl.n.u4RevMinor = IOMMU_DEVSPEC_CTRL_MINOR_VERSION; + + pThis->DevSpecificStatus.u64 = 0; + pThis->DevSpecificStatus.n.u4RevMajor = IOMMU_DEVSPEC_STATUS_MAJOR_VERSION; + pThis->DevSpecificStatus.n.u4RevMinor = IOMMU_DEVSPEC_STATUS_MINOR_VERSION; + + pThis->MiscInfo.u64 = RT_MAKE_U64(uMiscInfoReg0, uMiscInfoReg1); + + pThis->RsvdReg = 0; + + /* + * Initialize parts of the IOMMU state as it would during reset. + * Also initializes non-zero initial values like IRTE cache keys. + * Must be called -after- initializing PCI config. space registers. + */ + iommuAmdR3Reset(pDevIns); + + LogRel(("%s: DSFX=%u.%u DSCX=%u.%u DSSX=%u.%u ExtFeat=%#RX64\n", IOMMU_LOG_PFX, + pThis->DevSpecificFeat.n.u4RevMajor, pThis->DevSpecificFeat.n.u4RevMinor, + pThis->DevSpecificCtrl.n.u4RevMajor, pThis->DevSpecificCtrl.n.u4RevMinor, + pThis->DevSpecificStatus.n.u4RevMajor, pThis->DevSpecificStatus.n.u4RevMinor, + pThis->ExtFeat.u64)); + return VINF_SUCCESS; +} + +#else + +/** + * @callback_method_impl{PDMDEVREGR0,pfnConstruct} + */ +static DECLCALLBACK(int) iommuAmdRZConstruct(PPDMDEVINS pDevIns) +{ + PDMDEV_CHECK_VERSIONS_RETURN(pDevIns); + PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); + PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC); + pThisCC->CTX_SUFF(pDevIns) = pDevIns; + + /* We will use PDM's critical section (via helpers) for the IOMMU device. */ + int rc = PDMDevHlpSetDeviceCritSect(pDevIns, PDMDevHlpCritSectGetNop(pDevIns)); + AssertRCReturn(rc, rc); + + /* Set up the MMIO RZ handlers. */ + rc = PDMDevHlpMmioSetUpContext(pDevIns, pThis->hMmio, iommuAmdMmioWrite, iommuAmdMmioRead, NULL /* pvUser */); + AssertRCReturn(rc, rc); + + /* Set up the IOMMU RZ callbacks. */ + PDMIOMMUREGCC IommuReg; + RT_ZERO(IommuReg); + IommuReg.u32Version = PDM_IOMMUREGCC_VERSION; + IommuReg.idxIommu = pThis->idxIommu; + IommuReg.pfnMemAccess = iommuAmdMemAccess; + IommuReg.pfnMemBulkAccess = iommuAmdMemBulkAccess; + IommuReg.pfnMsiRemap = iommuAmdMsiRemap; + IommuReg.u32TheEnd = PDM_IOMMUREGCC_VERSION; + rc = PDMDevHlpIommuSetUpContext(pDevIns, &IommuReg, &pThisCC->CTX_SUFF(pIommuHlp)); + AssertRCReturn(rc, rc); + AssertPtrReturn(pThisCC->CTX_SUFF(pIommuHlp), VERR_IOMMU_IPE_1); + AssertReturn(pThisCC->CTX_SUFF(pIommuHlp)->u32Version == CTX_MID(PDM_IOMMUHLP,_VERSION), VERR_VERSION_MISMATCH); + AssertReturn(pThisCC->CTX_SUFF(pIommuHlp)->u32TheEnd == CTX_MID(PDM_IOMMUHLP,_VERSION), VERR_VERSION_MISMATCH); + AssertPtr(pThisCC->CTX_SUFF(pIommuHlp)->pfnLock); + AssertPtr(pThisCC->CTX_SUFF(pIommuHlp)->pfnUnlock); + AssertPtr(pThisCC->CTX_SUFF(pIommuHlp)->pfnLockIsOwner); + AssertPtr(pThisCC->CTX_SUFF(pIommuHlp)->pfnSendMsi); + return VINF_SUCCESS; +} +#endif + + +/** + * The device registration structure. + */ +const PDMDEVREG g_DeviceIommuAmd = +{ + /* .u32Version = */ PDM_DEVREG_VERSION, + /* .uReserved0 = */ 0, + /* .szName = */ "iommu-amd", + /* .fFlags = */ PDM_DEVREG_FLAGS_DEFAULT_BITS | PDM_DEVREG_FLAGS_RZ | PDM_DEVREG_FLAGS_NEW_STYLE, + /* .fClass = */ PDM_DEVREG_CLASS_PCI_BUILTIN, + /* .cMaxInstances = */ 1, + /* .uSharedVersion = */ 42, + /* .cbInstanceShared = */ sizeof(IOMMU), + /* .cbInstanceCC = */ sizeof(IOMMUCC), + /* .cbInstanceRC = */ sizeof(IOMMURC), + /* .cMaxPciDevices = */ 1, + /* .cMaxMsixVectors = */ 0, + /* .pszDescription = */ "IOMMU (AMD)", +#if defined(IN_RING3) + /* .pszRCMod = */ "VBoxDDRC.rc", + /* .pszR0Mod = */ "VBoxDDR0.r0", + /* .pfnConstruct = */ iommuAmdR3Construct, + /* .pfnDestruct = */ iommuAmdR3Destruct, + /* .pfnRelocate = */ NULL, + /* .pfnMemSetup = */ NULL, + /* .pfnPowerOn = */ NULL, + /* .pfnReset = */ iommuAmdR3Reset, + /* .pfnSuspend = */ NULL, + /* .pfnResume = */ NULL, + /* .pfnAttach = */ NULL, + /* .pfnDetach = */ NULL, + /* .pfnQueryInterface = */ NULL, + /* .pfnInitComplete = */ NULL, + /* .pfnPowerOff = */ NULL, + /* .pfnSoftReset = */ NULL, + /* .pfnReserved0 = */ NULL, + /* .pfnReserved1 = */ NULL, + /* .pfnReserved2 = */ NULL, + /* .pfnReserved3 = */ NULL, + /* .pfnReserved4 = */ NULL, + /* .pfnReserved5 = */ NULL, + /* .pfnReserved6 = */ NULL, + /* .pfnReserved7 = */ NULL, +#elif defined(IN_RING0) + /* .pfnEarlyConstruct = */ NULL, + /* .pfnConstruct = */ iommuAmdRZConstruct, + /* .pfnDestruct = */ NULL, + /* .pfnFinalDestruct = */ NULL, + /* .pfnRequest = */ NULL, + /* .pfnReserved0 = */ NULL, + /* .pfnReserved1 = */ NULL, + /* .pfnReserved2 = */ NULL, + /* .pfnReserved3 = */ NULL, + /* .pfnReserved4 = */ NULL, + /* .pfnReserved5 = */ NULL, + /* .pfnReserved6 = */ NULL, + /* .pfnReserved7 = */ NULL, +#elif defined(IN_RC) + /* .pfnConstruct = */ iommuAmdRZConstruct, + /* .pfnReserved0 = */ NULL, + /* .pfnReserved1 = */ NULL, + /* .pfnReserved2 = */ NULL, + /* .pfnReserved3 = */ NULL, + /* .pfnReserved4 = */ NULL, + /* .pfnReserved5 = */ NULL, + /* .pfnReserved6 = */ NULL, + /* .pfnReserved7 = */ NULL, +#else +# error "Not in IN_RING3, IN_RING0 or IN_RC!" +#endif + /* .u32VersionEnd = */ PDM_DEVREG_VERSION +}; + +#endif /* !VBOX_DEVICE_STRUCT_TESTCASE */ + diff --git a/src/VBox/Devices/Bus/DevIommuAmd.h b/src/VBox/Devices/Bus/DevIommuAmd.h new file mode 100644 index 00000000..81d4e080 --- /dev/null +++ b/src/VBox/Devices/Bus/DevIommuAmd.h @@ -0,0 +1,62 @@ +/* $Id: DevIommuAmd.h $ */ +/** @file + * DevIommuAmd - I/O Memory Management Unit (AMD), header shared with the IOMMU, ACPI, chipset/firmware code. + */ + +/* + * Copyright (C) 2020-2022 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef VBOX_INCLUDED_SRC_Bus_DevIommuAmd_h +#define VBOX_INCLUDED_SRC_Bus_DevIommuAmd_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +/** AMD's vendor ID. */ +#define IOMMU_PCI_VENDOR_ID 0x1022 +/** VirtualBox IOMMU device ID. */ +#define IOMMU_PCI_DEVICE_ID 0xc0de +/** VirtualBox IOMMU device revision ID. */ +#define IOMMU_PCI_REVISION_ID 0x01 +/** The MMIO base address of the IOMMU (taken from real hardware). */ +#define IOMMU_MMIO_BASE_ADDR 0xfeb80000 +/** Size of the MMIO region in bytes. */ +#define IOMMU_MMIO_REGION_SIZE _16K +/** Number of device table segments supported (power of 2). */ +#define IOMMU_MAX_DEV_TAB_SEGMENTS 3 +/** Maximum host address translation level supported (inclusive). NOTE! If you + * change this make sure to change the value in ACPI tables (DevACPI.cpp) */ +#define IOMMU_MAX_HOST_PT_LEVEL 6 +/** The device-specific feature major revision. */ +#define IOMMU_DEVSPEC_FEAT_MAJOR_VERSION 0x1 +/** The device-specific feature minor revision. */ +#define IOMMU_DEVSPEC_FEAT_MINOR_VERSION 0x0 +/** The device-specific control major revision. */ +#define IOMMU_DEVSPEC_CTRL_MAJOR_VERSION 0x1 +/** The device-specific control minor revision. */ +#define IOMMU_DEVSPEC_CTRL_MINOR_VERSION 0x0 +/** The device-specific status major revision. */ +#define IOMMU_DEVSPEC_STATUS_MAJOR_VERSION 0x1 +/** The device-specific status minor revision. */ +#define IOMMU_DEVSPEC_STATUS_MINOR_VERSION 0x0 + +#endif /* !VBOX_INCLUDED_SRC_Bus_DevIommuAmd_h */ diff --git a/src/VBox/Devices/Bus/DevIommuIntel.cpp b/src/VBox/Devices/Bus/DevIommuIntel.cpp new file mode 100644 index 00000000..cc338f3c --- /dev/null +++ b/src/VBox/Devices/Bus/DevIommuIntel.cpp @@ -0,0 +1,4459 @@ +/* $Id: DevIommuIntel.cpp $ */ +/** @file + * IOMMU - Input/Output Memory Management Unit - Intel implementation. + */ + +/* + * Copyright (C) 2021-2022 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DEV_IOMMU +#include "VBoxDD.h" +#include "DevIommuIntel.h" + +#include <VBox/iommu-intel.h> +#include <iprt/mem.h> +#include <iprt/string.h> + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** Gets the low uint32_t of a uint64_t or something equivalent. + * + * This is suitable for casting constants outside code (since RT_LO_U32 can't be + * used as it asserts for correctness when compiling on certain compilers). */ +#define DMAR_LO_U32(a) (uint32_t)(UINT32_MAX & (a)) + +/** Gets the high uint32_t of a uint64_t or something equivalent. + * + * This is suitable for casting constants outside code (since RT_HI_U32 can't be + * used as it asserts for correctness when compiling on certain compilers). */ +#define DMAR_HI_U32(a) (uint32_t)((a) >> 32) + +/** Asserts MMIO access' offset and size are valid or returns appropriate error + * code suitable for returning from MMIO access handlers. */ +#define DMAR_ASSERT_MMIO_ACCESS_RET(a_off, a_cb) \ + do { \ + AssertReturn((a_cb) == 4 || (a_cb) == 8, VINF_IOM_MMIO_UNUSED_FF); \ + AssertReturn(!((a_off) & ((a_cb) - 1)), VINF_IOM_MMIO_UNUSED_FF); \ + } while (0) + +/** Checks if the MMIO offset is valid. */ +#define DMAR_IS_MMIO_OFF_VALID(a_off) ( (a_off) < DMAR_MMIO_GROUP_0_OFF_END \ + || (a_off) - (uint16_t)DMAR_MMIO_GROUP_1_OFF_FIRST < (uint16_t)DMAR_MMIO_GROUP_1_SIZE) + +/** Acquires the DMAR lock but returns with the given busy error code on failure. */ +#define DMAR_LOCK_RET(a_pDevIns, a_pThisCC, a_rcBusy) \ + do { \ + int const rcLock = (a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnLock((a_pDevIns), (a_rcBusy)); \ + if (RT_LIKELY(rcLock == VINF_SUCCESS)) \ + { /* likely */ } \ + else \ + return rcLock; \ + } while (0) + +/** Acquires the DMAR lock (can fail under extraordinary circumstance in ring-0). */ +#define DMAR_LOCK(a_pDevIns, a_pThisCC) \ + do { \ + int const rcLock = (a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnLock((a_pDevIns), VINF_SUCCESS); \ + PDM_CRITSECT_RELEASE_ASSERT_RC_DEV((a_pDevIns), NULL, rcLock); \ + } while (0) + +/** Release the DMAR lock. */ +#define DMAR_UNLOCK(a_pDevIns, a_pThisCC) (a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnUnlock(a_pDevIns) + +/** Asserts that the calling thread owns the DMAR lock. */ +#define DMAR_ASSERT_LOCK_IS_OWNER(a_pDevIns, a_pThisCC) \ + do { \ + Assert((a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnLockIsOwner(a_pDevIns)); \ + RT_NOREF1(a_pThisCC); \ + } while (0) + +/** Asserts that the calling thread does not own the DMAR lock. */ +#define DMAR_ASSERT_LOCK_IS_NOT_OWNER(a_pDevIns, a_pThisCC) \ + do { \ + Assert((a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnLockIsOwner(a_pDevIns) == false); \ + RT_NOREF1(a_pThisCC); \ + } while (0) + +/** The number of fault recording registers our implementation supports. + * Normal guest operation shouldn't trigger faults anyway, so we only support the + * minimum number of registers (which is 1). + * + * See Intel VT-d spec. 10.4.2 "Capability Register" (CAP_REG.NFR). */ +#define DMAR_FRCD_REG_COUNT UINT32_C(1) + +/** Number of register groups (used in saved states). */ +#define DMAR_MMIO_GROUP_COUNT 2 +/** Offset of first register in group 0. */ +#define DMAR_MMIO_GROUP_0_OFF_FIRST VTD_MMIO_OFF_VER_REG +/** Offset of last register in group 0 (inclusive). */ +#define DMAR_MMIO_GROUP_0_OFF_LAST VTD_MMIO_OFF_MTRR_PHYSMASK9_REG +/** Last valid offset in group 0 (exclusive). */ +#define DMAR_MMIO_GROUP_0_OFF_END (DMAR_MMIO_GROUP_0_OFF_LAST + 8 /* sizeof MTRR_PHYSMASK9_REG */) +/** Size of the group 0 (in bytes). */ +#define DMAR_MMIO_GROUP_0_SIZE (DMAR_MMIO_GROUP_0_OFF_END - DMAR_MMIO_GROUP_0_OFF_FIRST) +/** Number of implementation-defined MMIO register offsets - IVA_REG and + * FRCD_LO_REG (used in saved state). IOTLB_REG and FRCD_HI_REG are derived from + * IVA_REG and FRCD_LO_REG respectively */ +#define DMAR_MMIO_OFF_IMPL_COUNT 2 +/** Implementation-specific MMIO offset of IVA_REG (used in saved state). */ +#define DMAR_MMIO_OFF_IVA_REG 0xe50 +/** Implementation-specific MMIO offset of IOTLB_REG. */ +#define DMAR_MMIO_OFF_IOTLB_REG 0xe58 +/** Implementation-specific MMIO offset of FRCD_LO_REG (used in saved state). */ +#define DMAR_MMIO_OFF_FRCD_LO_REG 0xe70 +/** Implementation-specific MMIO offset of FRCD_HI_REG. */ +#define DMAR_MMIO_OFF_FRCD_HI_REG 0xe78 +AssertCompile(!(DMAR_MMIO_OFF_FRCD_LO_REG & 0xf)); +AssertCompile(DMAR_MMIO_OFF_IOTLB_REG == DMAR_MMIO_OFF_IVA_REG + 8); +AssertCompile(DMAR_MMIO_OFF_FRCD_HI_REG == DMAR_MMIO_OFF_FRCD_LO_REG + 8); + +/** Offset of first register in group 1. */ +#define DMAR_MMIO_GROUP_1_OFF_FIRST VTD_MMIO_OFF_VCCAP_REG +/** Offset of last register in group 1 (inclusive). */ +#define DMAR_MMIO_GROUP_1_OFF_LAST (DMAR_MMIO_OFF_FRCD_LO_REG + 8) * DMAR_FRCD_REG_COUNT +/** Last valid offset in group 1 (exclusive). */ +#define DMAR_MMIO_GROUP_1_OFF_END (DMAR_MMIO_GROUP_1_OFF_LAST + 8 /* sizeof FRCD_HI_REG */) +/** Size of the group 1 (in bytes). */ +#define DMAR_MMIO_GROUP_1_SIZE (DMAR_MMIO_GROUP_1_OFF_END - DMAR_MMIO_GROUP_1_OFF_FIRST) + +/** DMAR implementation's major version number (exposed to software). + * We report 6 as the major version since we support queued-invalidations as + * software may make assumptions based on that. + * + * See Intel VT-d spec. 10.4.7 "Context Command Register" (CCMD_REG.CAIG). */ +#define DMAR_VER_MAJOR 6 +/** DMAR implementation's minor version number (exposed to software). */ +#define DMAR_VER_MINOR 0 + +/** Number of domain supported (0=16, 1=64, 2=256, 3=1K, 4=4K, 5=16K, 6=64K, + * 7=Reserved). */ +#define DMAR_ND 6 + +/** @name DMAR_PERM_XXX: DMA request permissions. + * The order of R, W, X bits is important as it corresponds to those bits in + * page-table entries. + * + * @{ */ +/** DMA request permission: Read. */ +#define DMAR_PERM_READ RT_BIT(0) +/** DMA request permission: Write. */ +#define DMAR_PERM_WRITE RT_BIT(1) +/** DMA request permission: Execute (ER). */ +#define DMAR_PERM_EXE RT_BIT(2) +/** DMA request permission: Supervisor privilege (PR). */ +#define DMAR_PERM_PRIV RT_BIT(3) +/** DMA request permissions: All. */ +#define DMAR_PERM_ALL (DMAR_PERM_READ | DMAR_PERM_WRITE | DMAR_PERM_EXE | DMAR_PERM_PRIV) +/** @} */ + +/** Release log prefix string. */ +#define DMAR_LOG_PFX "Intel-IOMMU" +/** The current saved state version. */ +#define DMAR_SAVED_STATE_VERSION 1 + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * DMAR error diagnostics. + * Sorted alphabetically so it's easier to add and locate items, no other reason. + * + * @note Members of this enum are used as array indices, so no gaps in enum + * values are not allowed. Update g_apszDmarDiagDesc when you modify + * fields in this enum. + */ +typedef enum +{ + /* No error, this must be zero! */ + kDmarDiag_None = 0, + + /* Address Translation Faults. */ + kDmarDiag_At_Lm_CtxEntry_Not_Present, + kDmarDiag_At_Lm_CtxEntry_Read_Failed, + kDmarDiag_At_Lm_CtxEntry_Rsvd, + kDmarDiag_At_Lm_Pt_At_Block, + kDmarDiag_At_Lm_Pt_Aw_Invalid, + kDmarDiag_At_Lm_RootEntry_Not_Present, + kDmarDiag_At_Lm_RootEntry_Read_Failed, + kDmarDiag_At_Lm_RootEntry_Rsvd, + kDmarDiag_At_Lm_Tt_Invalid, + kDmarDiag_At_Lm_Ut_At_Block, + kDmarDiag_At_Lm_Ut_Aw_Invalid, + kDmarDiag_At_Rta_Adms_Not_Supported, + kDmarDiag_At_Rta_Rsvd, + kDmarDiag_At_Rta_Smts_Not_Supported, + kDmarDiag_At_Xm_AddrIn_Invalid, + kDmarDiag_At_Xm_AddrOut_Invalid, + kDmarDiag_At_Xm_Perm_Read_Denied, + kDmarDiag_At_Xm_Perm_Write_Denied, + kDmarDiag_At_Xm_Pte_Not_Present, + kDmarDiag_At_Xm_Pte_Rsvd, + kDmarDiag_At_Xm_Pte_Sllps_Invalid, + kDmarDiag_At_Xm_Read_Pte_Failed, + kDmarDiag_At_Xm_Slpptr_Read_Failed, + + /* CCMD_REG faults. */ + kDmarDiag_CcmdReg_Not_Supported, + kDmarDiag_CcmdReg_Qi_Enabled, + kDmarDiag_CcmdReg_Ttm_Invalid, + + /* IQA_REG faults. */ + kDmarDiag_IqaReg_Dsc_Fetch_Error, + kDmarDiag_IqaReg_Dw_128_Invalid, + kDmarDiag_IqaReg_Dw_256_Invalid, + + /* Invalidation Queue Error Info. */ + kDmarDiag_Iqei_Dsc_Type_Invalid, + kDmarDiag_Iqei_Inv_Wait_Dsc_0_1_Rsvd, + kDmarDiag_Iqei_Inv_Wait_Dsc_2_3_Rsvd, + kDmarDiag_Iqei_Inv_Wait_Dsc_Invalid, + kDmarDiag_Iqei_Ttm_Rsvd, + + /* IQT_REG faults. */ + kDmarDiag_IqtReg_Qt_Invalid, + kDmarDiag_IqtReg_Qt_Not_Aligned, + + /* Interrupt Remapping Faults. */ + kDmarDiag_Ir_Cfi_Blocked, + kDmarDiag_Ir_Rfi_Intr_Index_Invalid, + kDmarDiag_Ir_Rfi_Irte_Mode_Invalid, + kDmarDiag_Ir_Rfi_Irte_Not_Present, + kDmarDiag_Ir_Rfi_Irte_Read_Failed, + kDmarDiag_Ir_Rfi_Irte_Rsvd, + kDmarDiag_Ir_Rfi_Irte_Svt_Bus, + kDmarDiag_Ir_Rfi_Irte_Svt_Masked, + kDmarDiag_Ir_Rfi_Irte_Svt_Rsvd, + kDmarDiag_Ir_Rfi_Rsvd, + + /* Member for determining array index limit. */ + kDmarDiag_End, + + /* Usual 32-bit type size hack. */ + kDmarDiag_32Bit_Hack = 0x7fffffff +} DMARDIAG; +AssertCompileSize(DMARDIAG, 4); + +#ifdef IN_RING3 +/** DMAR diagnostic enum description expansion. + * The below construct ensures typos in the input to this macro are caught + * during compile time. */ +# define DMARDIAG_DESC(a_Name) RT_CONCAT(kDmarDiag_, a_Name) < kDmarDiag_End ? RT_STR(a_Name) : "Ignored" + +/** DMAR diagnostics description for members in DMARDIAG. */ +static const char *const g_apszDmarDiagDesc[] = +{ + DMARDIAG_DESC(None ), + + /* Address Translation Faults. */ + DMARDIAG_DESC(At_Lm_CtxEntry_Not_Present ), + DMARDIAG_DESC(At_Lm_CtxEntry_Read_Failed ), + DMARDIAG_DESC(At_Lm_CtxEntry_Rsvd ), + DMARDIAG_DESC(At_Lm_Pt_At_Block ), + DMARDIAG_DESC(At_Lm_Pt_Aw_Invalid ), + DMARDIAG_DESC(At_Lm_RootEntry_Not_Present), + DMARDIAG_DESC(At_Lm_RootEntry_Read_Failed), + DMARDIAG_DESC(At_Lm_RootEntry_Rsvd ), + DMARDIAG_DESC(At_Lm_Tt_Invalid ), + DMARDIAG_DESC(At_Lm_Ut_At_Block ), + DMARDIAG_DESC(At_Lm_Ut_Aw_Invalid ), + DMARDIAG_DESC(At_Rta_Adms_Not_Supported ), + DMARDIAG_DESC(At_Rta_Rsvd ), + DMARDIAG_DESC(At_Rta_Smts_Not_Supported ), + DMARDIAG_DESC(At_Xm_AddrIn_Invalid ), + DMARDIAG_DESC(At_Xm_AddrOut_Invalid ), + DMARDIAG_DESC(At_Xm_Perm_Read_Denied ), + DMARDIAG_DESC(At_Xm_Perm_Write_Denied ), + DMARDIAG_DESC(At_Xm_Pte_Not_Present ), + DMARDIAG_DESC(At_Xm_Pte_Rsvd ), + DMARDIAG_DESC(At_Xm_Pte_Sllps_Invalid ), + DMARDIAG_DESC(At_Xm_Read_Pte_Failed ), + DMARDIAG_DESC(At_Xm_Slpptr_Read_Failed ), + + /* CCMD_REG faults. */ + DMARDIAG_DESC(CcmdReg_Not_Supported ), + DMARDIAG_DESC(CcmdReg_Qi_Enabled ), + DMARDIAG_DESC(CcmdReg_Ttm_Invalid ), + + /* IQA_REG faults. */ + DMARDIAG_DESC(IqaReg_Dsc_Fetch_Error ), + DMARDIAG_DESC(IqaReg_Dw_128_Invalid ), + DMARDIAG_DESC(IqaReg_Dw_256_Invalid ), + + /* Invalidation Queue Error Info. */ + DMARDIAG_DESC(Iqei_Dsc_Type_Invalid ), + DMARDIAG_DESC(Iqei_Inv_Wait_Dsc_0_1_Rsvd ), + DMARDIAG_DESC(Iqei_Inv_Wait_Dsc_2_3_Rsvd ), + DMARDIAG_DESC(Iqei_Inv_Wait_Dsc_Invalid ), + DMARDIAG_DESC(Iqei_Ttm_Rsvd ), + + /* IQT_REG faults. */ + DMARDIAG_DESC(IqtReg_Qt_Invalid ), + DMARDIAG_DESC(IqtReg_Qt_Not_Aligned ), + + /* Interrupt remapping faults. */ + DMARDIAG_DESC(Ir_Cfi_Blocked ), + DMARDIAG_DESC(Ir_Rfi_Intr_Index_Invalid ), + DMARDIAG_DESC(Ir_Rfi_Irte_Mode_Invalid ), + DMARDIAG_DESC(Ir_Rfi_Irte_Not_Present ), + DMARDIAG_DESC(Ir_Rfi_Irte_Read_Failed ), + DMARDIAG_DESC(Ir_Rfi_Irte_Rsvd ), + DMARDIAG_DESC(Ir_Rfi_Irte_Svt_Bus ), + DMARDIAG_DESC(Ir_Rfi_Irte_Svt_Masked ), + DMARDIAG_DESC(Ir_Rfi_Irte_Svt_Rsvd ), + DMARDIAG_DESC(Ir_Rfi_Rsvd ), + /* kDmarDiag_End */ +}; +AssertCompile(RT_ELEMENTS(g_apszDmarDiagDesc) == kDmarDiag_End); +# undef DMARDIAG_DESC +#endif /* IN_RING3 */ + +/** + * The shared DMAR device state. + */ +typedef struct DMAR +{ + /** IOMMU device index. */ + uint32_t idxIommu; + /** Padding. */ + uint32_t u32Padding0; + + /** Registers (group 0). */ + uint8_t abRegs0[DMAR_MMIO_GROUP_0_SIZE]; + /** Registers (group 1). */ + uint8_t abRegs1[DMAR_MMIO_GROUP_1_SIZE]; + + /** @name Lazily activated registers. + * These are the active values for lazily activated registers. Software is free to + * modify the actual register values while remapping/translation is enabled but they + * take effect only when explicitly signaled by software, hence we need to hold the + * active values separately. + * @{ */ + /** Currently active IRTA_REG. */ + uint64_t uIrtaReg; + /** Currently active RTADDR_REG. */ + uint64_t uRtaddrReg; + /** @} */ + + /** @name Register copies for a tiny bit faster and more convenient access. + * @{ */ + /** Copy of VER_REG. */ + uint8_t uVerReg; + /** Alignment. */ + uint8_t abPadding0[7]; + /** Copy of CAP_REG. */ + uint64_t fCapReg; + /** Copy of ECAP_REG. */ + uint64_t fExtCapReg; + /** @} */ + + /** Host-address width (HAW) base address mask. */ + uint64_t fHawBaseMask; + /** Maximum guest-address width (MGAW) invalid address mask. */ + uint64_t fMgawInvMask; + /** Context-entry qword-1 valid mask. */ + uint64_t fCtxEntryQw1ValidMask; + /** Maximum supported paging level (3, 4 or 5). */ + uint8_t cMaxPagingLevel; + /** DMA request valid permissions mask. */ + uint8_t fPermValidMask; + /** Alignment. */ + uint8_t abPadding1[6]; + + /** The event semaphore the invalidation-queue thread waits on. */ + SUPSEMEVENT hEvtInvQueue; + /** Error diagnostic. */ + DMARDIAG enmDiag; + /** Padding. */ + uint32_t uPadding0; + /** The MMIO handle. */ + IOMMMIOHANDLE hMmio; + +#ifdef VBOX_WITH_STATISTICS + STAMCOUNTER StatMmioReadR3; /**< Number of MMIO reads in R3. */ + STAMCOUNTER StatMmioReadRZ; /**< Number of MMIO reads in RZ. */ + STAMCOUNTER StatMmioWriteR3; /**< Number of MMIO writes in R3. */ + STAMCOUNTER StatMmioWriteRZ; /**< Number of MMIO writes in RZ. */ + + STAMCOUNTER StatMsiRemapCfiR3; /**< Number of compatibility-format interrupts remap requests in R3. */ + STAMCOUNTER StatMsiRemapCfiRZ; /**< Number of compatibility-format interrupts remap requests in RZ. */ + STAMCOUNTER StatMsiRemapRfiR3; /**< Number of remappable-format interrupts remap requests in R3. */ + STAMCOUNTER StatMsiRemapRfiRZ; /**< Number of remappable-format interrupts remap requests in RZ. */ + + STAMCOUNTER StatMemReadR3; /**< Number of memory read translation requests in R3. */ + STAMCOUNTER StatMemReadRZ; /**< Number of memory read translation requests in RZ. */ + STAMCOUNTER StatMemWriteR3; /**< Number of memory write translation requests in R3. */ + STAMCOUNTER StatMemWriteRZ; /**< Number of memory write translation requests in RZ. */ + + STAMCOUNTER StatMemBulkReadR3; /**< Number of memory read bulk translation requests in R3. */ + STAMCOUNTER StatMemBulkReadRZ; /**< Number of memory read bulk translation requests in RZ. */ + STAMCOUNTER StatMemBulkWriteR3; /**< Number of memory write bulk translation requests in R3. */ + STAMCOUNTER StatMemBulkWriteRZ; /**< Number of memory write bulk translation requests in RZ. */ + + STAMCOUNTER StatCcInvDsc; /**< Number of Context-cache descriptors processed. */ + STAMCOUNTER StatIotlbInvDsc; /**< Number of IOTLB descriptors processed. */ + STAMCOUNTER StatDevtlbInvDsc; /**< Number of Device-TLB descriptors processed. */ + STAMCOUNTER StatIecInvDsc; /**< Number of Interrupt-Entry cache descriptors processed. */ + STAMCOUNTER StatInvWaitDsc; /**< Number of Invalidation wait descriptors processed. */ + STAMCOUNTER StatPasidIotlbInvDsc; /**< Number of PASID-based IOTLB descriptors processed. */ + STAMCOUNTER StatPasidCacheInvDsc; /**< Number of PASID-cache descriptors processed. */ + STAMCOUNTER StatPasidDevtlbInvDsc; /**< Number of PASID-based device-TLB descriptors processed. */ +#endif +} DMAR; +/** Pointer to the DMAR device state. */ +typedef DMAR *PDMAR; +/** Pointer to the const DMAR device state. */ +typedef DMAR const *PCDMAR; +AssertCompileMemberAlignment(DMAR, abRegs0, 8); +AssertCompileMemberAlignment(DMAR, abRegs1, 8); + +/** + * The ring-3 DMAR device state. + */ +typedef struct DMARR3 +{ + /** Device instance. */ + PPDMDEVINSR3 pDevInsR3; + /** The IOMMU helper. */ + R3PTRTYPE(PCPDMIOMMUHLPR3) pIommuHlpR3; + /** The invalidation-queue thread. */ + R3PTRTYPE(PPDMTHREAD) pInvQueueThread; +} DMARR3; +/** Pointer to the ring-3 DMAR device state. */ +typedef DMARR3 *PDMARR3; +/** Pointer to the const ring-3 DMAR device state. */ +typedef DMARR3 const *PCDMARR3; + +/** + * The ring-0 DMAR device state. + */ +typedef struct DMARR0 +{ + /** Device instance. */ + PPDMDEVINSR0 pDevInsR0; + /** The IOMMU helper. */ + R0PTRTYPE(PCPDMIOMMUHLPR0) pIommuHlpR0; +} DMARR0; +/** Pointer to the ring-0 IOMMU device state. */ +typedef DMARR0 *PDMARR0; +/** Pointer to the const ring-0 IOMMU device state. */ +typedef DMARR0 const *PCDMARR0; + +/** + * The raw-mode DMAR device state. + */ +typedef struct DMARRC +{ + /** Device instance. */ + PPDMDEVINSRC pDevInsRC; + /** The IOMMU helper. */ + RCPTRTYPE(PCPDMIOMMUHLPRC) pIommuHlpRC; +} DMARRC; +/** Pointer to the raw-mode DMAR device state. */ +typedef DMARRC *PDMARRC; +/** Pointer to the const raw-mode DMAR device state. */ +typedef DMARRC const *PCIDMARRC; + +/** The DMAR device state for the current context. */ +typedef CTX_SUFF(DMAR) DMARCC; +/** Pointer to the DMAR device state for the current context. */ +typedef CTX_SUFF(PDMAR) PDMARCC; +/** Pointer to the const DMAR device state for the current context. */ +typedef CTX_SUFF(PDMAR) const PCDMARCC; + +/** + * DMAR originated events that generate interrupts. + */ +typedef enum DMAREVENTTYPE +{ + /** Invalidation completion event. */ + DMAREVENTTYPE_INV_COMPLETE = 0, + /** Fault event. */ + DMAREVENTTYPE_FAULT +} DMAREVENTTYPE; + +/** + * I/O Page. + */ +typedef struct DMARIOPAGE +{ + /** The base DMA address of a page. */ + RTGCPHYS GCPhysBase; + /** The page shift. */ + uint8_t cShift; + /** The permissions of this page (DMAR_PERM_XXX). */ + uint8_t fPerm; +} DMARIOPAGE; +/** Pointer to an I/O page. */ +typedef DMARIOPAGE *PDMARIOPAGE; +/** Pointer to a const I/O address range. */ +typedef DMARIOPAGE const *PCDMARIOPAGE; + +/** + * I/O Address Range. + */ +typedef struct DMARIOADDRRANGE +{ + /** The starting DMA address of this range. */ + uint64_t uAddr; + /** The size of the range (in bytes). */ + size_t cb; + /** The permissions of this range (DMAR_PERM_XXX). */ + uint8_t fPerm; +} DMARIOADDRRANGE; +/** Pointer to an I/O address range. */ +typedef DMARIOADDRRANGE *PDMARIOADDRRANGE; +/** Pointer to a const I/O address range. */ +typedef DMARIOADDRRANGE const *PCDMARIOADDRRANGE; + +/** + * DMA Memory Request (Input). + */ +typedef struct DMARMEMREQIN +{ + /** The address range being accessed. */ + DMARIOADDRRANGE AddrRange; + /** The source device ID (bus, device, function). */ + uint16_t idDevice; + /** The PASID if present (can be NIL_PCIPASID). */ + PCIPASID Pasid; + /* The address translation type. */ + PCIADDRTYPE enmAddrType; + /** The request type. */ + VTDREQTYPE enmReqType; +} DMARMEMREQIN; +/** Pointer to a DMA memory request input. */ +typedef DMARMEMREQIN *PDMARMEMREQIN; +/** Pointer to a const DMA memory input. */ +typedef DMARMEMREQIN const *PCDMARMEMREQIN; + +/** + * DMA Memory Request (Output). + */ +typedef struct DMARMEMREQOUT +{ + /** The address range of the translated region. */ + DMARIOADDRRANGE AddrRange; + /** The domain ID of the translated region. */ + uint16_t idDomain; +} DMARMEMREQOUT; +/** Pointer to a DMA memory request output. */ +typedef DMARMEMREQOUT *PDMARMEMREQOUT; +/** Pointer to a const DMA memory request output. */ +typedef DMARMEMREQOUT const *PCDMARMEMREQOUT; + +/** + * DMA Memory Request (Auxiliary Info). + * These get updated and used as part of the translation process. + */ +typedef struct DMARMEMREQAUX +{ + /** The table translation mode (VTD_TTM_XXX). */ + uint8_t fTtm; + /** The fault processing disabled (FPD) bit. */ + uint8_t fFpd; + /** The paging level of the translation. */ + uint8_t cPagingLevel; + uint8_t abPadding[5]; + /** The address of the first-level page-table. */ + uint64_t GCPhysFlPt; + /** The address of second-level page-table. */ + uint64_t GCPhysSlPt; +} DMARMEMREQAUX; +/** Pointer to a DMA memory request output. */ +typedef DMARMEMREQAUX *PDMARMEMREQAUX; +/** Pointer to a const DMA memory request output. */ +typedef DMARMEMREQAUX const *PCDMARMEMREQAUX; + +/** + * DMA Memory Request Remapping Information. + */ +typedef struct DMARMEMREQREMAP +{ + /** The DMA memory request input. */ + DMARMEMREQIN In; + /** DMA memory request auxiliary information. */ + DMARMEMREQAUX Aux; + /** The DMA memory request output. */ + DMARMEMREQOUT Out; +} DMARMEMREQREMAP; +/** Pointer to a DMA remap info. */ +typedef DMARMEMREQREMAP *PDMARMEMREQREMAP; +/** Pointer to a const DMA remap info. */ +typedef DMARMEMREQREMAP const *PCDMARMEMREQREMAP; + +/** + * Callback function to lookup a DMA address. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param pMemReqIn The DMA memory request input. + * @param pMemReqAux The DMA memory request auxiliary info. + * @param pIoPageOut Where to store the output of the lookup. + */ +typedef DECLCALLBACKTYPE(int, FNDMADDRLOOKUP,(PPDMDEVINS pDevIns, PCDMARMEMREQIN pMemReqIn, PCDMARMEMREQAUX pMemReqAux, + PDMARIOPAGE pIoPageOut)); +/** Pointer to a DMA address-lookup function. */ +typedef FNDMADDRLOOKUP *PFNDMADDRLOOKUP; + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +/** + * Read-write masks for DMAR registers (group 0). + */ +static uint32_t const g_au32RwMasks0[] = +{ + /* Offset Register Low High */ + /* 0x000 VER_REG */ VTD_VER_REG_RW_MASK, + /* 0x004 Reserved */ 0, + /* 0x008 CAP_REG */ DMAR_LO_U32(VTD_CAP_REG_RW_MASK), DMAR_HI_U32(VTD_CAP_REG_RW_MASK), + /* 0x010 ECAP_REG */ DMAR_LO_U32(VTD_ECAP_REG_RW_MASK), DMAR_HI_U32(VTD_ECAP_REG_RW_MASK), + /* 0x018 GCMD_REG */ VTD_GCMD_REG_RW_MASK, + /* 0x01c GSTS_REG */ VTD_GSTS_REG_RW_MASK, + /* 0x020 RTADDR_REG */ DMAR_LO_U32(VTD_RTADDR_REG_RW_MASK), DMAR_HI_U32(VTD_RTADDR_REG_RW_MASK), + /* 0x028 CCMD_REG */ DMAR_LO_U32(VTD_CCMD_REG_RW_MASK), DMAR_HI_U32(VTD_CCMD_REG_RW_MASK), + /* 0x030 Reserved */ 0, + /* 0x034 FSTS_REG */ VTD_FSTS_REG_RW_MASK, + /* 0x038 FECTL_REG */ VTD_FECTL_REG_RW_MASK, + /* 0x03c FEDATA_REG */ VTD_FEDATA_REG_RW_MASK, + /* 0x040 FEADDR_REG */ VTD_FEADDR_REG_RW_MASK, + /* 0x044 FEUADDR_REG */ VTD_FEUADDR_REG_RW_MASK, + /* 0x048 Reserved */ 0, 0, + /* 0x050 Reserved */ 0, 0, + /* 0x058 AFLOG_REG */ DMAR_LO_U32(VTD_AFLOG_REG_RW_MASK), DMAR_HI_U32(VTD_AFLOG_REG_RW_MASK), + /* 0x060 Reserved */ 0, + /* 0x064 PMEN_REG */ 0, /* RO as we don't support PLMR and PHMR. */ + /* 0x068 PLMBASE_REG */ 0, /* RO as we don't support PLMR. */ + /* 0x06c PLMLIMIT_REG */ 0, /* RO as we don't support PLMR. */ + /* 0x070 PHMBASE_REG */ 0, 0, /* RO as we don't support PHMR. */ + /* 0x078 PHMLIMIT_REG */ 0, 0, /* RO as we don't support PHMR. */ + /* 0x080 IQH_REG */ DMAR_LO_U32(VTD_IQH_REG_RW_MASK), DMAR_HI_U32(VTD_IQH_REG_RW_MASK), + /* 0x088 IQT_REG */ DMAR_LO_U32(VTD_IQT_REG_RW_MASK), DMAR_HI_U32(VTD_IQT_REG_RW_MASK), + /* 0x090 IQA_REG */ DMAR_LO_U32(VTD_IQA_REG_RW_MASK), DMAR_HI_U32(VTD_IQA_REG_RW_MASK), + /* 0x098 Reserved */ 0, + /* 0x09c ICS_REG */ VTD_ICS_REG_RW_MASK, + /* 0x0a0 IECTL_REG */ VTD_IECTL_REG_RW_MASK, + /* 0x0a4 IEDATA_REG */ VTD_IEDATA_REG_RW_MASK, + /* 0x0a8 IEADDR_REG */ VTD_IEADDR_REG_RW_MASK, + /* 0x0ac IEUADDR_REG */ VTD_IEUADDR_REG_RW_MASK, + /* 0x0b0 IQERCD_REG */ DMAR_LO_U32(VTD_IQERCD_REG_RW_MASK), DMAR_HI_U32(VTD_IQERCD_REG_RW_MASK), + /* 0x0b8 IRTA_REG */ DMAR_LO_U32(VTD_IRTA_REG_RW_MASK), DMAR_HI_U32(VTD_IRTA_REG_RW_MASK), + /* 0x0c0 PQH_REG */ DMAR_LO_U32(VTD_PQH_REG_RW_MASK), DMAR_HI_U32(VTD_PQH_REG_RW_MASK), + /* 0x0c8 PQT_REG */ DMAR_LO_U32(VTD_PQT_REG_RW_MASK), DMAR_HI_U32(VTD_PQT_REG_RW_MASK), + /* 0x0d0 PQA_REG */ DMAR_LO_U32(VTD_PQA_REG_RW_MASK), DMAR_HI_U32(VTD_PQA_REG_RW_MASK), + /* 0x0d8 Reserved */ 0, + /* 0x0dc PRS_REG */ VTD_PRS_REG_RW_MASK, + /* 0x0e0 PECTL_REG */ VTD_PECTL_REG_RW_MASK, + /* 0x0e4 PEDATA_REG */ VTD_PEDATA_REG_RW_MASK, + /* 0x0e8 PEADDR_REG */ VTD_PEADDR_REG_RW_MASK, + /* 0x0ec PEUADDR_REG */ VTD_PEUADDR_REG_RW_MASK, + /* 0x0f0 Reserved */ 0, 0, + /* 0x0f8 Reserved */ 0, 0, + /* 0x100 MTRRCAP_REG */ DMAR_LO_U32(VTD_MTRRCAP_REG_RW_MASK), DMAR_HI_U32(VTD_MTRRCAP_REG_RW_MASK), + /* 0x108 MTRRDEF_REG */ 0, 0, /* RO as we don't support MTS. */ + /* 0x110 Reserved */ 0, 0, + /* 0x118 Reserved */ 0, 0, + /* 0x120 MTRR_FIX64_00000_REG */ 0, 0, /* RO as we don't support MTS. */ + /* 0x128 MTRR_FIX16K_80000_REG */ 0, 0, + /* 0x130 MTRR_FIX16K_A0000_REG */ 0, 0, + /* 0x138 MTRR_FIX4K_C0000_REG */ 0, 0, + /* 0x140 MTRR_FIX4K_C8000_REG */ 0, 0, + /* 0x148 MTRR_FIX4K_D0000_REG */ 0, 0, + /* 0x150 MTRR_FIX4K_D8000_REG */ 0, 0, + /* 0x158 MTRR_FIX4K_E0000_REG */ 0, 0, + /* 0x160 MTRR_FIX4K_E8000_REG */ 0, 0, + /* 0x168 MTRR_FIX4K_F0000_REG */ 0, 0, + /* 0x170 MTRR_FIX4K_F8000_REG */ 0, 0, + /* 0x178 Reserved */ 0, 0, + /* 0x180 MTRR_PHYSBASE0_REG */ 0, 0, /* RO as we don't support MTS. */ + /* 0x188 MTRR_PHYSMASK0_REG */ 0, 0, + /* 0x190 MTRR_PHYSBASE1_REG */ 0, 0, + /* 0x198 MTRR_PHYSMASK1_REG */ 0, 0, + /* 0x1a0 MTRR_PHYSBASE2_REG */ 0, 0, + /* 0x1a8 MTRR_PHYSMASK2_REG */ 0, 0, + /* 0x1b0 MTRR_PHYSBASE3_REG */ 0, 0, + /* 0x1b8 MTRR_PHYSMASK3_REG */ 0, 0, + /* 0x1c0 MTRR_PHYSBASE4_REG */ 0, 0, + /* 0x1c8 MTRR_PHYSMASK4_REG */ 0, 0, + /* 0x1d0 MTRR_PHYSBASE5_REG */ 0, 0, + /* 0x1d8 MTRR_PHYSMASK5_REG */ 0, 0, + /* 0x1e0 MTRR_PHYSBASE6_REG */ 0, 0, + /* 0x1e8 MTRR_PHYSMASK6_REG */ 0, 0, + /* 0x1f0 MTRR_PHYSBASE7_REG */ 0, 0, + /* 0x1f8 MTRR_PHYSMASK7_REG */ 0, 0, + /* 0x200 MTRR_PHYSBASE8_REG */ 0, 0, + /* 0x208 MTRR_PHYSMASK8_REG */ 0, 0, + /* 0x210 MTRR_PHYSBASE9_REG */ 0, 0, + /* 0x218 MTRR_PHYSMASK9_REG */ 0, 0, +}; +AssertCompile(sizeof(g_au32RwMasks0) == DMAR_MMIO_GROUP_0_SIZE); + +/** + * Read-only Status, Write-1-to-clear masks for DMAR registers (group 0). + */ +static uint32_t const g_au32Rw1cMasks0[] = +{ + /* Offset Register Low High */ + /* 0x000 VER_REG */ 0, + /* 0x004 Reserved */ 0, + /* 0x008 CAP_REG */ 0, 0, + /* 0x010 ECAP_REG */ 0, 0, + /* 0x018 GCMD_REG */ 0, + /* 0x01c GSTS_REG */ 0, + /* 0x020 RTADDR_REG */ 0, 0, + /* 0x028 CCMD_REG */ 0, 0, + /* 0x030 Reserved */ 0, + /* 0x034 FSTS_REG */ VTD_FSTS_REG_RW1C_MASK, + /* 0x038 FECTL_REG */ 0, + /* 0x03c FEDATA_REG */ 0, + /* 0x040 FEADDR_REG */ 0, + /* 0x044 FEUADDR_REG */ 0, + /* 0x048 Reserved */ 0, 0, + /* 0x050 Reserved */ 0, 0, + /* 0x058 AFLOG_REG */ 0, 0, + /* 0x060 Reserved */ 0, + /* 0x064 PMEN_REG */ 0, + /* 0x068 PLMBASE_REG */ 0, + /* 0x06c PLMLIMIT_REG */ 0, + /* 0x070 PHMBASE_REG */ 0, 0, + /* 0x078 PHMLIMIT_REG */ 0, 0, + /* 0x080 IQH_REG */ 0, 0, + /* 0x088 IQT_REG */ 0, 0, + /* 0x090 IQA_REG */ 0, 0, + /* 0x098 Reserved */ 0, + /* 0x09c ICS_REG */ VTD_ICS_REG_RW1C_MASK, + /* 0x0a0 IECTL_REG */ 0, + /* 0x0a4 IEDATA_REG */ 0, + /* 0x0a8 IEADDR_REG */ 0, + /* 0x0ac IEUADDR_REG */ 0, + /* 0x0b0 IQERCD_REG */ 0, 0, + /* 0x0b8 IRTA_REG */ 0, 0, + /* 0x0c0 PQH_REG */ 0, 0, + /* 0x0c8 PQT_REG */ 0, 0, + /* 0x0d0 PQA_REG */ 0, 0, + /* 0x0d8 Reserved */ 0, + /* 0x0dc PRS_REG */ 0, + /* 0x0e0 PECTL_REG */ 0, + /* 0x0e4 PEDATA_REG */ 0, + /* 0x0e8 PEADDR_REG */ 0, + /* 0x0ec PEUADDR_REG */ 0, + /* 0x0f0 Reserved */ 0, 0, + /* 0x0f8 Reserved */ 0, 0, + /* 0x100 MTRRCAP_REG */ 0, 0, + /* 0x108 MTRRDEF_REG */ 0, 0, + /* 0x110 Reserved */ 0, 0, + /* 0x118 Reserved */ 0, 0, + /* 0x120 MTRR_FIX64_00000_REG */ 0, 0, + /* 0x128 MTRR_FIX16K_80000_REG */ 0, 0, + /* 0x130 MTRR_FIX16K_A0000_REG */ 0, 0, + /* 0x138 MTRR_FIX4K_C0000_REG */ 0, 0, + /* 0x140 MTRR_FIX4K_C8000_REG */ 0, 0, + /* 0x148 MTRR_FIX4K_D0000_REG */ 0, 0, + /* 0x150 MTRR_FIX4K_D8000_REG */ 0, 0, + /* 0x158 MTRR_FIX4K_E0000_REG */ 0, 0, + /* 0x160 MTRR_FIX4K_E8000_REG */ 0, 0, + /* 0x168 MTRR_FIX4K_F0000_REG */ 0, 0, + /* 0x170 MTRR_FIX4K_F8000_REG */ 0, 0, + /* 0x178 Reserved */ 0, 0, + /* 0x180 MTRR_PHYSBASE0_REG */ 0, 0, + /* 0x188 MTRR_PHYSMASK0_REG */ 0, 0, + /* 0x190 MTRR_PHYSBASE1_REG */ 0, 0, + /* 0x198 MTRR_PHYSMASK1_REG */ 0, 0, + /* 0x1a0 MTRR_PHYSBASE2_REG */ 0, 0, + /* 0x1a8 MTRR_PHYSMASK2_REG */ 0, 0, + /* 0x1b0 MTRR_PHYSBASE3_REG */ 0, 0, + /* 0x1b8 MTRR_PHYSMASK3_REG */ 0, 0, + /* 0x1c0 MTRR_PHYSBASE4_REG */ 0, 0, + /* 0x1c8 MTRR_PHYSMASK4_REG */ 0, 0, + /* 0x1d0 MTRR_PHYSBASE5_REG */ 0, 0, + /* 0x1d8 MTRR_PHYSMASK5_REG */ 0, 0, + /* 0x1e0 MTRR_PHYSBASE6_REG */ 0, 0, + /* 0x1e8 MTRR_PHYSMASK6_REG */ 0, 0, + /* 0x1f0 MTRR_PHYSBASE7_REG */ 0, 0, + /* 0x1f8 MTRR_PHYSMASK7_REG */ 0, 0, + /* 0x200 MTRR_PHYSBASE8_REG */ 0, 0, + /* 0x208 MTRR_PHYSMASK8_REG */ 0, 0, + /* 0x210 MTRR_PHYSBASE9_REG */ 0, 0, + /* 0x218 MTRR_PHYSMASK9_REG */ 0, 0, +}; +AssertCompile(sizeof(g_au32Rw1cMasks0) == DMAR_MMIO_GROUP_0_SIZE); + +/** + * Read-write masks for DMAR registers (group 1). + */ +static uint32_t const g_au32RwMasks1[] = +{ + /* Offset Register Low High */ + /* 0xe00 VCCAP_REG */ DMAR_LO_U32(VTD_VCCAP_REG_RW_MASK), DMAR_HI_U32(VTD_VCCAP_REG_RW_MASK), + /* 0xe08 VCMD_EO_REG */ DMAR_LO_U32(VTD_VCMD_EO_REG_RW_MASK), DMAR_HI_U32(VTD_VCMD_EO_REG_RW_MASK), + /* 0xe10 VCMD_REG */ 0, 0, /* RO: VCS not supported. */ + /* 0xe18 VCMDRSVD_REG */ 0, 0, + /* 0xe20 VCRSP_REG */ 0, 0, /* RO: VCS not supported. */ + /* 0xe28 VCRSPRSVD_REG */ 0, 0, + /* 0xe30 Reserved */ 0, 0, + /* 0xe38 Reserved */ 0, 0, + /* 0xe40 Reserved */ 0, 0, + /* 0xe48 Reserved */ 0, 0, + /* 0xe50 IVA_REG */ DMAR_LO_U32(VTD_IVA_REG_RW_MASK), DMAR_HI_U32(VTD_IVA_REG_RW_MASK), + /* 0xe58 IOTLB_REG */ DMAR_LO_U32(VTD_IOTLB_REG_RW_MASK), DMAR_HI_U32(VTD_IOTLB_REG_RW_MASK), + /* 0xe60 Reserved */ 0, 0, + /* 0xe68 Reserved */ 0, 0, + /* 0xe70 FRCD_REG_LO */ DMAR_LO_U32(VTD_FRCD_REG_LO_RW_MASK), DMAR_HI_U32(VTD_FRCD_REG_LO_RW_MASK), + /* 0xe78 FRCD_REG_HI */ DMAR_LO_U32(VTD_FRCD_REG_HI_RW_MASK), DMAR_HI_U32(VTD_FRCD_REG_HI_RW_MASK), +}; +AssertCompile(sizeof(g_au32RwMasks1) == DMAR_MMIO_GROUP_1_SIZE); +AssertCompile((DMAR_MMIO_OFF_FRCD_LO_REG - DMAR_MMIO_GROUP_1_OFF_FIRST) + DMAR_FRCD_REG_COUNT * 2 * sizeof(uint64_t) ); + +/** + * Read-only Status, Write-1-to-clear masks for DMAR registers (group 1). + */ +static uint32_t const g_au32Rw1cMasks1[] = +{ + /* Offset Register Low High */ + /* 0xe00 VCCAP_REG */ 0, 0, + /* 0xe08 VCMD_EO_REG */ 0, 0, + /* 0xe10 VCMD_REG */ 0, 0, + /* 0xe18 VCMDRSVD_REG */ 0, 0, + /* 0xe20 VCRSP_REG */ 0, 0, + /* 0xe28 VCRSPRSVD_REG */ 0, 0, + /* 0xe30 Reserved */ 0, 0, + /* 0xe38 Reserved */ 0, 0, + /* 0xe40 Reserved */ 0, 0, + /* 0xe48 Reserved */ 0, 0, + /* 0xe50 IVA_REG */ 0, 0, + /* 0xe58 IOTLB_REG */ 0, 0, + /* 0xe60 Reserved */ 0, 0, + /* 0xe68 Reserved */ 0, 0, + /* 0xe70 FRCD_REG_LO */ DMAR_LO_U32(VTD_FRCD_REG_LO_RW1C_MASK), DMAR_HI_U32(VTD_FRCD_REG_LO_RW1C_MASK), + /* 0xe78 FRCD_REG_HI */ DMAR_LO_U32(VTD_FRCD_REG_HI_RW1C_MASK), DMAR_HI_U32(VTD_FRCD_REG_HI_RW1C_MASK), +}; +AssertCompile(sizeof(g_au32Rw1cMasks1) == DMAR_MMIO_GROUP_1_SIZE); + +/** Array of RW masks for each register group. */ +static uint8_t const *g_apbRwMasks[] = { (uint8_t *)&g_au32RwMasks0[0], (uint8_t *)&g_au32RwMasks1[0] }; + +/** Array of RW1C masks for each register group. */ +static uint8_t const *g_apbRw1cMasks[] = { (uint8_t *)&g_au32Rw1cMasks0[0], (uint8_t *)&g_au32Rw1cMasks1[0] }; + +/* Masks arrays must be identical in size (even bounds checking code assumes this). */ +AssertCompile(sizeof(g_apbRw1cMasks) == sizeof(g_apbRwMasks)); + +#ifdef IN_RING3 +/** Array of valid domain-ID bits. */ +static uint16_t const g_auNdMask[] = { 0xf, 0x3f, 0xff, 0x3ff, 0xfff, 0x3fff, 0xffff, 0 }; +AssertCompile(RT_ELEMENTS(g_auNdMask) >= DMAR_ND); +#endif + + +#ifndef VBOX_DEVICE_STRUCT_TESTCASE +#ifdef IN_RING3 +/** + * Returns the supported adjusted guest-address width (SAGAW) given the maximum + * guest address width (MGAW). + * + * @returns The CAP_REG.SAGAW value. + * @param uMgaw The CAP_REG.MGAW value. + */ +static uint8_t vtdCapRegGetSagaw(uint8_t uMgaw) +{ + /* + * It doesn't make sense to me that a CPU (or IOMMU hardware) will ever support + * 5-level paging but not 4 or 3-level paging. So smaller page-table levels + * are always OR'ed in below. + * + * The bit values below (57, 48, 39 bits) represents the levels of page-table walks + * for 4KB base page size (5-level, 4-level and 3-level paging respectively). + * + * See Intel VT-d spec. 10.4.2 "Capability Register". + */ + ++uMgaw; + uint8_t const fSagaw = uMgaw >= 57 ? RT_BIT(3) | RT_BIT(2) | RT_BIT(1) + : uMgaw >= 48 ? RT_BIT(2) | RT_BIT(1) + : uMgaw >= 39 ? RT_BIT(1) + : 0; + return fSagaw; +} + + +/** + * Returns the maximum supported paging level given the supported adjusted + * guest-address width (SAGAW) field. + * + * @returns The highest paging level supported, 0 if invalid. + * @param fSagaw The CAP_REG.SAGAW value. + */ +static uint8_t vtdCapRegGetMaxPagingLevel(uint8_t fSagaw) +{ + uint8_t const cMaxPagingLevel = fSagaw & RT_BIT(3) ? 5 + : fSagaw & RT_BIT(2) ? 4 + : fSagaw & RT_BIT(1) ? 3 + : 0; + return cMaxPagingLevel; +} + + +/** + * Returns table translation mode's descriptive name. + * + * @returns The descriptive name. + * @param uTtm The RTADDR_REG.TTM value. + */ +static const char* vtdRtaddrRegGetTtmDesc(uint8_t uTtm) +{ + Assert(!(uTtm & 3)); + static const char* s_apszTtmNames[] = + { + "Legacy Mode", + "Scalable Mode", + "Reserved", + "Abort-DMA Mode" + }; + return s_apszTtmNames[uTtm & (RT_ELEMENTS(s_apszTtmNames) - 1)]; +} +#endif /* IN_RING3 */ + + +/** + * Returns whether the interrupt remapping (IR) fault is qualified or not. + * + * @returns @c true if qualified, @c false otherwise. + * @param enmIrFault The interrupt remapping fault condition. + */ +static bool vtdIrFaultIsQualified(VTDIRFAULT enmIrFault) +{ + switch (enmIrFault) + { + case VTDIRFAULT_IRTE_NOT_PRESENT: + case VTDIRFAULT_IRTE_PRESENT_RSVD: + case VTDIRFAULT_IRTE_PRESENT_INVALID: + case VTDIRFAULT_PID_READ_FAILED: + case VTDIRFAULT_PID_RSVD: + return true; + default: + return false; + } +} + + +/** + * Gets the index of the group the register belongs to given its MMIO offset. + * + * @returns The group index. + * @param offReg The MMIO offset of the register. + * @param cbReg The size of the access being made (for bounds checking on + * debug builds). + */ +DECLINLINE(uint8_t) dmarRegGetGroupIndex(uint16_t offReg, uint8_t cbReg) +{ + uint16_t const offLast = offReg + cbReg - 1; + AssertCompile(DMAR_MMIO_GROUP_0_OFF_FIRST == 0); + AssertMsg(DMAR_IS_MMIO_OFF_VALID(offLast), ("off=%#x cb=%u\n", offReg, cbReg)); + return !(offLast < DMAR_MMIO_GROUP_0_OFF_END); +} + + +/** + * Gets the group the register belongs to given its MMIO offset. + * + * @returns Pointer to the first element of the register group. + * @param pThis The shared DMAR device state. + * @param offReg The MMIO offset of the register. + * @param cbReg The size of the access being made (for bounds checking on + * debug builds). + * @param pIdxGroup Where to store the index of the register group the register + * belongs to. + */ +DECLINLINE(uint8_t *) dmarRegGetGroup(PDMAR pThis, uint16_t offReg, uint8_t cbReg, uint8_t *pIdxGroup) +{ + *pIdxGroup = dmarRegGetGroupIndex(offReg, cbReg); + uint8_t *apbRegs[] = { &pThis->abRegs0[0], &pThis->abRegs1[0] }; + return apbRegs[*pIdxGroup]; +} + + +/** + * Const/read-only version of dmarRegGetGroup. + * + * @copydoc dmarRegGetGroup + */ +DECLINLINE(uint8_t const*) dmarRegGetGroupRo(PCDMAR pThis, uint16_t offReg, uint8_t cbReg, uint8_t *pIdxGroup) +{ + *pIdxGroup = dmarRegGetGroupIndex(offReg, cbReg); + uint8_t const *apbRegs[] = { &pThis->abRegs0[0], &pThis->abRegs1[0] }; + return apbRegs[*pIdxGroup]; +} + + +/** + * Writes a 32-bit register with the exactly the supplied value. + * + * @param pThis The shared DMAR device state. + * @param offReg The MMIO offset of the register. + * @param uReg The 32-bit value to write. + */ +static void dmarRegWriteRaw32(PDMAR pThis, uint16_t offReg, uint32_t uReg) +{ + uint8_t idxGroup; + uint8_t *pabRegs = dmarRegGetGroup(pThis, offReg, sizeof(uint32_t), &idxGroup); + NOREF(idxGroup); + *(uint32_t *)(pabRegs + offReg) = uReg; +} + + +/** + * Writes a 64-bit register with the exactly the supplied value. + * + * @param pThis The shared DMAR device state. + * @param offReg The MMIO offset of the register. + * @param uReg The 64-bit value to write. + */ +static void dmarRegWriteRaw64(PDMAR pThis, uint16_t offReg, uint64_t uReg) +{ + uint8_t idxGroup; + uint8_t *pabRegs = dmarRegGetGroup(pThis, offReg, sizeof(uint64_t), &idxGroup); + NOREF(idxGroup); + *(uint64_t *)(pabRegs + offReg) = uReg; +} + + +/** + * Reads a 32-bit register with exactly the value it contains. + * + * @returns The raw register value. + * @param pThis The shared DMAR device state. + * @param offReg The MMIO offset of the register. + */ +static uint32_t dmarRegReadRaw32(PCDMAR pThis, uint16_t offReg) +{ + uint8_t idxGroup; + uint8_t const *pabRegs = dmarRegGetGroupRo(pThis, offReg, sizeof(uint32_t), &idxGroup); + NOREF(idxGroup); + return *(uint32_t *)(pabRegs + offReg); +} + + +/** + * Reads a 64-bit register with exactly the value it contains. + * + * @returns The raw register value. + * @param pThis The shared DMAR device state. + * @param offReg The MMIO offset of the register. + */ +static uint64_t dmarRegReadRaw64(PCDMAR pThis, uint16_t offReg) +{ + uint8_t idxGroup; + uint8_t const *pabRegs = dmarRegGetGroupRo(pThis, offReg, sizeof(uint64_t), &idxGroup); + NOREF(idxGroup); + return *(uint64_t *)(pabRegs + offReg); +} + + +/** + * Reads a 32-bit register with exactly the value it contains along with their + * corresponding masks + * + * @param pThis The shared DMAR device state. + * @param offReg The MMIO offset of the register. + * @param puReg Where to store the raw 32-bit register value. + * @param pfRwMask Where to store the RW mask corresponding to this register. + * @param pfRw1cMask Where to store the RW1C mask corresponding to this register. + */ +static void dmarRegReadRaw32Ex(PCDMAR pThis, uint16_t offReg, uint32_t *puReg, uint32_t *pfRwMask, uint32_t *pfRw1cMask) +{ + uint8_t idxGroup; + uint8_t const *pabRegs = dmarRegGetGroupRo(pThis, offReg, sizeof(uint32_t), &idxGroup); + Assert(idxGroup < RT_ELEMENTS(g_apbRwMasks)); + uint8_t const *pabRwMasks = g_apbRwMasks[idxGroup]; + uint8_t const *pabRw1cMasks = g_apbRw1cMasks[idxGroup]; + *puReg = *(uint32_t *)(pabRegs + offReg); + *pfRwMask = *(uint32_t *)(pabRwMasks + offReg); + *pfRw1cMask = *(uint32_t *)(pabRw1cMasks + offReg); +} + + +/** + * Reads a 64-bit register with exactly the value it contains along with their + * corresponding masks. + * + * @param pThis The shared DMAR device state. + * @param offReg The MMIO offset of the register. + * @param puReg Where to store the raw 64-bit register value. + * @param pfRwMask Where to store the RW mask corresponding to this register. + * @param pfRw1cMask Where to store the RW1C mask corresponding to this register. + */ +static void dmarRegReadRaw64Ex(PCDMAR pThis, uint16_t offReg, uint64_t *puReg, uint64_t *pfRwMask, uint64_t *pfRw1cMask) +{ + uint8_t idxGroup; + uint8_t const *pabRegs = dmarRegGetGroupRo(pThis, offReg, sizeof(uint64_t), &idxGroup); + Assert(idxGroup < RT_ELEMENTS(g_apbRwMasks)); + uint8_t const *pabRwMasks = g_apbRwMasks[idxGroup]; + uint8_t const *pabRw1cMasks = g_apbRw1cMasks[idxGroup]; + *puReg = *(uint64_t *)(pabRegs + offReg); + *pfRwMask = *(uint64_t *)(pabRwMasks + offReg); + *pfRw1cMask = *(uint64_t *)(pabRw1cMasks + offReg); +} + + +/** + * Writes a 32-bit register as it would be when written by software. + * This will preserve read-only bits, mask off reserved bits and clear RW1C bits. + * + * @returns The value that's actually written to the register. + * @param pThis The shared DMAR device state. + * @param offReg The MMIO offset of the register. + * @param uReg The 32-bit value to write. + * @param puPrev Where to store the register value prior to writing. + */ +static uint32_t dmarRegWrite32(PDMAR pThis, uint16_t offReg, uint32_t uReg, uint32_t *puPrev) +{ + /* Read current value from the 32-bit register. */ + uint32_t uCurReg; + uint32_t fRwMask; + uint32_t fRw1cMask; + dmarRegReadRaw32Ex(pThis, offReg, &uCurReg, &fRwMask, &fRw1cMask); + *puPrev = uCurReg; + + uint32_t const fRoBits = uCurReg & ~fRwMask; /* Preserve current read-only and reserved bits. */ + uint32_t const fRwBits = uReg & fRwMask; /* Merge newly written read/write bits. */ + uint32_t const fRw1cBits = uReg & fRw1cMask; /* Clear 1s written to RW1C bits. */ + uint32_t const uNewReg = (fRoBits | fRwBits) & ~fRw1cBits; + + /* Write new value to the 32-bit register. */ + dmarRegWriteRaw32(pThis, offReg, uNewReg); + return uNewReg; +} + + +/** + * Writes a 64-bit register as it would be when written by software. + * This will preserve read-only bits, mask off reserved bits and clear RW1C bits. + * + * @returns The value that's actually written to the register. + * @param pThis The shared DMAR device state. + * @param offReg The MMIO offset of the register. + * @param uReg The 64-bit value to write. + * @param puPrev Where to store the register value prior to writing. + */ +static uint64_t dmarRegWrite64(PDMAR pThis, uint16_t offReg, uint64_t uReg, uint64_t *puPrev) +{ + /* Read current value from the 64-bit register. */ + uint64_t uCurReg; + uint64_t fRwMask; + uint64_t fRw1cMask; + dmarRegReadRaw64Ex(pThis, offReg, &uCurReg, &fRwMask, &fRw1cMask); + *puPrev = uCurReg; + + uint64_t const fRoBits = uCurReg & ~fRwMask; /* Preserve current read-only and reserved bits. */ + uint64_t const fRwBits = uReg & fRwMask; /* Merge newly written read/write bits. */ + uint64_t const fRw1cBits = uReg & fRw1cMask; /* Clear 1s written to RW1C bits. */ + uint64_t const uNewReg = (fRoBits | fRwBits) & ~fRw1cBits; + + /* Write new value to the 64-bit register. */ + dmarRegWriteRaw64(pThis, offReg, uNewReg); + return uNewReg; +} + + +/** + * Reads a 32-bit register as it would be when read by software. + * + * @returns The register value. + * @param pThis The shared DMAR device state. + * @param offReg The MMIO offset of the register. + */ +static uint32_t dmarRegRead32(PCDMAR pThis, uint16_t offReg) +{ + return dmarRegReadRaw32(pThis, offReg); +} + + +/** + * Reads a 64-bit register as it would be when read by software. + * + * @returns The register value. + * @param pThis The shared DMAR device state. + * @param offReg The MMIO offset of the register. + */ +static uint64_t dmarRegRead64(PCDMAR pThis, uint16_t offReg) +{ + return dmarRegReadRaw64(pThis, offReg); +} + + +/** + * Modifies a 32-bit register. + * + * @param pThis The shared DMAR device state. + * @param offReg The MMIO offset of the register. + * @param fAndMask The AND mask (applied first). + * @param fOrMask The OR mask. + * @remarks This does NOT apply RO or RW1C masks while modifying the + * register. + */ +static void dmarRegChangeRaw32(PDMAR pThis, uint16_t offReg, uint32_t fAndMask, uint32_t fOrMask) +{ + uint32_t uReg = dmarRegReadRaw32(pThis, offReg); + uReg = (uReg & fAndMask) | fOrMask; + dmarRegWriteRaw32(pThis, offReg, uReg); +} + + +/** + * Modifies a 64-bit register. + * + * @param pThis The shared DMAR device state. + * @param offReg The MMIO offset of the register. + * @param fAndMask The AND mask (applied first). + * @param fOrMask The OR mask. + * @remarks This does NOT apply RO or RW1C masks while modifying the + * register. + */ +static void dmarRegChangeRaw64(PDMAR pThis, uint16_t offReg, uint64_t fAndMask, uint64_t fOrMask) +{ + uint64_t uReg = dmarRegReadRaw64(pThis, offReg); + uReg = (uReg & fAndMask) | fOrMask; + dmarRegWriteRaw64(pThis, offReg, uReg); +} + + +/** + * Checks if the invalidation-queue is empty. + * + * Extended version which optionally returns the current queue head and tail + * offsets. + * + * @returns @c true if empty, @c false otherwise. + * @param pThis The shared DMAR device state. + * @param poffQh Where to store the queue head offset. Optional, can be NULL. + * @param poffQt Where to store the queue tail offset. Optional, can be NULL. + */ +static bool dmarInvQueueIsEmptyEx(PCDMAR pThis, uint32_t *poffQh, uint32_t *poffQt) +{ + /* Read only the low-32 bits of the queue head and queue tail as high bits are all RsvdZ.*/ + uint32_t const uIqtReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_IQT_REG); + uint32_t const uIqhReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_IQH_REG); + + /* Don't bother masking QT, QH since other bits are RsvdZ. */ + Assert(!(uIqtReg & ~VTD_BF_IQT_REG_QT_MASK)); + Assert(!(uIqhReg & ~VTD_BF_IQH_REG_QH_MASK)); + if (poffQh) + *poffQh = uIqhReg; + if (poffQt) + *poffQt = uIqtReg; + return uIqtReg == uIqhReg; +} + + +/** + * Checks if the invalidation-queue is empty. + * + * @returns @c true if empty, @c false otherwise. + * @param pThis The shared DMAR device state. + */ +static bool dmarInvQueueIsEmpty(PCDMAR pThis) +{ + return dmarInvQueueIsEmptyEx(pThis, NULL /* poffQh */, NULL /* poffQt */); +} + + +/** + * Checks if the invalidation-queue is capable of processing requests. + * + * @returns @c true if the invalidation-queue can process requests, @c false + * otherwise. + * @param pThis The shared DMAR device state. + */ +static bool dmarInvQueueCanProcessRequests(PCDMAR pThis) +{ + /* Check if queued-invalidation is enabled. */ + uint32_t const uGstsReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_GSTS_REG); + if (uGstsReg & VTD_BF_GSTS_REG_QIES_MASK) + { + /* Check if there are no invalidation-queue or timeout errors. */ + uint32_t const uFstsReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_FSTS_REG); + if (!(uFstsReg & (VTD_BF_FSTS_REG_IQE_MASK | VTD_BF_FSTS_REG_ITE_MASK))) + return true; + } + return false; +} + + +/** + * Wakes up the invalidation-queue thread if there are requests to be processed. + * + * @param pDevIns The IOMMU device instance. + */ +static void dmarInvQueueThreadWakeUpIfNeeded(PPDMDEVINS pDevIns) +{ + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + PCDMARCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PCDMARCC); + LogFlowFunc(("\n")); + + DMAR_ASSERT_LOCK_IS_OWNER(pDevIns, pThisCC); + + if ( dmarInvQueueCanProcessRequests(pThis) + && !dmarInvQueueIsEmpty(pThis)) + { + Log4Func(("Signaling the invalidation-queue thread\n")); + PDMDevHlpSUPSemEventSignal(pDevIns, pThis->hEvtInvQueue); + } +} + + +/** + * Raises an event on behalf of the DMAR. + * + * These are events that are generated by the DMAR itself (like faults and + * invalidation completion notifications). + * + * @param pDevIns The IOMMU device instance. + * @param enmEventType The DMAR event type. + * + * @remarks The DMAR lock must be held while calling this function. + */ +static void dmarEventRaiseInterrupt(PPDMDEVINS pDevIns, DMAREVENTTYPE enmEventType) +{ + uint16_t offCtlReg; + uint32_t fIntrMaskedMask; + uint32_t fIntrPendingMask; + uint16_t offMsiAddrLoReg; + uint16_t offMsiAddrHiReg; + uint16_t offMsiDataReg; + switch (enmEventType) + { + case DMAREVENTTYPE_INV_COMPLETE: + { + offCtlReg = VTD_MMIO_OFF_IECTL_REG; + fIntrMaskedMask = VTD_BF_IECTL_REG_IM_MASK; + fIntrPendingMask = VTD_BF_IECTL_REG_IP_MASK; + offMsiAddrLoReg = VTD_MMIO_OFF_IEADDR_REG; + offMsiAddrHiReg = VTD_MMIO_OFF_IEUADDR_REG; + offMsiDataReg = VTD_MMIO_OFF_IEDATA_REG; + break; + } + + case DMAREVENTTYPE_FAULT: + { + offCtlReg = VTD_MMIO_OFF_FECTL_REG; + fIntrMaskedMask = VTD_BF_FECTL_REG_IM_MASK; + fIntrPendingMask = VTD_BF_FECTL_REG_IP_MASK; + offMsiAddrLoReg = VTD_MMIO_OFF_FEADDR_REG; + offMsiAddrHiReg = VTD_MMIO_OFF_FEUADDR_REG; + offMsiDataReg = VTD_MMIO_OFF_FEDATA_REG; + break; + } + + default: + { + /* Shouldn't ever happen. */ + AssertMsgFailedReturnVoid(("DMAR event type %#x unknown!\n", enmEventType)); + } + } + + /* Check if software has masked the interrupt. */ + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + uint32_t uCtlReg = dmarRegReadRaw32(pThis, offCtlReg); + if (!(uCtlReg & fIntrMaskedMask)) + { + /* + * Interrupt is unmasked, raise it. + * Interrupts generated by the DMAR have trigger mode and level as 0. + * See Intel spec. 5.1.6 "Remapping Hardware Event Interrupt Programming". + */ + MSIMSG Msi; + Msi.Addr.au32[0] = dmarRegReadRaw32(pThis, offMsiAddrLoReg); + Msi.Addr.au32[1] = (pThis->fExtCapReg & VTD_BF_ECAP_REG_EIM_MASK) ? dmarRegReadRaw32(pThis, offMsiAddrHiReg) : 0; + Msi.Data.u32 = dmarRegReadRaw32(pThis, offMsiDataReg); + Assert(Msi.Data.n.u1Level == 0); + Assert(Msi.Data.n.u1TriggerMode == 0); + + PCDMARCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PCDMARCC); + pThisCC->CTX_SUFF(pIommuHlp)->pfnSendMsi(pDevIns, &Msi, 0 /* uTagSrc */); + + /* Clear interrupt pending bit. */ + uCtlReg &= ~fIntrPendingMask; + dmarRegWriteRaw32(pThis, offCtlReg, uCtlReg); + } + else + { + /* Interrupt is masked, set the interrupt pending bit. */ + uCtlReg |= fIntrPendingMask; + dmarRegWriteRaw32(pThis, offCtlReg, uCtlReg); + } +} + + +/** + * Raises an interrupt in response to a fault event. + * + * @param pDevIns The IOMMU device instance. + * + * @remarks This assumes the caller has already set the required status bits in the + * FSTS_REG (namely one or more of PPF, PFO, IQE, ICE or ITE bits). + */ +static void dmarFaultEventRaiseInterrupt(PPDMDEVINS pDevIns) +{ + PCDMARCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PCDMARCC); + DMAR_ASSERT_LOCK_IS_OWNER(pDevIns, pThisCC); + +#ifdef VBOX_STRICT + { + PCDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PCDMAR); + uint32_t const uFstsReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_FSTS_REG); + uint32_t const fFaultMask = VTD_BF_FSTS_REG_PPF_MASK | VTD_BF_FSTS_REG_PFO_MASK + /* | VTD_BF_FSTS_REG_APF_MASK | VTD_BF_FSTS_REG_AFO_MASK */ /* AFL not supported */ + /* | VTD_BF_FSTS_REG_ICE_MASK | VTD_BF_FSTS_REG_ITE_MASK */ /* Device-TLBs not supported */ + | VTD_BF_FSTS_REG_IQE_MASK; + Assert(uFstsReg & fFaultMask); + } +#endif + dmarEventRaiseInterrupt(pDevIns, DMAREVENTTYPE_FAULT); +} + + +#ifdef IN_RING3 +/** + * Raises an interrupt in response to an invalidation (complete) event. + * + * @param pDevIns The IOMMU device instance. + */ +static void dmarR3InvEventRaiseInterrupt(PPDMDEVINS pDevIns) +{ + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + PCDMARCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PCDMARCC); + DMAR_ASSERT_LOCK_IS_OWNER(pDevIns, pThisCC); + + uint32_t const uIcsReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_ICS_REG); + if (!(uIcsReg & VTD_BF_ICS_REG_IWC_MASK)) + { + dmarRegChangeRaw32(pThis, VTD_MMIO_OFF_ICS_REG, UINT32_MAX, VTD_BF_ICS_REG_IWC_MASK); + dmarEventRaiseInterrupt(pDevIns, DMAREVENTTYPE_INV_COMPLETE); + } +} +#endif /* IN_RING3 */ + + +/** + * Checks if a primary fault can be recorded. + * + * @returns @c true if the fault can be recorded, @c false otherwise. + * @param pDevIns The IOMMU device instance. + * @param pThis The shared DMAR device state. + * + * @remarks Warning: This function has side-effects wrt the DMAR register state. Do + * NOT call it unless there is a fault condition! + */ +static bool dmarPrimaryFaultCanRecord(PPDMDEVINS pDevIns, PDMAR pThis) +{ + PCDMARCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PCDMARCC); + DMAR_ASSERT_LOCK_IS_OWNER(pDevIns, pThisCC); + + uint32_t uFstsReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_FSTS_REG); + if (uFstsReg & VTD_BF_FSTS_REG_PFO_MASK) + return false; + + /* + * If we add more FRCD registers, we'll have to loop through them here. + * Since we support only one FRCD_REG, we don't support "compression of multiple faults", + * nor do we need to increment FRI. + * + * See Intel VT-d spec. 7.2.1 "Primary Fault Logging". + */ + AssertCompile(DMAR_FRCD_REG_COUNT == 1); + uint64_t const uFrcdRegHi = dmarRegReadRaw64(pThis, DMAR_MMIO_OFF_FRCD_HI_REG); + if (uFrcdRegHi & VTD_BF_1_FRCD_REG_F_MASK) + { + uFstsReg |= VTD_BF_FSTS_REG_PFO_MASK; + dmarRegWriteRaw32(pThis, VTD_MMIO_OFF_FSTS_REG, uFstsReg); + return false; + } + + return true; +} + + +/** + * Records a primary fault. + * + * @param pDevIns The IOMMU device instance. + * @param uFrcdHi The FRCD_HI_REG value for this fault. + * @param uFrcdLo The FRCD_LO_REG value for this fault. + */ +static void dmarPrimaryFaultRecord(PPDMDEVINS pDevIns, uint64_t uFrcdHi, uint64_t uFrcdLo) +{ + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + PCDMARCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PCDMARCC); + + DMAR_LOCK(pDevIns, pThisCC); + + /* We don't support advance fault logging. */ + Assert(!(dmarRegRead32(pThis, VTD_MMIO_OFF_GSTS_REG) & VTD_BF_GSTS_REG_AFLS_MASK)); + + if (dmarPrimaryFaultCanRecord(pDevIns, pThis)) + { + /* Update the fault recording registers with the fault information. */ + dmarRegWriteRaw64(pThis, DMAR_MMIO_OFF_FRCD_HI_REG, uFrcdHi); + dmarRegWriteRaw64(pThis, DMAR_MMIO_OFF_FRCD_LO_REG, uFrcdLo); + + /* Set the Pending Primary Fault (PPF) field in the status register. */ + dmarRegChangeRaw32(pThis, VTD_MMIO_OFF_FSTS_REG, UINT32_MAX, VTD_BF_FSTS_REG_PPF_MASK); + + /* Raise interrupt if necessary. */ + dmarFaultEventRaiseInterrupt(pDevIns); + } + + DMAR_UNLOCK(pDevIns, pThisCC); +} + + +/** + * Records an interrupt request fault. + * + * @param pDevIns The IOMMU device instance. + * @param enmDiag The diagnostic reason. + * @param idDevice The device ID (bus, device, function). + * @param idxIntr The interrupt index. + * @param pIrte The IRTE that caused this fault. Can be NULL if the fault is + * not qualified. + */ +static void dmarIrFaultRecord(PPDMDEVINS pDevIns, DMARDIAG enmDiag, uint16_t idDevice, uint16_t idxIntr, PCVTD_IRTE_T pIrte) +{ + /* + * Update the diagnostic reason (even if software wants to supress faults). + */ + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + pThis->enmDiag = enmDiag; + + /* + * Figure out the fault reason to report to software from our diagnostic code. + * The case labels below are sorted alphabetically for convenience. + */ + VTDIRFAULT enmIrFault; + switch (enmDiag) + { + case kDmarDiag_Ir_Cfi_Blocked: enmIrFault = VTDIRFAULT_CFI_BLOCKED; break; + case kDmarDiag_Ir_Rfi_Intr_Index_Invalid: enmIrFault = VTDIRFAULT_INTR_INDEX_INVALID; break; + case kDmarDiag_Ir_Rfi_Irte_Mode_Invalid: enmIrFault = VTDIRFAULT_IRTE_PRESENT_RSVD; break; + case kDmarDiag_Ir_Rfi_Irte_Not_Present: enmIrFault = VTDIRFAULT_IRTE_NOT_PRESENT; break; + case kDmarDiag_Ir_Rfi_Irte_Read_Failed: enmIrFault = VTDIRFAULT_IRTE_READ_FAILED; break; + case kDmarDiag_Ir_Rfi_Irte_Rsvd: + case kDmarDiag_Ir_Rfi_Irte_Svt_Bus: + case kDmarDiag_Ir_Rfi_Irte_Svt_Masked: + case kDmarDiag_Ir_Rfi_Irte_Svt_Rsvd: enmIrFault = VTDIRFAULT_IRTE_PRESENT_RSVD; break; + case kDmarDiag_Ir_Rfi_Rsvd: enmIrFault = VTDIRFAULT_REMAPPABLE_INTR_RSVD; break; + + /* Shouldn't ever happen. */ + default: + { + AssertLogRelMsgFailedReturnVoid(("%s: Invalid interrupt remapping fault diagnostic code %#x\n", DMAR_LOG_PFX, + enmDiag)); + } + } + + /* + * Qualified faults are those that can be suppressed by software using the FPD bit + * in the interrupt-remapping table entry. + */ + bool fFpd; + bool const fQualifiedFault = vtdIrFaultIsQualified(enmIrFault); + if (fQualifiedFault) + { + AssertReturnVoid(pIrte); + fFpd = RT_BOOL(pIrte->au64[0] & VTD_BF_0_IRTE_FPD_MASK); + } + else + fFpd = false; + + if (!fFpd) + { + /* Construct and record the error. */ + uint64_t const uFrcdHi = RT_BF_MAKE(VTD_BF_1_FRCD_REG_SID, idDevice) + | RT_BF_MAKE(VTD_BF_1_FRCD_REG_FR, enmIrFault) + | RT_BF_MAKE(VTD_BF_1_FRCD_REG_F, 1); + uint64_t const uFrcdLo = (uint64_t)idxIntr << 48; + dmarPrimaryFaultRecord(pDevIns, uFrcdHi, uFrcdLo); + } +} + + +/** + * Records an address translation fault. + * + * @param pDevIns The IOMMU device instance. + * @param enmDiag The diagnostic reason. + * @param pMemReqIn The DMA memory request input. + * @param pMemReqAux The DMA memory request auxiliary info. + */ +static void dmarAtFaultRecord(PPDMDEVINS pDevIns, DMARDIAG enmDiag, PCDMARMEMREQIN pMemReqIn, PCDMARMEMREQAUX pMemReqAux) +{ + /* + * Update the diagnostic reason (even if software wants to supress faults). + */ + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + pThis->enmDiag = enmDiag; + + /* + * Qualified faults are those that can be suppressed by software using the FPD bit + * in the context entry, scalable-mode context entry etc. + */ + if (!pMemReqAux->fFpd) + { + /* + * Figure out the fault reason to report to software from our diagnostic code. + * The case labels below are sorted alphabetically for convenience. + */ + VTDATFAULT enmAtFault; + bool const fLm = pMemReqAux->fTtm == VTD_TTM_LEGACY_MODE; + switch (enmDiag) + { + /* LM (Legacy Mode) faults. */ + case kDmarDiag_At_Lm_CtxEntry_Not_Present: enmAtFault = VTDATFAULT_LCT_2; break; + case kDmarDiag_At_Lm_CtxEntry_Read_Failed: enmAtFault = VTDATFAULT_LCT_1; break; + case kDmarDiag_At_Lm_CtxEntry_Rsvd: enmAtFault = VTDATFAULT_LCT_3; break; + case kDmarDiag_At_Lm_Pt_At_Block: enmAtFault = VTDATFAULT_LCT_5; break; + case kDmarDiag_At_Lm_Pt_Aw_Invalid: enmAtFault = VTDATFAULT_LGN_1_3; break; + case kDmarDiag_At_Lm_RootEntry_Not_Present: enmAtFault = VTDATFAULT_LRT_2; break; + case kDmarDiag_At_Lm_RootEntry_Read_Failed: enmAtFault = VTDATFAULT_LRT_1; break; + case kDmarDiag_At_Lm_RootEntry_Rsvd: enmAtFault = VTDATFAULT_LRT_3; break; + case kDmarDiag_At_Lm_Tt_Invalid: enmAtFault = VTDATFAULT_LCT_4_2; break; + case kDmarDiag_At_Lm_Ut_At_Block: enmAtFault = VTDATFAULT_LCT_5; break; + case kDmarDiag_At_Lm_Ut_Aw_Invalid: enmAtFault = VTDATFAULT_LCT_4_1; break; + + /* RTA (Root Table Address) faults. */ + case kDmarDiag_At_Rta_Adms_Not_Supported: enmAtFault = VTDATFAULT_RTA_1_1; break; + case kDmarDiag_At_Rta_Rsvd: enmAtFault = VTDATFAULT_RTA_1_2; break; + case kDmarDiag_At_Rta_Smts_Not_Supported: enmAtFault = VTDATFAULT_RTA_1_3; break; + + /* XM (Legacy mode or Scalable Mode) faults. */ + case kDmarDiag_At_Xm_AddrIn_Invalid: enmAtFault = fLm ? VTDATFAULT_LGN_1_1 : VTDATFAULT_SGN_5; break; + case kDmarDiag_At_Xm_AddrOut_Invalid: enmAtFault = fLm ? VTDATFAULT_LGN_4 : VTDATFAULT_SGN_8; break; + case kDmarDiag_At_Xm_Perm_Read_Denied: enmAtFault = fLm ? VTDATFAULT_LGN_3 : VTDATFAULT_SGN_7; break; + case kDmarDiag_At_Xm_Perm_Write_Denied: enmAtFault = fLm ? VTDATFAULT_LGN_2 : VTDATFAULT_SGN_6; break; + case kDmarDiag_At_Xm_Pte_Not_Present: + case kDmarDiag_At_Xm_Pte_Rsvd: enmAtFault = fLm ? VTDATFAULT_LSL_2 : VTDATFAULT_SSL_2; break; + case kDmarDiag_At_Xm_Pte_Sllps_Invalid: enmAtFault = fLm ? VTDATFAULT_LSL_2 : VTDATFAULT_SSL_3; break; + case kDmarDiag_At_Xm_Read_Pte_Failed: enmAtFault = fLm ? VTDATFAULT_LSL_1 : VTDATFAULT_SSL_1; break; + case kDmarDiag_At_Xm_Slpptr_Read_Failed: enmAtFault = fLm ? VTDATFAULT_LCT_4_3 : VTDATFAULT_SSL_4; break; + + /* Shouldn't ever happen. */ + default: + { + AssertLogRelMsgFailedReturnVoid(("%s: Invalid address translation fault diagnostic code %#x\n", + DMAR_LOG_PFX, enmDiag)); + } + } + + /* Construct and record the error. */ + uint16_t const idDevice = pMemReqIn->idDevice; + uint8_t const fType1 = pMemReqIn->enmReqType & RT_BIT(1); + uint8_t const fType2 = pMemReqIn->enmReqType & RT_BIT(0); + uint8_t const fExec = pMemReqIn->AddrRange.fPerm & DMAR_PERM_EXE; + uint8_t const fPriv = pMemReqIn->AddrRange.fPerm & DMAR_PERM_PRIV; + bool const fHasPasid = PCIPASID_IS_VALID(pMemReqIn->Pasid); + uint32_t const uPasid = PCIPASID_VAL(pMemReqIn->Pasid); + PCIADDRTYPE const enmAt = pMemReqIn->enmAddrType; + + uint64_t const uFrcdHi = RT_BF_MAKE(VTD_BF_1_FRCD_REG_SID, idDevice) + | RT_BF_MAKE(VTD_BF_1_FRCD_REG_T2, fType2) + | RT_BF_MAKE(VTD_BF_1_FRCD_REG_PP, fHasPasid) + | RT_BF_MAKE(VTD_BF_1_FRCD_REG_EXE, fExec) + | RT_BF_MAKE(VTD_BF_1_FRCD_REG_PRIV, fPriv) + | RT_BF_MAKE(VTD_BF_1_FRCD_REG_FR, enmAtFault) + | RT_BF_MAKE(VTD_BF_1_FRCD_REG_PV, uPasid) + | RT_BF_MAKE(VTD_BF_1_FRCD_REG_AT, enmAt) + | RT_BF_MAKE(VTD_BF_1_FRCD_REG_T1, fType1) + | RT_BF_MAKE(VTD_BF_1_FRCD_REG_F, 1); + uint64_t const uFrcdLo = pMemReqIn->AddrRange.uAddr & X86_PAGE_BASE_MASK; + dmarPrimaryFaultRecord(pDevIns, uFrcdHi, uFrcdLo); + } +} + + +/** + * Records an IQE fault. + * + * @param pDevIns The IOMMU device instance. + * @param enmIqei The IQE information. + * @param enmDiag The diagnostic reason. + */ +static void dmarIqeFaultRecord(PPDMDEVINS pDevIns, DMARDIAG enmDiag, VTDIQEI enmIqei) +{ + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + PCDMARCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PCDMARCC); + + DMAR_LOCK(pDevIns, pThisCC); + + /* Update the diagnostic reason. */ + pThis->enmDiag = enmDiag; + + /* Set the error bit. */ + uint32_t const fIqe = RT_BF_MAKE(VTD_BF_FSTS_REG_IQE, 1); + dmarRegChangeRaw32(pThis, VTD_MMIO_OFF_FSTS_REG, UINT32_MAX, fIqe); + + /* Set the error information. */ + uint64_t const fIqei = RT_BF_MAKE(VTD_BF_IQERCD_REG_IQEI, enmIqei); + dmarRegChangeRaw64(pThis, VTD_MMIO_OFF_IQERCD_REG, UINT64_MAX, fIqei); + + dmarFaultEventRaiseInterrupt(pDevIns); + + DMAR_UNLOCK(pDevIns, pThisCC); +} + + +/** + * Handles writes to GCMD_REG. + * + * @returns Strict VBox status code. + * @param pDevIns The IOMMU device instance. + * @param uGcmdReg The value written to GCMD_REG. + */ +static VBOXSTRICTRC dmarGcmdRegWrite(PPDMDEVINS pDevIns, uint32_t uGcmdReg) +{ + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + uint32_t const uGstsReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_GSTS_REG); + uint32_t const fChanged = uGstsReg ^ uGcmdReg; + uint64_t const fExtCapReg = pThis->fExtCapReg; + + /* Queued-invalidation. */ + if ( (fExtCapReg & VTD_BF_ECAP_REG_QI_MASK) + && (fChanged & VTD_BF_GCMD_REG_QIE_MASK)) + { + if (uGcmdReg & VTD_BF_GCMD_REG_QIE_MASK) + { + dmarRegChangeRaw32(pThis, VTD_MMIO_OFF_GSTS_REG, UINT32_MAX, VTD_BF_GSTS_REG_QIES_MASK); + dmarInvQueueThreadWakeUpIfNeeded(pDevIns); + } + else + { + dmarRegChangeRaw32(pThis, VTD_MMIO_OFF_GSTS_REG, ~VTD_BF_GSTS_REG_QIES_MASK, 0 /* fOrMask */); + dmarRegWriteRaw32(pThis, VTD_MMIO_OFF_IQH_REG, 0); + } + } + + if (fExtCapReg & VTD_BF_ECAP_REG_IR_MASK) + { + /* Set Interrupt Remapping Table Pointer (SIRTP). */ + if (uGcmdReg & VTD_BF_GCMD_REG_SIRTP_MASK) + { + /** @todo Perform global invalidation of all interrupt-entry cache when ESIRTPS is + * supported. */ + pThis->uIrtaReg = dmarRegReadRaw64(pThis, VTD_MMIO_OFF_IRTA_REG); + dmarRegChangeRaw32(pThis, VTD_MMIO_OFF_GSTS_REG, UINT32_MAX, VTD_BF_GSTS_REG_IRTPS_MASK); + } + + /* Interrupt remapping. */ + if (fChanged & VTD_BF_GCMD_REG_IRE_MASK) + { + if (uGcmdReg & VTD_BF_GCMD_REG_IRE_MASK) + dmarRegChangeRaw32(pThis, VTD_MMIO_OFF_GSTS_REG, UINT32_MAX, VTD_BF_GSTS_REG_IRES_MASK); + else + dmarRegChangeRaw32(pThis, VTD_MMIO_OFF_GSTS_REG, ~VTD_BF_GSTS_REG_IRES_MASK, 0 /* fOrMask */); + } + + /* Compatibility format interrupts. */ + if (fChanged & VTD_BF_GCMD_REG_CFI_MASK) + { + if (uGcmdReg & VTD_BF_GCMD_REG_CFI_MASK) + dmarRegChangeRaw32(pThis, VTD_MMIO_OFF_GSTS_REG, UINT32_MAX, VTD_BF_GSTS_REG_CFIS_MASK); + else + dmarRegChangeRaw32(pThis, VTD_MMIO_OFF_GSTS_REG, ~VTD_BF_GSTS_REG_CFIS_MASK, 0 /* fOrMask */); + } + } + + /* Set Root Table Pointer (SRTP). */ + if (uGcmdReg & VTD_BF_GCMD_REG_SRTP_MASK) + { + /** @todo Perform global invalidation of all remapping translation caches when + * ESRTPS is supported. */ + pThis->uRtaddrReg = dmarRegReadRaw64(pThis, VTD_MMIO_OFF_RTADDR_REG); + dmarRegChangeRaw32(pThis, VTD_MMIO_OFF_GSTS_REG, UINT32_MAX, VTD_BF_GSTS_REG_RTPS_MASK); + } + + /* Translation (DMA remapping). */ + if (fChanged & VTD_BF_GCMD_REG_TE_MASK) + { + if (uGcmdReg & VTD_BF_GCMD_REG_TE_MASK) + dmarRegChangeRaw32(pThis, VTD_MMIO_OFF_GSTS_REG, UINT32_MAX, VTD_BF_GSTS_REG_TES_MASK); + else + dmarRegChangeRaw32(pThis, VTD_MMIO_OFF_GSTS_REG, ~VTD_BF_GSTS_REG_TES_MASK, 0 /* fOrMask */); + } + + return VINF_SUCCESS; +} + + +/** + * Handles writes to CCMD_REG. + * + * @returns Strict VBox status code. + * @param pDevIns The IOMMU device instance. + * @param offReg The MMIO register offset. + * @param cbReg The size of the MMIO access (in bytes). + * @param uCcmdReg The value written to CCMD_REG. + */ +static VBOXSTRICTRC dmarCcmdRegWrite(PPDMDEVINS pDevIns, uint16_t offReg, uint8_t cbReg, uint64_t uCcmdReg) +{ + /* At present, we only care about responding to high 32-bits writes, low 32-bits are data. */ + if (offReg + cbReg > VTD_MMIO_OFF_CCMD_REG + 4) + { + /* Check if we need to invalidate the context-context. */ + bool const fIcc = RT_BF_GET(uCcmdReg, VTD_BF_CCMD_REG_ICC); + if (fIcc) + { + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + uint8_t const uMajorVersion = RT_BF_GET(pThis->uVerReg, VTD_BF_VER_REG_MAX); + if (uMajorVersion < 6) + { + /* Register-based invalidation can only be used when queued-invalidations are not enabled. */ + uint32_t const uGstsReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_GSTS_REG); + if (!(uGstsReg & VTD_BF_GSTS_REG_QIES_MASK)) + { + /* Verify table translation mode is legacy. */ + uint8_t const fTtm = RT_BF_GET(pThis->uRtaddrReg, VTD_BF_RTADDR_REG_TTM); + if (fTtm == VTD_TTM_LEGACY_MODE) + { + /** @todo Invalidate. */ + return VINF_SUCCESS; + } + pThis->enmDiag = kDmarDiag_CcmdReg_Ttm_Invalid; + } + else + pThis->enmDiag = kDmarDiag_CcmdReg_Qi_Enabled; + } + else + pThis->enmDiag = kDmarDiag_CcmdReg_Not_Supported; + dmarRegChangeRaw64(pThis, VTD_MMIO_OFF_GSTS_REG, ~VTD_BF_CCMD_REG_CAIG_MASK, 0 /* fOrMask */); + } + } + return VINF_SUCCESS; +} + + +/** + * Handles writes to FECTL_REG. + * + * @returns Strict VBox status code. + * @param pDevIns The IOMMU device instance. + * @param uFectlReg The value written to FECTL_REG. + */ +static VBOXSTRICTRC dmarFectlRegWrite(PPDMDEVINS pDevIns, uint32_t uFectlReg) +{ + /* + * If software unmasks the interrupt when the interrupt is pending, we must raise + * the interrupt now (which will consequently clear the interrupt pending (IP) bit). + */ + if ( (uFectlReg & VTD_BF_FECTL_REG_IP_MASK) + && ~(uFectlReg & VTD_BF_FECTL_REG_IM_MASK)) + dmarEventRaiseInterrupt(pDevIns, DMAREVENTTYPE_FAULT); + return VINF_SUCCESS; +} + + +/** + * Handles writes to FSTS_REG. + * + * @returns Strict VBox status code. + * @param pDevIns The IOMMU device instance. + * @param uFstsReg The value written to FSTS_REG. + * @param uPrev The value in FSTS_REG prior to writing it. + */ +static VBOXSTRICTRC dmarFstsRegWrite(PPDMDEVINS pDevIns, uint32_t uFstsReg, uint32_t uPrev) +{ + /* + * If software clears other status bits in FSTS_REG (pertaining to primary fault logging), + * the interrupt pending (IP) bit must be cleared. + * + * See Intel VT-d spec. 10.4.10 "Fault Event Control Register". + */ + uint32_t const fChanged = uPrev ^ uFstsReg; + if (fChanged & ( VTD_BF_FSTS_REG_ICE_MASK | VTD_BF_FSTS_REG_ITE_MASK + | VTD_BF_FSTS_REG_IQE_MASK | VTD_BF_FSTS_REG_PFO_MASK)) + { + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + dmarRegChangeRaw32(pThis, VTD_MMIO_OFF_FECTL_REG, ~VTD_BF_FECTL_REG_IP_MASK, 0 /* fOrMask */); + } + return VINF_SUCCESS; +} + + +/** + * Handles writes to IQT_REG. + * + * @returns Strict VBox status code. + * @param pDevIns The IOMMU device instance. + * @param offReg The MMIO register offset. + * @param uIqtReg The value written to IQT_REG. + */ +static VBOXSTRICTRC dmarIqtRegWrite(PPDMDEVINS pDevIns, uint16_t offReg, uint64_t uIqtReg) +{ + /* We only care about the low 32-bits, high 32-bits are reserved. */ + Assert(offReg == VTD_MMIO_OFF_IQT_REG); + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + + /* Paranoia. */ + Assert(!(uIqtReg & ~VTD_BF_IQT_REG_QT_MASK)); + + uint32_t const offQt = uIqtReg; + uint64_t const uIqaReg = dmarRegReadRaw64(pThis, VTD_MMIO_OFF_IQA_REG); + uint8_t const fDw = RT_BF_GET(uIqaReg, VTD_BF_IQA_REG_DW); + + /* If the descriptor width is 256-bits, the queue tail offset must be aligned accordingly. */ + if ( fDw != VTD_IQA_REG_DW_256_BIT + || !(offQt & RT_BIT(4))) + dmarInvQueueThreadWakeUpIfNeeded(pDevIns); + else + { + /* Hardware treats bit 4 as RsvdZ in this situation, so clear it. */ + dmarRegChangeRaw32(pThis, offReg, ~RT_BIT(4), 0 /* fOrMask */); + dmarIqeFaultRecord(pDevIns, kDmarDiag_IqtReg_Qt_Not_Aligned, VTDIQEI_QUEUE_TAIL_MISALIGNED); + } + return VINF_SUCCESS; +} + + +/** + * Handles writes to IQA_REG. + * + * @returns Strict VBox status code. + * @param pDevIns The IOMMU device instance. + * @param offReg The MMIO register offset. + * @param uIqaReg The value written to IQA_REG. + */ +static VBOXSTRICTRC dmarIqaRegWrite(PPDMDEVINS pDevIns, uint16_t offReg, uint64_t uIqaReg) +{ + /* At present, we only care about the low 32-bits, high 32-bits are data. */ + Assert(offReg == VTD_MMIO_OFF_IQA_REG); NOREF(offReg); + + /** @todo What happens if IQA_REG is written when dmarInvQueueCanProcessRequests + * returns true? The Intel VT-d spec. doesn't state anywhere that it + * cannot happen or that it's ignored when it does happen. */ + + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + uint8_t const fDw = RT_BF_GET(uIqaReg, VTD_BF_IQA_REG_DW); + if (fDw == VTD_IQA_REG_DW_256_BIT) + { + bool const fSupports256BitDw = (pThis->fExtCapReg & (VTD_BF_ECAP_REG_SMTS_MASK | VTD_BF_ECAP_REG_ADMS_MASK)); + if (fSupports256BitDw) + { /* likely */ } + else + dmarIqeFaultRecord(pDevIns, kDmarDiag_IqaReg_Dw_256_Invalid, VTDIQEI_INVALID_DESCRIPTOR_WIDTH); + } + /* else: 128-bit descriptor width is validated lazily, see explanation in dmarR3InvQueueProcessRequests. */ + + return VINF_SUCCESS; +} + + +/** + * Handles writes to ICS_REG. + * + * @returns Strict VBox status code. + * @param pDevIns The IOMMU device instance. + * @param uIcsReg The value written to ICS_REG. + */ +static VBOXSTRICTRC dmarIcsRegWrite(PPDMDEVINS pDevIns, uint32_t uIcsReg) +{ + /* + * If the IP field is set when software services the interrupt condition, + * (by clearing the IWC field), the IP field must be cleared. + */ + if (!(uIcsReg & VTD_BF_ICS_REG_IWC_MASK)) + { + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + dmarRegChangeRaw32(pThis, VTD_MMIO_OFF_IECTL_REG, ~VTD_BF_IECTL_REG_IP_MASK, 0 /* fOrMask */); + } + return VINF_SUCCESS; +} + + +/** + * Handles writes to IECTL_REG. + * + * @returns Strict VBox status code. + * @param pDevIns The IOMMU device instance. + * @param uIectlReg The value written to IECTL_REG. + */ +static VBOXSTRICTRC dmarIectlRegWrite(PPDMDEVINS pDevIns, uint32_t uIectlReg) +{ + /* + * If software unmasks the interrupt when the interrupt is pending, we must raise + * the interrupt now (which will consequently clear the interrupt pending (IP) bit). + */ + if ( (uIectlReg & VTD_BF_IECTL_REG_IP_MASK) + && ~(uIectlReg & VTD_BF_IECTL_REG_IM_MASK)) + dmarEventRaiseInterrupt(pDevIns, DMAREVENTTYPE_INV_COMPLETE); + return VINF_SUCCESS; +} + + +/** + * Handles writes to FRCD_REG (High 64-bits). + * + * @returns Strict VBox status code. + * @param pDevIns The IOMMU device instance. + * @param offReg The MMIO register offset. + * @param cbReg The size of the MMIO access (in bytes). + * @param uFrcdHiReg The value written to FRCD_REG. + * @param uPrev The value in FRCD_REG prior to writing it. + */ +static VBOXSTRICTRC dmarFrcdHiRegWrite(PPDMDEVINS pDevIns, uint16_t offReg, uint8_t cbReg, uint64_t uFrcdHiReg, uint64_t uPrev) +{ + /* We only care about responding to high 32-bits, low 32-bits are read-only. */ + if (offReg + cbReg > DMAR_MMIO_OFF_FRCD_HI_REG + 4) + { + /* + * If software cleared the RW1C F (fault) bit in all FRCD_REGs, hardware clears the + * Primary Pending Fault (PPF) and the interrupt pending (IP) bits. Our implementation + * has only 1 FRCD register. + * + * See Intel VT-d spec. 10.4.10 "Fault Event Control Register". + */ + AssertCompile(DMAR_FRCD_REG_COUNT == 1); + uint64_t const fChanged = uPrev ^ uFrcdHiReg; + if (fChanged & VTD_BF_1_FRCD_REG_F_MASK) + { + Assert(!(uFrcdHiReg & VTD_BF_1_FRCD_REG_F_MASK)); /* Software should only ever be able to clear this bit. */ + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + dmarRegChangeRaw32(pThis, VTD_MMIO_OFF_FSTS_REG, ~VTD_BF_FSTS_REG_PPF_MASK, 0 /* fOrMask */); + dmarRegChangeRaw32(pThis, VTD_MMIO_OFF_FECTL_REG, ~VTD_BF_FECTL_REG_IP_MASK, 0 /* fOrMask */); + } + } + return VINF_SUCCESS; +} + + +/** + * Performs a PCI target abort for a DMA remapping (DR) operation. + * + * @param pDevIns The IOMMU device instance. + */ +static void dmarDrTargetAbort(PPDMDEVINS pDevIns) +{ + /** @todo r=ramshankar: I don't know for sure if a PCI target abort is caused or not + * as the Intel VT-d spec. is vague. Wording seems to suggest it does, but + * who knows. */ + PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0]; + uint16_t const u16Status = PDMPciDevGetStatus(pPciDev) | VBOX_PCI_STATUS_SIG_TARGET_ABORT; + PDMPciDevSetStatus(pPciDev, u16Status); +} + + +/** + * Checks whether the address width (AW) is supported by our hardware + * implementation for legacy mode address translation. + * + * @returns @c true if it's supported, @c false otherwise. + * @param pThis The shared DMAR device state. + * @param pCtxEntry The context entry. + * @param pcPagingLevel Where to store the paging level. Optional, can be NULL. + */ +static bool dmarDrLegacyModeIsAwValid(PCDMAR pThis, PCVTD_CONTEXT_ENTRY_T pCtxEntry, uint8_t *pcPagingLevel) +{ + uint8_t const fTt = RT_BF_GET(pCtxEntry->au64[0], VTD_BF_0_CONTEXT_ENTRY_TT); + uint8_t const fAw = RT_BF_GET(pCtxEntry->au64[1], VTD_BF_1_CONTEXT_ENTRY_AW); + uint8_t const fAwMask = RT_BIT(fAw); + uint8_t const fSagaw = RT_BF_GET(pThis->fCapReg, VTD_BF_CAP_REG_SAGAW); + Assert(!(fSagaw & ~(RT_BIT(1) | RT_BIT(2) | RT_BIT(3)))); + + uint8_t const cPagingLevel = fAw + 2; + if (pcPagingLevel) + *pcPagingLevel = cPagingLevel; + + /* With pass-through, the address width must be the largest AGAW supported by hardware. */ + if (fTt == VTD_TT_UNTRANSLATED_PT) + { + Assert(pThis->cMaxPagingLevel >= 3 && pThis->cMaxPagingLevel <= 5); /* Paranoia. */ + return cPagingLevel == pThis->cMaxPagingLevel; + } + + /* The address width must be any of the ones supported by hardware. */ + if (fAw < 4) + return (fSagaw & fAwMask) != 0; + + return false; +} + + +/** + * Reads a root entry from guest memory. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param uRtaddrReg The current RTADDR_REG value. + * @param idxRootEntry The index of the root entry to read. + * @param pRootEntry Where to store the read root entry. + */ +static int dmarDrReadRootEntry(PPDMDEVINS pDevIns, uint64_t uRtaddrReg, uint8_t idxRootEntry, PVTD_ROOT_ENTRY_T pRootEntry) +{ + size_t const cbRootEntry = sizeof(*pRootEntry); + RTGCPHYS const GCPhysRootEntry = (uRtaddrReg & VTD_BF_RTADDR_REG_RTA_MASK) + (idxRootEntry * cbRootEntry); + return PDMDevHlpPhysReadMeta(pDevIns, GCPhysRootEntry, pRootEntry, cbRootEntry); +} + + +/** + * Reads a context entry from guest memory. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param GCPhysCtxTable The physical address of the context table. + * @param idxCtxEntry The index of the context entry to read. + * @param pCtxEntry Where to store the read context entry. + */ +static int dmarDrReadCtxEntry(PPDMDEVINS pDevIns, RTGCPHYS GCPhysCtxTable, uint8_t idxCtxEntry, PVTD_CONTEXT_ENTRY_T pCtxEntry) +{ + /* We don't verify bits 63:HAW of GCPhysCtxTable is 0 since reading from such an address should fail anyway. */ + size_t const cbCtxEntry = sizeof(*pCtxEntry); + RTGCPHYS const GCPhysCtxEntry = GCPhysCtxTable + (idxCtxEntry * cbCtxEntry); + return PDMDevHlpPhysReadMeta(pDevIns, GCPhysCtxEntry, pCtxEntry, cbCtxEntry); +} + + +/** + * Validates and updates the output I/O page of a translation. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param GCPhysBase The output address of the translation. + * @param cShift The page shift of the translated address. + * @param fPerm The permissions granted for the translated region. + * @param pMemReqIn The DMA memory request input. + * @param pMemReqAux The DMA memory request auxiliary info. + * @param pIoPageOut Where to store the output of the translation. + */ +static int dmarDrUpdateIoPageOut(PPDMDEVINS pDevIns, RTGCPHYS GCPhysBase, uint8_t cShift, uint8_t fPerm, + PCDMARMEMREQIN pMemReqIn, PCDMARMEMREQAUX pMemReqAux, PDMARIOPAGE pIoPageOut) +{ + Assert(!(GCPhysBase & X86_PAGE_4K_OFFSET_MASK)); + + /* Ensure the output address is not in the interrupt address range. */ + if (GCPhysBase - VBOX_MSI_ADDR_BASE >= VBOX_MSI_ADDR_SIZE) + { + pIoPageOut->GCPhysBase = GCPhysBase; + pIoPageOut->cShift = cShift; + pIoPageOut->fPerm = fPerm; + return VINF_SUCCESS; + } + + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Xm_AddrOut_Invalid, pMemReqIn, pMemReqAux); + return VERR_IOMMU_ADDR_TRANSLATION_FAILED; +} + + +/** + * Performs second level translation by walking the I/O page tables. + * + * This is a DMA address-lookup callback function which performs the translation + * (and access control) as part of the lookup. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param pMemReqIn The DMA memory request input. + * @param pMemReqAux The DMA memory request auxiliary info. + * @param pIoPageOut Where to store the output of the translation. + */ +static DECLCALLBACK(int) dmarDrSecondLevelTranslate(PPDMDEVINS pDevIns, PCDMARMEMREQIN pMemReqIn, PCDMARMEMREQAUX pMemReqAux, + PDMARIOPAGE pIoPageOut) +{ + PCDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PCDMAR); + + /* Sanity. */ + Assert(pIoPageOut); + Assert(pMemReqIn->AddrRange.fPerm & (DMAR_PERM_READ | DMAR_PERM_WRITE)); + Assert( pMemReqAux->fTtm == VTD_TTM_LEGACY_MODE + || pMemReqAux->fTtm == VTD_TTM_SCALABLE_MODE); + Assert(!(pMemReqAux->GCPhysSlPt & X86_PAGE_4K_OFFSET_MASK)); + + /* Mask of reserved paging entry bits. */ + static uint64_t const s_auPtEntityInvMasks[] = + { + ~VTD_SL_PTE_VALID_MASK, + ~VTD_SL_PDE_VALID_MASK, + ~VTD_SL_PDPE_VALID_MASK, + ~VTD_SL_PML4E_VALID_MASK, + ~VTD_SL_PML5E_VALID_MASK + }; + + /* Paranoia. */ + Assert(pMemReqAux->cPagingLevel >= 3 && pMemReqAux->cPagingLevel <= 5); + AssertCompile(RT_ELEMENTS(s_auPtEntityInvMasks) == 5); + + /* Second-level translations restricts input address to an implementation-specific MGAW. */ + uint64_t const uAddrIn = pMemReqIn->AddrRange.uAddr; + if (!(uAddrIn & pThis->fMgawInvMask)) + { /* likely */ } + else + { + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Xm_AddrIn_Invalid, pMemReqIn, pMemReqAux); + return VERR_IOMMU_ADDR_TRANSLATION_FAILED; + } + + /* + * Traverse the I/O page table starting with the SLPTPTR (second-level page table pointer). + * Unlike AMD IOMMU paging, here there is no feature for "skipping" levels. + */ + if (pMemReqAux->cPagingLevel > 0) + { + uint64_t uPtEntity = pMemReqAux->GCPhysSlPt; + for (uint8_t idxLevel = pMemReqAux->cPagingLevel - 1; /* not needed: idxLevel >= 0 */; idxLevel--) + { + /* + * Read the paging entry for the current level. + */ + uint8_t const cLevelShift = X86_PAGE_4K_SHIFT + (idxLevel * 9); + { + uint16_t const idxPte = (uAddrIn >> cLevelShift) & UINT64_C(0x1ff); + uint16_t const offPte = idxPte << 3; + RTGCPHYS const GCPhysPtEntity = (uPtEntity & X86_PAGE_4K_BASE_MASK) | offPte; + int const rc = PDMDevHlpPhysReadMeta(pDevIns, GCPhysPtEntity, &uPtEntity, sizeof(uPtEntity)); + if (RT_SUCCESS(rc)) + { /* likely */ } + else + { + if ((GCPhysPtEntity & X86_PAGE_BASE_MASK) == pMemReqAux->GCPhysSlPt) + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Xm_Slpptr_Read_Failed, pMemReqIn, pMemReqAux); + else + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Xm_Read_Pte_Failed, pMemReqIn, pMemReqAux); + break; + } + } + + /* + * Check I/O permissions. + * This must be done prior to check reserved bits for properly reporting errors SSL.2 and SSL.3. + * See Intel spec. 7.1.3 "Fault conditions and Remapping hardware behavior for various request". + */ + uint8_t const fReqPerm = pMemReqIn->AddrRange.fPerm & pThis->fPermValidMask; + uint8_t const fPtPerm = uPtEntity & pThis->fPermValidMask; + Assert(!(fReqPerm & DMAR_PERM_EXE)); /* No Execute-requests support yet. */ + Assert(!(pThis->fExtCapReg & VTD_BF_ECAP_REG_SLADS_MASK)); /* No Second-level access/dirty support. */ + if ((fPtPerm & fReqPerm) == fReqPerm) + { /* likely */ } + else + { + if ((fPtPerm & (VTD_BF_SL_PTE_R_MASK | VTD_BF_SL_PTE_W_MASK)) == 0) + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Xm_Pte_Not_Present, pMemReqIn, pMemReqAux); + else if ((pMemReqIn->AddrRange.fPerm & DMAR_PERM_READ) != (fPtPerm & VTD_BF_SL_PTE_R_MASK)) + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Xm_Perm_Read_Denied, pMemReqIn, pMemReqAux); + else + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Xm_Perm_Write_Denied, pMemReqIn, pMemReqAux); + break; + } + + /* + * Validate reserved bits of the current paging entry. + */ + if (!(uPtEntity & s_auPtEntityInvMasks[(uintptr_t)idxLevel])) + { /* likely */ } + else + { + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Xm_Pte_Rsvd, pMemReqIn, pMemReqAux); + break; + } + + /* + * Check if this is a 1GB page or a 2MB page. + */ + AssertCompile(VTD_BF_SL_PDE_PS_MASK == VTD_BF_SL_PDPE_PS_MASK); + uint8_t const fLargePage = RT_BF_GET(uPtEntity, VTD_BF_SL_PDE_PS); + if (fLargePage && idxLevel > 0) + { + Assert(idxLevel == 1 || idxLevel == 2); /* Is guaranteed by the reserved bits check above. */ + uint8_t const fSllpsMask = RT_BF_GET(pThis->fCapReg, VTD_BF_CAP_REG_SLLPS); + if (fSllpsMask & RT_BIT(idxLevel - 1)) + { + /* + * We don't support MTS (asserted below), hence IPAT and EMT fields of the paging entity are ignored. + * All other reserved bits are identical to the regular page-size paging entity which we've already + * checked above. + */ + Assert(!(pThis->fExtCapReg & VTD_BF_ECAP_REG_MTS_MASK)); + + RTGCPHYS const GCPhysBase = uPtEntity & X86_GET_PAGE_BASE_MASK(cLevelShift); + return dmarDrUpdateIoPageOut(pDevIns, GCPhysBase, cLevelShift, fPtPerm, pMemReqIn, pMemReqAux, pIoPageOut); + } + + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Xm_Pte_Sllps_Invalid, pMemReqIn, pMemReqAux); + break; + } + + /* + * If this is the final PTE, compute the translation address and we're done. + */ + if (idxLevel == 0) + { + RTGCPHYS const GCPhysBase = uPtEntity & X86_GET_PAGE_BASE_MASK(cLevelShift); + return dmarDrUpdateIoPageOut(pDevIns, GCPhysBase, cLevelShift, fPtPerm, pMemReqIn, pMemReqAux, pIoPageOut); + } + } + } + + return VERR_IOMMU_ADDR_TRANSLATION_FAILED; +} + + +/** + * Looks up the range of addresses for a DMA memory request remapping. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param pfnLookup The DMA address lookup function. + * @param pMemReqRemap The DMA memory request remapping info. + */ +static int dmarDrMemRangeLookup(PPDMDEVINS pDevIns, PFNDMADDRLOOKUP pfnLookup, PDMARMEMREQREMAP pMemReqRemap) +{ + AssertPtr(pfnLookup); + + RTGCPHYS GCPhysAddrOut = NIL_RTGCPHYS; + DMARMEMREQIN MemReqIn = pMemReqRemap->In; + uint64_t const uAddrIn = MemReqIn.AddrRange.uAddr; + size_t const cbAddrIn = MemReqIn.AddrRange.cb; + uint64_t uAddrInBase = MemReqIn.AddrRange.uAddr & X86_PAGE_4K_BASE_MASK; + uint64_t offAddrIn = MemReqIn.AddrRange.uAddr & X86_PAGE_4K_OFFSET_MASK; + size_t cbRemaining = cbAddrIn; + size_t const cbPage = X86_PAGE_4K_SIZE; + + int rc; + DMARIOPAGE IoPagePrev; + RT_ZERO(IoPagePrev); + for (;;) + { + /* Update the input memory request with the next address in our range that needs translation. */ + MemReqIn.AddrRange.uAddr = uAddrInBase; + MemReqIn.AddrRange.cb = cbRemaining; /* Not currently accessed by pfnLookup, but keep things consistent. */ + + /* Lookup the physical page corresponding to the DMA virtual address. */ + DMARIOPAGE IoPage; + rc = pfnLookup(pDevIns, &MemReqIn, &pMemReqRemap->Aux, &IoPage); + if (RT_SUCCESS(rc)) + { + /* Validate results of the translation. */ + Assert(IoPage.cShift >= X86_PAGE_4K_SHIFT && IoPage.cShift <= X86_PAGE_1G_SHIFT); + Assert(!(IoPage.GCPhysBase & X86_GET_PAGE_OFFSET_MASK(IoPage.cShift))); + Assert((IoPage.fPerm & MemReqIn.AddrRange.fPerm) == MemReqIn.AddrRange.fPerm); + + /* Store the translated address and permissions before continuing to access more pages. */ + if (cbRemaining == cbAddrIn) + { + uint64_t const offAddrOut = uAddrIn & X86_GET_PAGE_OFFSET_MASK(IoPage.cShift); + GCPhysAddrOut = IoPage.GCPhysBase | offAddrOut; + } + /* Check if addresses translated so far result in a physically contiguous region. */ + /** @todo Ensure permissions are identical as well if we implementing IOTLB caching + * that relies on it being so. */ + else if (IoPagePrev.GCPhysBase + cbPage == IoPage.GCPhysBase) + { /* likely */ } + else + { + rc = VERR_OUT_OF_RANGE; + break; + } + + /* Store the I/O page lookup from the first/previous access. */ + IoPagePrev = IoPage; + + /* Check if we need to access more pages. */ + if (cbRemaining > cbPage - offAddrIn) + { + cbRemaining -= (cbPage - offAddrIn); /* Calculate how much more we need to access. */ + uAddrInBase += cbPage; /* Update address of the next access. */ + offAddrIn = 0; /* After the first page, remaining pages are accessed from offset 0. */ + } + else + { + /* Caller (PDM) doesn't expect more data accessed than what was requested. */ + cbRemaining = 0; + break; + } + } + else + break; + } + + pMemReqRemap->Out.AddrRange.uAddr = GCPhysAddrOut; + pMemReqRemap->Out.AddrRange.cb = cbAddrIn - cbRemaining; + pMemReqRemap->Out.AddrRange.fPerm = IoPagePrev.fPerm; + return rc; +} + + +/** + * Handles legacy mode DMA address remapping. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param uRtaddrReg The current RTADDR_REG value. + * @param pMemReqRemap The DMA memory request remapping info. + */ +static int dmarDrLegacyModeRemapAddr(PPDMDEVINS pDevIns, uint64_t uRtaddrReg, PDMARMEMREQREMAP pMemReqRemap) +{ + PCDMARMEMREQIN pMemReqIn = &pMemReqRemap->In; + PDMARMEMREQAUX pMemReqAux = &pMemReqRemap->Aux; + PDMARMEMREQOUT pMemReqOut = &pMemReqRemap->Out; + Assert(pMemReqAux->fTtm == VTD_TTM_LEGACY_MODE); /* Paranoia. */ + + /* Read the root-entry from guest memory. */ + uint8_t const idxRootEntry = RT_HI_U8(pMemReqIn->idDevice); + VTD_ROOT_ENTRY_T RootEntry; + int rc = dmarDrReadRootEntry(pDevIns, uRtaddrReg, idxRootEntry, &RootEntry); + if (RT_SUCCESS(rc)) + { + /* Check if the root entry is present (must be done before validating reserved bits). */ + uint64_t const uRootEntryQword0 = RootEntry.au64[0]; + uint64_t const uRootEntryQword1 = RootEntry.au64[1]; + bool const fRootEntryPresent = RT_BF_GET(uRootEntryQword0, VTD_BF_0_ROOT_ENTRY_P); + if (fRootEntryPresent) + { + /* Validate reserved bits in the root entry. */ + if ( !(uRootEntryQword0 & ~VTD_ROOT_ENTRY_0_VALID_MASK) + && !(uRootEntryQword1 & ~VTD_ROOT_ENTRY_1_VALID_MASK)) + { + /* Read the context-entry from guest memory. */ + RTGCPHYS const GCPhysCtxTable = uRootEntryQword0 & VTD_BF_0_ROOT_ENTRY_CTP_MASK; + uint8_t const idxCtxEntry = RT_LO_U8(pMemReqIn->idDevice); + VTD_CONTEXT_ENTRY_T CtxEntry; + rc = dmarDrReadCtxEntry(pDevIns, GCPhysCtxTable, idxCtxEntry, &CtxEntry); + if (RT_SUCCESS(rc)) + { + uint64_t const uCtxEntryQword0 = CtxEntry.au64[0]; + uint64_t const uCtxEntryQword1 = CtxEntry.au64[1]; + + /* Note the FPD bit which software can use to supress translation faults from here on in. */ + pMemReqAux->fFpd = RT_BF_GET(uCtxEntryQword0, VTD_BF_0_CONTEXT_ENTRY_FPD); + + /* Check if the context-entry is present (must be done before validating reserved bits). */ + bool const fCtxEntryPresent = RT_BF_GET(uCtxEntryQword0, VTD_BF_0_CONTEXT_ENTRY_P); + if (fCtxEntryPresent) + { + /* Validate reserved bits in the context-entry. */ + PCDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PCDMAR); + if ( !(uCtxEntryQword0 & ~VTD_CONTEXT_ENTRY_0_VALID_MASK) + && !(uCtxEntryQword1 & ~pThis->fCtxEntryQw1ValidMask)) + { + /* Get the domain ID for this mapping. */ + pMemReqOut->idDomain = RT_BF_GET(uCtxEntryQword1, VTD_BF_1_CONTEXT_ENTRY_DID); + + /* Validate the translation type (TT). */ + uint8_t const fTt = RT_BF_GET(uCtxEntryQword0, VTD_BF_0_CONTEXT_ENTRY_TT); + switch (fTt) + { + case VTD_TT_UNTRANSLATED_SLP: + { + /* + * Untranslated requests are translated using second-level paging structures referenced + * through SLPTPTR. Translated requests and Translation Requests are blocked. + */ + if (pMemReqIn->enmAddrType == PCIADDRTYPE_UNTRANSLATED) + { + /* Validate the address width and get the paging level. */ + uint8_t cPagingLevel; + if (dmarDrLegacyModeIsAwValid(pThis, &CtxEntry, &cPagingLevel)) + { + /* + * The second-level page table is located at the physical address specified + * in the context entry with which we can finally perform second-level translation. + */ + pMemReqAux->cPagingLevel = cPagingLevel; + pMemReqAux->GCPhysSlPt = uCtxEntryQword0 & VTD_BF_0_CONTEXT_ENTRY_SLPTPTR_MASK; + rc = dmarDrMemRangeLookup(pDevIns, dmarDrSecondLevelTranslate, pMemReqRemap); + if (rc == VERR_OUT_OF_RANGE) + rc = VINF_SUCCESS; + return rc; + } + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Lm_Ut_Aw_Invalid, pMemReqIn, pMemReqAux); + } + else + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Lm_Ut_At_Block, pMemReqIn, pMemReqAux); + break; + } + + case VTD_TT_UNTRANSLATED_PT: + { + /* + * Untranslated requests are processed as pass-through (PT) if PT is supported. + * Translated and translation requests are blocked. If PT isn't supported this TT value + * is reserved which I assume raises a fault (hence fallthru below). + */ + if (pThis->fExtCapReg & VTD_BF_ECAP_REG_PT_MASK) + { + if (pMemReqRemap->In.enmAddrType == PCIADDRTYPE_UNTRANSLATED) + { + if (dmarDrLegacyModeIsAwValid(pThis, &CtxEntry, NULL /* pcPagingLevel */)) + { + PDMARMEMREQOUT pOut = &pMemReqRemap->Out; + PCDMARMEMREQIN pIn = &pMemReqRemap->In; + pOut->AddrRange.uAddr = pIn->AddrRange.uAddr; + pOut->AddrRange.cb = pIn->AddrRange.cb; + pOut->AddrRange.fPerm = DMAR_PERM_ALL; + return VINF_SUCCESS; + } + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Lm_Pt_Aw_Invalid, pMemReqIn, pMemReqAux); + } + else + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Lm_Pt_At_Block, pMemReqIn, pMemReqAux); + break; + } + RT_FALL_THRU(); + } + + case VTD_TT_UNTRANSLATED_DEV_TLB: + { + /* + * Untranslated, translated and translation requests are supported but requires + * device-TLB support. We don't support device-TLBs, so it's treated as reserved. + */ + Assert(!(pThis->fExtCapReg & VTD_BF_ECAP_REG_DT_MASK)); + RT_FALL_THRU(); + } + + default: + { + /* Any other TT value is reserved. */ + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Lm_Tt_Invalid, pMemReqIn, pMemReqAux); + break; + } + } + } + else + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Lm_CtxEntry_Rsvd, pMemReqIn, pMemReqAux); + } + else + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Lm_CtxEntry_Not_Present, pMemReqIn, pMemReqAux); + } + else + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Lm_CtxEntry_Read_Failed, pMemReqIn, pMemReqAux); + } + else + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Lm_RootEntry_Rsvd, pMemReqIn, pMemReqAux); + } + else + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Lm_RootEntry_Not_Present, pMemReqIn, pMemReqAux); + } + else + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Lm_RootEntry_Read_Failed, pMemReqIn, pMemReqAux); + return VERR_IOMMU_ADDR_TRANSLATION_FAILED; +} + + +/** + * Handles remapping of DMA address requests in scalable mode. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param uRtaddrReg The current RTADDR_REG value. + * @param pMemReqRemap The DMA memory request remapping info. + */ +static int dmarDrScalableModeRemapAddr(PPDMDEVINS pDevIns, uint64_t uRtaddrReg, PDMARMEMREQREMAP pMemReqRemap) +{ + RT_NOREF3(pDevIns, uRtaddrReg, pMemReqRemap); + return VERR_NOT_IMPLEMENTED; +} + + +/** + * Gets the DMA access permissions and the address-translation request + * type given the PDM IOMMU memory access flags. + * + * @param pDevIns The IOMMU device instance. + * @param fFlags The access flags, see PDMIOMMU_MEM_F_XXX. + * @param fBulk Whether this is a bulk memory access (used for + * statistics). + * @param penmReqType Where to store the address-translation request type. + * @param pfReqPerm Where to store the DMA access permissions. + */ +static void dmarDrGetPermAndReqType(PPDMDEVINS pDevIns, uint32_t fFlags, bool fBulk, PVTDREQTYPE penmReqType, uint8_t *pfReqPerm) +{ + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + if (fFlags & PDMIOMMU_MEM_F_READ) + { + *penmReqType = VTDREQTYPE_READ; + *pfReqPerm = DMAR_PERM_READ; +#ifdef VBOX_WITH_STATISTICS + if (!fBulk) + STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMemRead)); + else + STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMemBulkRead)); +#else + RT_NOREF2(pThis, fBulk); +#endif + } + else + { + *penmReqType = VTDREQTYPE_WRITE; + *pfReqPerm = DMAR_PERM_WRITE; +#ifdef VBOX_WITH_STATISTICS + if (!fBulk) + STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMemWrite)); + else + STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMemBulkWrite)); +#else + RT_NOREF2(pThis, fBulk); +#endif + } +} + + +/** + * Handles DMA remapping based on the table translation mode (TTM). + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param uRtaddrReg The current RTADDR_REG value. + * @param pMemReqRemap The DMA memory request remapping info. + */ +static int dmarDrMemReqRemap(PPDMDEVINS pDevIns, uint64_t uRtaddrReg, PDMARMEMREQREMAP pMemReqRemap) +{ + int rc; + switch (pMemReqRemap->Aux.fTtm) + { + case VTD_TTM_LEGACY_MODE: + { + rc = dmarDrLegacyModeRemapAddr(pDevIns, uRtaddrReg, pMemReqRemap); + break; + } + + case VTD_TTM_SCALABLE_MODE: + { + PCDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PCDMAR); + if (pThis->fExtCapReg & VTD_BF_ECAP_REG_SMTS_MASK) + rc = dmarDrScalableModeRemapAddr(pDevIns, uRtaddrReg, pMemReqRemap); + else + { + rc = VERR_IOMMU_ADDR_TRANSLATION_FAILED; + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Rta_Smts_Not_Supported, &pMemReqRemap->In, &pMemReqRemap->Aux); + } + break; + } + + case VTD_TTM_ABORT_DMA_MODE: + { + PCDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PCDMAR); + if (pThis->fExtCapReg & VTD_BF_ECAP_REG_ADMS_MASK) + dmarDrTargetAbort(pDevIns); + else + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Rta_Adms_Not_Supported, &pMemReqRemap->In, &pMemReqRemap->Aux); + rc = VERR_IOMMU_ADDR_TRANSLATION_FAILED; + break; + } + + default: + { + rc = VERR_IOMMU_ADDR_TRANSLATION_FAILED; + dmarAtFaultRecord(pDevIns, kDmarDiag_At_Rta_Rsvd, &pMemReqRemap->In, &pMemReqRemap->Aux); + break; + } + } + return rc; +} + + +/** + * Memory access bulk (one or more 4K pages) request from a device. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param idDevice The device ID (bus, device, function). + * @param cIovas The number of addresses being accessed. + * @param pauIovas The I/O virtual addresses for each page being accessed. + * @param fFlags The access flags, see PDMIOMMU_MEM_F_XXX. + * @param paGCPhysSpa Where to store the translated physical addresses. + * + * @thread Any. + */ +static DECLCALLBACK(int) iommuIntelMemBulkAccess(PPDMDEVINS pDevIns, uint16_t idDevice, size_t cIovas, uint64_t const *pauIovas, + uint32_t fFlags, PRTGCPHYS paGCPhysSpa) +{ + /* Validate. */ + AssertPtr(pDevIns); + Assert(cIovas > 0); + AssertPtr(pauIovas); + AssertPtr(paGCPhysSpa); + Assert(!(fFlags & ~PDMIOMMU_MEM_F_VALID_MASK)); + + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + PCDMARCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PCDMARCC); + + DMAR_LOCK(pDevIns, pThisCC); + uint32_t const uGstsReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_GSTS_REG); + uint64_t const uRtaddrReg = pThis->uRtaddrReg; + DMAR_UNLOCK(pDevIns, pThisCC); + + if (uGstsReg & VTD_BF_GSTS_REG_TES_MASK) + { + VTDREQTYPE enmReqType; + uint8_t fReqPerm; + dmarDrGetPermAndReqType(pDevIns, fFlags, true /* fBulk */, &enmReqType, &fReqPerm); + + DMARMEMREQREMAP MemReqRemap; + RT_ZERO(MemReqRemap); + MemReqRemap.In.AddrRange.cb = X86_PAGE_SIZE; + MemReqRemap.In.AddrRange.fPerm = fReqPerm; + MemReqRemap.In.idDevice = idDevice; + MemReqRemap.In.Pasid = NIL_PCIPASID; + MemReqRemap.In.enmAddrType = PCIADDRTYPE_UNTRANSLATED; + MemReqRemap.In.enmReqType = enmReqType; + MemReqRemap.Aux.fTtm = RT_BF_GET(uRtaddrReg, VTD_BF_RTADDR_REG_TTM); + MemReqRemap.Out.AddrRange.uAddr = NIL_RTGCPHYS; + + for (size_t i = 0; i < cIovas; i++) + { + MemReqRemap.In.AddrRange.uAddr = pauIovas[i] & X86_PAGE_BASE_MASK; + int const rc = dmarDrMemReqRemap(pDevIns, uRtaddrReg, &MemReqRemap); + if (RT_SUCCESS(rc)) + { + paGCPhysSpa[i] = MemReqRemap.Out.AddrRange.uAddr | (pauIovas[i] & X86_PAGE_OFFSET_MASK); + Assert(MemReqRemap.Out.AddrRange.cb == MemReqRemap.In.AddrRange.cb); + } + else + { + LogFlowFunc(("idDevice=%#x uIova=%#RX64 fPerm=%#x rc=%Rrc\n", idDevice, pauIovas[i], fReqPerm, rc)); + return rc; + } + } + } + else + { + /* Addresses are forwarded without translation when the translation is disabled. */ + for (size_t i = 0; i < cIovas; i++) + paGCPhysSpa[i] = pauIovas[i]; + } + + return VINF_SUCCESS; +} + + +/** + * Memory access transaction from a device. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param idDevice The device ID (bus, device, function). + * @param uIova The I/O virtual address being accessed. + * @param cbIova The size of the access. + * @param fFlags The access flags, see PDMIOMMU_MEM_F_XXX. + * @param pGCPhysSpa Where to store the translated system physical address. + * @param pcbContiguous Where to store the number of contiguous bytes translated + * and permission-checked. + * + * @thread Any. + */ +static DECLCALLBACK(int) iommuIntelMemAccess(PPDMDEVINS pDevIns, uint16_t idDevice, uint64_t uIova, size_t cbIova, + uint32_t fFlags, PRTGCPHYS pGCPhysSpa, size_t *pcbContiguous) +{ + /* Validate. */ + AssertPtr(pDevIns); + AssertPtr(pGCPhysSpa); + AssertPtr(pcbContiguous); + Assert(cbIova > 0); /** @todo Are we going to support ZLR (zero-length reads to write-only pages)? */ + Assert(!(fFlags & ~PDMIOMMU_MEM_F_VALID_MASK)); + + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + PCDMARCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PCDMARCC); + + DMAR_LOCK(pDevIns, pThisCC); + uint32_t const uGstsReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_GSTS_REG); + uint64_t const uRtaddrReg = pThis->uRtaddrReg; + DMAR_UNLOCK(pDevIns, pThisCC); + + if (uGstsReg & VTD_BF_GSTS_REG_TES_MASK) + { + VTDREQTYPE enmReqType; + uint8_t fReqPerm; + dmarDrGetPermAndReqType(pDevIns, fFlags, false /* fBulk */, &enmReqType, &fReqPerm); + + DMARMEMREQREMAP MemReqRemap; + RT_ZERO(MemReqRemap); + MemReqRemap.In.AddrRange.uAddr = uIova; + MemReqRemap.In.AddrRange.cb = cbIova; + MemReqRemap.In.AddrRange.fPerm = fReqPerm; + MemReqRemap.In.idDevice = idDevice; + MemReqRemap.In.Pasid = NIL_PCIPASID; + MemReqRemap.In.enmAddrType = PCIADDRTYPE_UNTRANSLATED; + MemReqRemap.In.enmReqType = enmReqType; + MemReqRemap.Aux.fTtm = RT_BF_GET(uRtaddrReg, VTD_BF_RTADDR_REG_TTM); + MemReqRemap.Out.AddrRange.uAddr = NIL_RTGCPHYS; + + int const rc = dmarDrMemReqRemap(pDevIns, uRtaddrReg, &MemReqRemap); + *pGCPhysSpa = MemReqRemap.Out.AddrRange.uAddr; + *pcbContiguous = MemReqRemap.Out.AddrRange.cb; + return rc; + } + + *pGCPhysSpa = uIova; + *pcbContiguous = cbIova; + return VINF_SUCCESS; +} + + +/** + * Reads an IRTE from guest memory. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param uIrtaReg The IRTA_REG. + * @param idxIntr The interrupt index. + * @param pIrte Where to store the read IRTE. + */ +static int dmarIrReadIrte(PPDMDEVINS pDevIns, uint64_t uIrtaReg, uint16_t idxIntr, PVTD_IRTE_T pIrte) +{ + Assert(idxIntr < VTD_IRTA_REG_GET_ENTRY_COUNT(uIrtaReg)); + + size_t const cbIrte = sizeof(*pIrte); + RTGCPHYS const GCPhysIrte = (uIrtaReg & VTD_BF_IRTA_REG_IRTA_MASK) + (idxIntr * cbIrte); + return PDMDevHlpPhysReadMeta(pDevIns, GCPhysIrte, pIrte, cbIrte); +} + + +/** + * Remaps the source MSI to the destination MSI given the IRTE. + * + * @param fExtIntrMode Whether extended interrupt mode is enabled (i.e + * IRTA_REG.EIME). + * @param pIrte The IRTE used for the remapping. + * @param pMsiIn The source MSI (currently unused). + * @param pMsiOut Where to store the remapped MSI. + */ +static void dmarIrRemapFromIrte(bool fExtIntrMode, PCVTD_IRTE_T pIrte, PCMSIMSG pMsiIn, PMSIMSG pMsiOut) +{ + NOREF(pMsiIn); + uint64_t const uIrteQword0 = pIrte->au64[0]; + + /* + * Let's start with a clean slate and preserve unspecified bits if the need arises. + * For instance, address bits 1:0 is supposed to be "ignored" by remapping hardware, + * but it's not clear if hardware zeroes out these bits in the remapped MSI or if + * it copies it from the source MSI. + */ + RT_ZERO(*pMsiOut); + pMsiOut->Addr.n.u1DestMode = RT_BF_GET(uIrteQword0, VTD_BF_0_IRTE_DM); + pMsiOut->Addr.n.u1RedirHint = RT_BF_GET(uIrteQword0, VTD_BF_0_IRTE_RH); + pMsiOut->Addr.n.u12Addr = VBOX_MSI_ADDR_BASE >> VBOX_MSI_ADDR_SHIFT; + if (fExtIntrMode) + { + /* + * Apparently the DMAR stuffs the high 24-bits of the destination ID into the + * high 24-bits of the upper 32-bits of the message address, see @bugref{9967#c22}. + */ + uint32_t const idDest = RT_BF_GET(uIrteQword0, VTD_BF_0_IRTE_DST); + pMsiOut->Addr.n.u8DestId = idDest; + pMsiOut->Addr.n.u32Rsvd0 = idDest & UINT32_C(0xffffff00); + } + else + pMsiOut->Addr.n.u8DestId = RT_BF_GET(uIrteQword0, VTD_BF_0_IRTE_DST_XAPIC); + + pMsiOut->Data.n.u8Vector = RT_BF_GET(uIrteQword0, VTD_BF_0_IRTE_V); + pMsiOut->Data.n.u3DeliveryMode = RT_BF_GET(uIrteQword0, VTD_BF_0_IRTE_DLM); + pMsiOut->Data.n.u1Level = 1; + pMsiOut->Data.n.u1TriggerMode = RT_BF_GET(uIrteQword0, VTD_BF_0_IRTE_TM); +} + + +/** + * Handles remapping of interrupts in remappable interrupt format. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param uIrtaReg The IRTA_REG. + * @param idDevice The device ID (bus, device, function). + * @param pMsiIn The source MSI. + * @param pMsiOut Where to store the remapped MSI. + */ +static int dmarIrRemapIntr(PPDMDEVINS pDevIns, uint64_t uIrtaReg, uint16_t idDevice, PCMSIMSG pMsiIn, PMSIMSG pMsiOut) +{ + Assert(pMsiIn->Addr.dmar_remap.fIntrFormat == VTD_INTR_FORMAT_REMAPPABLE); + + /* Validate reserved bits in the interrupt request. */ + AssertCompile(VTD_REMAPPABLE_MSI_ADDR_VALID_MASK == UINT32_MAX); + if (!(pMsiIn->Data.u32 & ~VTD_REMAPPABLE_MSI_DATA_VALID_MASK)) + { + /* Compute the index into the interrupt remap table. */ + uint16_t const uHandleHi = RT_BF_GET(pMsiIn->Addr.au32[0], VTD_BF_REMAPPABLE_MSI_ADDR_HANDLE_HI); + uint16_t const uHandleLo = RT_BF_GET(pMsiIn->Addr.au32[0], VTD_BF_REMAPPABLE_MSI_ADDR_HANDLE_LO); + uint16_t const uHandle = uHandleLo | (uHandleHi << 15); + bool const fSubHandleValid = RT_BF_GET(pMsiIn->Addr.au32[0], VTD_BF_REMAPPABLE_MSI_ADDR_SHV); + uint16_t const idxIntr = fSubHandleValid + ? uHandle + RT_BF_GET(pMsiIn->Data.u32, VTD_BF_REMAPPABLE_MSI_DATA_SUBHANDLE) + : uHandle; + + /* Validate the index. */ + uint32_t const cEntries = VTD_IRTA_REG_GET_ENTRY_COUNT(uIrtaReg); + if (idxIntr < cEntries) + { + /** @todo Implement and read IRTE from interrupt-entry cache here. */ + + /* Read the interrupt remap table entry (IRTE) at the index. */ + VTD_IRTE_T Irte; + int rc = dmarIrReadIrte(pDevIns, uIrtaReg, idxIntr, &Irte); + if (RT_SUCCESS(rc)) + { + /* Check if the IRTE is present (this must be done -before- checking reserved bits). */ + uint64_t const uIrteQword0 = Irte.au64[0]; + uint64_t const uIrteQword1 = Irte.au64[1]; + bool const fPresent = RT_BF_GET(uIrteQword0, VTD_BF_0_IRTE_P); + if (fPresent) + { + /* Validate reserved bits in the IRTE. */ + bool const fExtIntrMode = RT_BF_GET(uIrtaReg, VTD_BF_IRTA_REG_EIME); + uint64_t const fQw0ValidMask = fExtIntrMode ? VTD_IRTE_0_X2APIC_VALID_MASK : VTD_IRTE_0_XAPIC_VALID_MASK; + if ( !(uIrteQword0 & ~fQw0ValidMask) + && !(uIrteQword1 & ~VTD_IRTE_1_VALID_MASK)) + { + /* Validate requester id (the device ID) as configured in the IRTE. */ + bool fSrcValid; + DMARDIAG enmIrDiag; + uint8_t const fSvt = RT_BF_GET(uIrteQword1, VTD_BF_1_IRTE_SVT); + switch (fSvt) + { + case VTD_IRTE_SVT_NONE: + { + fSrcValid = true; + enmIrDiag = kDmarDiag_None; + break; + } + + case VTD_IRTE_SVT_VALIDATE_MASK: + { + static uint16_t const s_afValidMasks[] = { 0xffff, 0xfffb, 0xfff9, 0xfff8 }; + uint8_t const idxMask = RT_BF_GET(uIrteQword1, VTD_BF_1_IRTE_SQ) & 3; + uint16_t const fValidMask = s_afValidMasks[idxMask]; + uint16_t const idSource = RT_BF_GET(uIrteQword1, VTD_BF_1_IRTE_SID); + fSrcValid = (idDevice & fValidMask) == (idSource & fValidMask); + enmIrDiag = kDmarDiag_Ir_Rfi_Irte_Svt_Masked; + break; + } + + case VTD_IRTE_SVT_VALIDATE_BUS_RANGE: + { + uint16_t const idSource = RT_BF_GET(uIrteQword1, VTD_BF_1_IRTE_SID); + uint8_t const uBusFirst = RT_HI_U8(idSource); + uint8_t const uBusLast = RT_LO_U8(idSource); + uint8_t const idDeviceBus = idDevice >> VBOX_PCI_BUS_SHIFT; + fSrcValid = (idDeviceBus >= uBusFirst && idDeviceBus <= uBusLast); + enmIrDiag = kDmarDiag_Ir_Rfi_Irte_Svt_Bus; + break; + } + + default: + { + fSrcValid = false; + enmIrDiag = kDmarDiag_Ir_Rfi_Irte_Svt_Rsvd; + break; + } + } + + if (fSrcValid) + { + uint8_t const fPostedMode = RT_BF_GET(uIrteQword0, VTD_BF_0_IRTE_IM); + if (!fPostedMode) + { + dmarIrRemapFromIrte(fExtIntrMode, &Irte, pMsiIn, pMsiOut); + return VINF_SUCCESS; + } + dmarIrFaultRecord(pDevIns, kDmarDiag_Ir_Rfi_Irte_Mode_Invalid, idDevice, idxIntr, &Irte); + } + else + dmarIrFaultRecord(pDevIns, enmIrDiag, idDevice, idxIntr, &Irte); + } + else + dmarIrFaultRecord(pDevIns, kDmarDiag_Ir_Rfi_Irte_Rsvd, idDevice, idxIntr, &Irte); + } + else + dmarIrFaultRecord(pDevIns, kDmarDiag_Ir_Rfi_Irte_Not_Present, idDevice, idxIntr, &Irte); + } + else + dmarIrFaultRecord(pDevIns, kDmarDiag_Ir_Rfi_Irte_Read_Failed, idDevice, idxIntr, NULL /* pIrte */); + } + else + dmarIrFaultRecord(pDevIns, kDmarDiag_Ir_Rfi_Intr_Index_Invalid, idDevice, idxIntr, NULL /* pIrte */); + } + else + dmarIrFaultRecord(pDevIns, kDmarDiag_Ir_Rfi_Rsvd, idDevice, 0 /* idxIntr */, NULL /* pIrte */); + return VERR_IOMMU_INTR_REMAP_DENIED; +} + + +/** + * Interrupt remap request from a device. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param idDevice The device ID (bus, device, function). + * @param pMsiIn The source MSI. + * @param pMsiOut Where to store the remapped MSI. + */ +static DECLCALLBACK(int) iommuIntelMsiRemap(PPDMDEVINS pDevIns, uint16_t idDevice, PCMSIMSG pMsiIn, PMSIMSG pMsiOut) +{ + /* Validate. */ + Assert(pDevIns); + Assert(pMsiIn); + Assert(pMsiOut); + RT_NOREF1(idDevice); + + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + PCDMARCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PCDMARCC); + + /* Lock and read all registers required for interrupt remapping up-front. */ + DMAR_LOCK(pDevIns, pThisCC); + uint32_t const uGstsReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_GSTS_REG); + uint64_t const uIrtaReg = pThis->uIrtaReg; + DMAR_UNLOCK(pDevIns, pThisCC); + + /* Check if interrupt remapping is enabled. */ + if (uGstsReg & VTD_BF_GSTS_REG_IRES_MASK) + { + bool const fIsRemappable = RT_BF_GET(pMsiIn->Addr.au32[0], VTD_BF_REMAPPABLE_MSI_ADDR_INTR_FMT); + if (!fIsRemappable) + { + /* Handle compatibility format interrupts. */ + STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMsiRemapCfi)); + + /* If EIME is enabled or CFIs are disabled, block the interrupt. */ + if ( (uIrtaReg & VTD_BF_IRTA_REG_EIME_MASK) + || !(uGstsReg & VTD_BF_GSTS_REG_CFIS_MASK)) + { + dmarIrFaultRecord(pDevIns, kDmarDiag_Ir_Cfi_Blocked, VTDIRFAULT_CFI_BLOCKED, idDevice, 0 /* idxIntr */); + return VERR_IOMMU_INTR_REMAP_DENIED; + } + + /* Interrupt isn't subject to remapping, pass-through the interrupt. */ + *pMsiOut = *pMsiIn; + return VINF_SUCCESS; + } + + /* Handle remappable format interrupts. */ + STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMsiRemapRfi)); + return dmarIrRemapIntr(pDevIns, uIrtaReg, idDevice, pMsiIn, pMsiOut); + } + + /* Interrupt-remapping isn't enabled, all interrupts are pass-through. */ + *pMsiOut = *pMsiIn; + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNIOMMMIONEWWRITE} + */ +static DECLCALLBACK(VBOXSTRICTRC) dmarMmioWrite(PPDMDEVINS pDevIns, void *pvUser, RTGCPHYS off, void const *pv, unsigned cb) +{ + RT_NOREF1(pvUser); + DMAR_ASSERT_MMIO_ACCESS_RET(off, cb); + + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMmioWrite)); + + uint16_t const offReg = off; + uint16_t const offLast = offReg + cb - 1; + if (DMAR_IS_MMIO_OFF_VALID(offLast)) + { + PCDMARCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PCDMARCC); + DMAR_LOCK_RET(pDevIns, pThisCC, VINF_IOM_R3_MMIO_WRITE); + + uint64_t uPrev = 0; + uint64_t const uRegWritten = cb == 8 ? dmarRegWrite64(pThis, offReg, *(uint64_t *)pv, &uPrev) + : dmarRegWrite32(pThis, offReg, *(uint32_t *)pv, (uint32_t *)&uPrev); + VBOXSTRICTRC rcStrict = VINF_SUCCESS; + switch (off) + { + case VTD_MMIO_OFF_GCMD_REG: /* 32-bit */ + { + rcStrict = dmarGcmdRegWrite(pDevIns, uRegWritten); + break; + } + + case VTD_MMIO_OFF_CCMD_REG: /* 64-bit */ + case VTD_MMIO_OFF_CCMD_REG + 4: + { + rcStrict = dmarCcmdRegWrite(pDevIns, offReg, cb, uRegWritten); + break; + } + + case VTD_MMIO_OFF_FSTS_REG: /* 32-bit */ + { + rcStrict = dmarFstsRegWrite(pDevIns, uRegWritten, uPrev); + break; + } + + case VTD_MMIO_OFF_FECTL_REG: /* 32-bit */ + { + rcStrict = dmarFectlRegWrite(pDevIns, uRegWritten); + break; + } + + case VTD_MMIO_OFF_IQT_REG: /* 64-bit */ + /* VTD_MMIO_OFF_IQT_REG + 4: */ /* High 32-bits reserved. */ + { + rcStrict = dmarIqtRegWrite(pDevIns, offReg, uRegWritten); + break; + } + + case VTD_MMIO_OFF_IQA_REG: /* 64-bit */ + /* VTD_MMIO_OFF_IQA_REG + 4: */ /* High 32-bits data. */ + { + rcStrict = dmarIqaRegWrite(pDevIns, offReg, uRegWritten); + break; + } + + case VTD_MMIO_OFF_ICS_REG: /* 32-bit */ + { + rcStrict = dmarIcsRegWrite(pDevIns, uRegWritten); + break; + } + + case VTD_MMIO_OFF_IECTL_REG: /* 32-bit */ + { + rcStrict = dmarIectlRegWrite(pDevIns, uRegWritten); + break; + } + + case DMAR_MMIO_OFF_FRCD_HI_REG: /* 64-bit */ + case DMAR_MMIO_OFF_FRCD_HI_REG + 4: + { + rcStrict = dmarFrcdHiRegWrite(pDevIns, offReg, cb, uRegWritten, uPrev); + break; + } + } + + DMAR_UNLOCK(pDevIns, pThisCC); + LogFlowFunc(("offReg=%#x uRegWritten=%#RX64 rc=%Rrc\n", offReg, uRegWritten, VBOXSTRICTRC_VAL(rcStrict))); + return rcStrict; + } + + return VINF_IOM_MMIO_UNUSED_FF; +} + + +/** + * @callback_method_impl{FNIOMMMIONEWREAD} + */ +static DECLCALLBACK(VBOXSTRICTRC) dmarMmioRead(PPDMDEVINS pDevIns, void *pvUser, RTGCPHYS off, void *pv, unsigned cb) +{ + RT_NOREF1(pvUser); + DMAR_ASSERT_MMIO_ACCESS_RET(off, cb); + + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMmioRead)); + + uint16_t const offReg = off; + uint16_t const offLast = offReg + cb - 1; + if (DMAR_IS_MMIO_OFF_VALID(offLast)) + { + PCDMARCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PCDMARCC); + DMAR_LOCK_RET(pDevIns, pThisCC, VINF_IOM_R3_MMIO_READ); + + if (cb == 8) + { + *(uint64_t *)pv = dmarRegRead64(pThis, offReg); + LogFlowFunc(("offReg=%#x pv=%#RX64\n", offReg, *(uint64_t *)pv)); + } + else + { + *(uint32_t *)pv = dmarRegRead32(pThis, offReg); + LogFlowFunc(("offReg=%#x pv=%#RX32\n", offReg, *(uint32_t *)pv)); + } + + DMAR_UNLOCK(pDevIns, pThisCC); + return VINF_SUCCESS; + } + + return VINF_IOM_MMIO_UNUSED_FF; +} + + +#ifdef IN_RING3 +/** + * Process requests in the invalidation queue. + * + * @param pDevIns The IOMMU device instance. + * @param pvRequests The requests to process. + * @param cbRequests The size of all requests (in bytes). + * @param fDw The descriptor width (VTD_IQA_REG_DW_128_BIT or + * VTD_IQA_REG_DW_256_BIT). + * @param fTtm The table translation mode. Must not be VTD_TTM_RSVD. + */ +static void dmarR3InvQueueProcessRequests(PPDMDEVINS pDevIns, void const *pvRequests, uint32_t cbRequests, uint8_t fDw, + uint8_t fTtm) +{ +#define DMAR_IQE_FAULT_RECORD_RET(a_enmDiag, a_enmIqei) \ + do \ + { \ + dmarIqeFaultRecord(pDevIns, (a_enmDiag), (a_enmIqei)); \ + return; \ + } while (0) + + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + PCDMARR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PCDMARR3); + + DMAR_ASSERT_LOCK_IS_NOT_OWNER(pDevIns, pThisR3); + Assert(fTtm != VTD_TTM_RSVD); /* Should've beeen handled by caller. */ + + /* + * The below check is redundant since we check both TTM and DW for each + * descriptor type we process. However, the order of errors reported by hardware + * may differ hence this is kept commented out but not removed if we need to + * change this in the future. + * + * In our implementation, we would report the descriptor type as invalid, + * while on real hardware it may report descriptor width as invalid. + * The Intel VT-d spec. is not clear which error takes preceedence. + */ +#if 0 + /* + * Verify that 128-bit descriptors are not used when operating in scalable mode. + * We don't check this while software writes IQA_REG but defer it until now because + * RTADDR_REG can be updated lazily (via GCMD_REG.SRTP). The 256-bit descriptor check + * -IS- performed when software writes IQA_REG since it only requires checking against + * immutable hardware features. + */ + if ( fTtm != VTD_TTM_SCALABLE_MODE + || fDw != VTD_IQA_REG_DW_128_BIT) + { /* likely */ } + else + DMAR_IQE_FAULT_RECORD_RET(kDmarDiag_IqaReg_Dw_128_Invalid, VTDIQEI_INVALID_DESCRIPTOR_WIDTH); +#endif + + /* + * Process requests in FIFO order. + */ + uint8_t const cbDsc = fDw == VTD_IQA_REG_DW_256_BIT ? 32 : 16; + for (uint32_t offDsc = 0; offDsc < cbRequests; offDsc += cbDsc) + { + uint64_t const *puDscQwords = (uint64_t const *)((uintptr_t)pvRequests + offDsc); + uint64_t const uQword0 = puDscQwords[0]; + uint64_t const uQword1 = puDscQwords[1]; + uint8_t const fDscType = VTD_GENERIC_INV_DSC_GET_TYPE(uQword0); + switch (fDscType) + { + case VTD_INV_WAIT_DSC_TYPE: + { + /* Validate descriptor type. */ + if ( fTtm == VTD_TTM_LEGACY_MODE + || fDw == VTD_IQA_REG_DW_256_BIT) + { /* likely */ } + else + DMAR_IQE_FAULT_RECORD_RET(kDmarDiag_Iqei_Inv_Wait_Dsc_Invalid, VTDIQEI_INVALID_DESCRIPTOR_TYPE); + + /* Validate reserved bits. */ + uint64_t const fValidMask0 = !(pThis->fExtCapReg & VTD_BF_ECAP_REG_PDS_MASK) + ? VTD_INV_WAIT_DSC_0_VALID_MASK & ~VTD_BF_0_INV_WAIT_DSC_PD_MASK + : VTD_INV_WAIT_DSC_0_VALID_MASK; + if ( !(uQword0 & ~fValidMask0) + && !(uQword1 & ~VTD_INV_WAIT_DSC_1_VALID_MASK)) + { /* likely */ } + else + DMAR_IQE_FAULT_RECORD_RET(kDmarDiag_Iqei_Inv_Wait_Dsc_0_1_Rsvd, VTDIQEI_RSVD_FIELD_VIOLATION); + + if (fDw == VTD_IQA_REG_DW_256_BIT) + { + if ( !puDscQwords[2] + && !puDscQwords[3]) + { /* likely */ } + else + DMAR_IQE_FAULT_RECORD_RET(kDmarDiag_Iqei_Inv_Wait_Dsc_2_3_Rsvd, VTDIQEI_RSVD_FIELD_VIOLATION); + } + + /* Perform status write (this must be done prior to generating the completion interrupt). */ + bool const fSw = RT_BF_GET(uQword0, VTD_BF_0_INV_WAIT_DSC_SW); + if (fSw) + { + uint32_t const uStatus = RT_BF_GET(uQword0, VTD_BF_0_INV_WAIT_DSC_STDATA); + RTGCPHYS const GCPhysStatus = uQword1 & VTD_BF_1_INV_WAIT_DSC_STADDR_MASK; + int const rc = PDMDevHlpPhysWrite(pDevIns, GCPhysStatus, (void const*)&uStatus, sizeof(uStatus)); + AssertRC(rc); + } + + /* Generate invalidation event interrupt. */ + bool const fIf = RT_BF_GET(uQword0, VTD_BF_0_INV_WAIT_DSC_IF); + if (fIf) + { + DMAR_LOCK(pDevIns, pThisR3); + dmarR3InvEventRaiseInterrupt(pDevIns); + DMAR_UNLOCK(pDevIns, pThisR3); + } + + STAM_COUNTER_INC(&pThis->StatInvWaitDsc); + break; + } + + case VTD_CC_INV_DSC_TYPE: STAM_COUNTER_INC(&pThis->StatCcInvDsc); break; + case VTD_IOTLB_INV_DSC_TYPE: STAM_COUNTER_INC(&pThis->StatIotlbInvDsc); break; + case VTD_DEV_TLB_INV_DSC_TYPE: STAM_COUNTER_INC(&pThis->StatDevtlbInvDsc); break; + case VTD_IEC_INV_DSC_TYPE: STAM_COUNTER_INC(&pThis->StatIecInvDsc); break; + case VTD_P_IOTLB_INV_DSC_TYPE: STAM_COUNTER_INC(&pThis->StatPasidIotlbInvDsc); break; + case VTD_PC_INV_DSC_TYPE: STAM_COUNTER_INC(&pThis->StatPasidCacheInvDsc); break; + case VTD_P_DEV_TLB_INV_DSC_TYPE: STAM_COUNTER_INC(&pThis->StatPasidDevtlbInvDsc); break; + default: + { + /* Stop processing further requests. */ + LogFunc(("Invalid descriptor type: %#x\n", fDscType)); + DMAR_IQE_FAULT_RECORD_RET(kDmarDiag_Iqei_Dsc_Type_Invalid, VTDIQEI_INVALID_DESCRIPTOR_TYPE); + } + } + } +#undef DMAR_IQE_FAULT_RECORD_RET +} + + +/** + * The invalidation-queue thread. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param pThread The command thread. + */ +static DECLCALLBACK(int) dmarR3InvQueueThread(PPDMDEVINS pDevIns, PPDMTHREAD pThread) +{ + NOREF(pThread); + LogFlowFunc(("\n")); + + if (pThread->enmState == PDMTHREADSTATE_INITIALIZING) + return VINF_SUCCESS; + + /* + * Pre-allocate the maximum size of the invalidation queue allowed by the spec. + * This prevents trashing the heap as well as deal with out-of-memory situations + * up-front while starting the VM. It also simplifies the code from having to + * dynamically grow/shrink the allocation based on how software sizes the queue. + * Guests normally don't alter the queue size all the time, but that's not an + * assumption we can make. + */ + uint8_t const cMaxPages = 1 << VTD_BF_IQA_REG_QS_MASK; + size_t const cbMaxQs = cMaxPages << X86_PAGE_SHIFT; + void *pvRequests = RTMemAllocZ(cbMaxQs); + AssertPtrReturn(pvRequests, VERR_NO_MEMORY); + + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + PCDMARR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PCDMARR3); + + while (pThread->enmState == PDMTHREADSTATE_RUNNING) + { + /* + * Sleep until we are woken up. + */ + { + int const rc = PDMDevHlpSUPSemEventWaitNoResume(pDevIns, pThis->hEvtInvQueue, RT_INDEFINITE_WAIT); + AssertLogRelMsgReturn(RT_SUCCESS(rc) || rc == VERR_INTERRUPTED, ("%Rrc\n", rc), rc); + if (RT_UNLIKELY(pThread->enmState != PDMTHREADSTATE_RUNNING)) + break; + } + + DMAR_LOCK(pDevIns, pThisR3); + if (dmarInvQueueCanProcessRequests(pThis)) + { + uint32_t offQueueHead; + uint32_t offQueueTail; + bool const fIsEmpty = dmarInvQueueIsEmptyEx(pThis, &offQueueHead, &offQueueTail); + if (!fIsEmpty) + { + /* + * Get the current queue size, descriptor width, queue base address and the + * table translation mode while the lock is still held. + */ + uint64_t const uIqaReg = dmarRegReadRaw64(pThis, VTD_MMIO_OFF_IQA_REG); + uint8_t const cQueuePages = 1 << (uIqaReg & VTD_BF_IQA_REG_QS_MASK); + uint32_t const cbQueue = cQueuePages << X86_PAGE_SHIFT; + uint8_t const fDw = RT_BF_GET(uIqaReg, VTD_BF_IQA_REG_DW); + uint8_t const fTtm = RT_BF_GET(pThis->uRtaddrReg, VTD_BF_RTADDR_REG_TTM); + RTGCPHYS const GCPhysRequests = (uIqaReg & VTD_BF_IQA_REG_IQA_MASK) + offQueueHead; + + /* Paranoia. */ + Assert(cbQueue <= cbMaxQs); + Assert(!(offQueueTail & ~VTD_BF_IQT_REG_QT_MASK)); + Assert(!(offQueueHead & ~VTD_BF_IQH_REG_QH_MASK)); + Assert(fDw != VTD_IQA_REG_DW_256_BIT || !(offQueueTail & RT_BIT(4))); + Assert(fDw != VTD_IQA_REG_DW_256_BIT || !(offQueueHead & RT_BIT(4))); + Assert(offQueueHead < cbQueue); + + /* + * A table translation mode of "reserved" isn't valid for any descriptor type. + * However, RTADDR_REG can be modified in parallel to invalidation-queue processing, + * but if ESRTPS is support, we will perform a global invalidation when software + * changes RTADDR_REG, or it's the responsibility of software to do it explicitly. + * So caching TTM while reading all descriptors should not be a problem. + * + * Also, validate the queue tail offset as it's mutable by software. + */ + if ( fTtm != VTD_TTM_RSVD + && offQueueTail < cbQueue) + { + /* Don't hold the lock while reading (a potentially large amount of) requests */ + DMAR_UNLOCK(pDevIns, pThisR3); + + int rc; + uint32_t cbRequests; + if (offQueueTail > offQueueHead) + { + /* The requests have not wrapped around, read them in one go. */ + cbRequests = offQueueTail - offQueueHead; + rc = PDMDevHlpPhysReadMeta(pDevIns, GCPhysRequests, pvRequests, cbRequests); + } + else + { + /* The requests have wrapped around, read forward and wrapped-around. */ + uint32_t const cbForward = cbQueue - offQueueHead; + rc = PDMDevHlpPhysReadMeta(pDevIns, GCPhysRequests, pvRequests, cbForward); + + uint32_t const cbWrapped = offQueueTail; + if ( RT_SUCCESS(rc) + && cbWrapped > 0) + { + rc = PDMDevHlpPhysReadMeta(pDevIns, GCPhysRequests + cbForward, + (void *)((uintptr_t)pvRequests + cbForward), cbWrapped); + } + cbRequests = cbForward + cbWrapped; + } + + /* Re-acquire the lock since we need to update device state. */ + DMAR_LOCK(pDevIns, pThisR3); + + if (RT_SUCCESS(rc)) + { + /* Indicate to software we've fetched all requests. */ + dmarRegWriteRaw64(pThis, VTD_MMIO_OFF_IQH_REG, offQueueTail); + + /* Don't hold the lock while processing requests. */ + DMAR_UNLOCK(pDevIns, pThisR3); + + /* Process all requests. */ + Assert(cbRequests <= cbQueue); + dmarR3InvQueueProcessRequests(pDevIns, pvRequests, cbRequests, fDw, fTtm); + + /* + * We've processed all requests and the lock shouldn't be held at this point. + * Using 'continue' here allows us to skip re-acquiring the lock just to release + * it again before going back to the thread loop. It's a bit ugly but it certainly + * helps with performance. + */ + DMAR_ASSERT_LOCK_IS_NOT_OWNER(pDevIns, pThisR3); + continue; + } + dmarIqeFaultRecord(pDevIns, kDmarDiag_IqaReg_Dsc_Fetch_Error, VTDIQEI_FETCH_DESCRIPTOR_ERR); + } + else + { + if (fTtm == VTD_TTM_RSVD) + dmarIqeFaultRecord(pDevIns, kDmarDiag_Iqei_Ttm_Rsvd, VTDIQEI_INVALID_TTM); + else + { + Assert(offQueueTail >= cbQueue); + dmarIqeFaultRecord(pDevIns, kDmarDiag_IqtReg_Qt_Invalid, VTDIQEI_INVALID_TAIL_PTR); + } + } + } + } + DMAR_UNLOCK(pDevIns, pThisR3); + } + + RTMemFree(pvRequests); + pvRequests = NULL; + + LogFlowFunc(("Invalidation-queue thread terminating\n")); + return VINF_SUCCESS; +} + + +/** + * Wakes up the invalidation-queue thread so it can respond to a state + * change. + * + * @returns VBox status code. + * @param pDevIns The IOMMU device instance. + * @param pThread The invalidation-queue thread. + * + * @thread EMT. + */ +static DECLCALLBACK(int) dmarR3InvQueueThreadWakeUp(PPDMDEVINS pDevIns, PPDMTHREAD pThread) +{ + RT_NOREF(pThread); + LogFlowFunc(("\n")); + PCDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + return PDMDevHlpSUPSemEventSignal(pDevIns, pThis->hEvtInvQueue); +} + + +/** + * @callback_method_impl{FNDBGFHANDLERDEV} + */ +static DECLCALLBACK(void) dmarR3DbgInfo(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + PCDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + PCDMARR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PCDMARR3); + bool const fVerbose = RTStrCmp(pszArgs, "verbose") == 0; + + /* + * We lock the device to get a consistent register state as it is + * ASSUMED pHlp->pfnPrintf is expensive, so we copy the registers (the + * ones we care about here) into temporaries and release the lock ASAP. + * + * Order of register being read and outputted is in accordance with the + * spec. for no particular reason. + * See Intel VT-d spec. 10.4 "Register Descriptions". + */ + DMAR_LOCK(pDevIns, pThisR3); + + DMARDIAG const enmDiag = pThis->enmDiag; + uint32_t const uVerReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_VER_REG); + uint64_t const uCapReg = dmarRegReadRaw64(pThis, VTD_MMIO_OFF_CAP_REG); + uint64_t const uEcapReg = dmarRegReadRaw64(pThis, VTD_MMIO_OFF_ECAP_REG); + uint32_t const uGcmdReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_GCMD_REG); + uint32_t const uGstsReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_GSTS_REG); + uint64_t const uRtaddrReg = dmarRegReadRaw64(pThis, VTD_MMIO_OFF_RTADDR_REG); + uint64_t const uCcmdReg = dmarRegReadRaw64(pThis, VTD_MMIO_OFF_CCMD_REG); + uint32_t const uFstsReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_FSTS_REG); + uint32_t const uFectlReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_FECTL_REG); + uint32_t const uFedataReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_FEDATA_REG); + uint32_t const uFeaddrReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_FEADDR_REG); + uint32_t const uFeuaddrReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_FEUADDR_REG); + uint64_t const uAflogReg = dmarRegReadRaw64(pThis, VTD_MMIO_OFF_AFLOG_REG); + uint32_t const uPmenReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_PMEN_REG); + uint32_t const uPlmbaseReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_PLMBASE_REG); + uint32_t const uPlmlimitReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_PLMLIMIT_REG); + uint64_t const uPhmbaseReg = dmarRegReadRaw64(pThis, VTD_MMIO_OFF_PHMBASE_REG); + uint64_t const uPhmlimitReg = dmarRegReadRaw64(pThis, VTD_MMIO_OFF_PHMLIMIT_REG); + uint64_t const uIqhReg = dmarRegReadRaw64(pThis, VTD_MMIO_OFF_IQH_REG); + uint64_t const uIqtReg = dmarRegReadRaw64(pThis, VTD_MMIO_OFF_IQT_REG); + uint64_t const uIqaReg = dmarRegReadRaw64(pThis, VTD_MMIO_OFF_IQA_REG); + uint32_t const uIcsReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_ICS_REG); + uint32_t const uIectlReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_IECTL_REG); + uint32_t const uIedataReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_IEDATA_REG); + uint32_t const uIeaddrReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_IEADDR_REG); + uint32_t const uIeuaddrReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_IEUADDR_REG); + uint64_t const uIqercdReg = dmarRegReadRaw64(pThis, VTD_MMIO_OFF_IQERCD_REG); + uint64_t const uIrtaReg = dmarRegReadRaw64(pThis, VTD_MMIO_OFF_IRTA_REG); + uint64_t const uPqhReg = dmarRegReadRaw64(pThis, VTD_MMIO_OFF_PQH_REG); + uint64_t const uPqtReg = dmarRegReadRaw64(pThis, VTD_MMIO_OFF_PQT_REG); + uint64_t const uPqaReg = dmarRegReadRaw64(pThis, VTD_MMIO_OFF_PQA_REG); + uint32_t const uPrsReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_PRS_REG); + uint32_t const uPectlReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_PECTL_REG); + uint32_t const uPedataReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_PEDATA_REG); + uint32_t const uPeaddrReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_PEADDR_REG); + uint32_t const uPeuaddrReg = dmarRegReadRaw32(pThis, VTD_MMIO_OFF_PEUADDR_REG); + uint64_t const uMtrrcapReg = dmarRegReadRaw64(pThis, VTD_MMIO_OFF_MTRRCAP_REG); + uint64_t const uMtrrdefReg = dmarRegReadRaw64(pThis, VTD_MMIO_OFF_MTRRDEF_REG); + + DMAR_UNLOCK(pDevIns, pThisR3); + + const char *const pszDiag = enmDiag < RT_ELEMENTS(g_apszDmarDiagDesc) ? g_apszDmarDiagDesc[enmDiag] : "(Unknown)"; + pHlp->pfnPrintf(pHlp, "Intel-IOMMU:\n"); + pHlp->pfnPrintf(pHlp, " Diag = %s\n", pszDiag); + + /* + * Non-verbose output. + */ + if (!fVerbose) + { + pHlp->pfnPrintf(pHlp, " VER_REG = %#RX32\n", uVerReg); + pHlp->pfnPrintf(pHlp, " CAP_REG = %#RX64\n", uCapReg); + pHlp->pfnPrintf(pHlp, " ECAP_REG = %#RX64\n", uEcapReg); + pHlp->pfnPrintf(pHlp, " GCMD_REG = %#RX32\n", uGcmdReg); + pHlp->pfnPrintf(pHlp, " GSTS_REG = %#RX32\n", uGstsReg); + pHlp->pfnPrintf(pHlp, " RTADDR_REG = %#RX64\n", uRtaddrReg); + pHlp->pfnPrintf(pHlp, " CCMD_REG = %#RX64\n", uCcmdReg); + pHlp->pfnPrintf(pHlp, " FSTS_REG = %#RX32\n", uFstsReg); + pHlp->pfnPrintf(pHlp, " FECTL_REG = %#RX32\n", uFectlReg); + pHlp->pfnPrintf(pHlp, " FEDATA_REG = %#RX32\n", uFedataReg); + pHlp->pfnPrintf(pHlp, " FEADDR_REG = %#RX32\n", uFeaddrReg); + pHlp->pfnPrintf(pHlp, " FEUADDR_REG = %#RX32\n", uFeuaddrReg); + pHlp->pfnPrintf(pHlp, " AFLOG_REG = %#RX64\n", uAflogReg); + pHlp->pfnPrintf(pHlp, " PMEN_REG = %#RX32\n", uPmenReg); + pHlp->pfnPrintf(pHlp, " PLMBASE_REG = %#RX32\n", uPlmbaseReg); + pHlp->pfnPrintf(pHlp, " PLMLIMIT_REG = %#RX32\n", uPlmlimitReg); + pHlp->pfnPrintf(pHlp, " PHMBASE_REG = %#RX64\n", uPhmbaseReg); + pHlp->pfnPrintf(pHlp, " PHMLIMIT_REG = %#RX64\n", uPhmlimitReg); + pHlp->pfnPrintf(pHlp, " IQH_REG = %#RX64\n", uIqhReg); + pHlp->pfnPrintf(pHlp, " IQT_REG = %#RX64\n", uIqtReg); + pHlp->pfnPrintf(pHlp, " IQA_REG = %#RX64\n", uIqaReg); + pHlp->pfnPrintf(pHlp, " ICS_REG = %#RX32\n", uIcsReg); + pHlp->pfnPrintf(pHlp, " IECTL_REG = %#RX32\n", uIectlReg); + pHlp->pfnPrintf(pHlp, " IEDATA_REG = %#RX32\n", uIedataReg); + pHlp->pfnPrintf(pHlp, " IEADDR_REG = %#RX32\n", uIeaddrReg); + pHlp->pfnPrintf(pHlp, " IEUADDR_REG = %#RX32\n", uIeuaddrReg); + pHlp->pfnPrintf(pHlp, " IQERCD_REG = %#RX64\n", uIqercdReg); + pHlp->pfnPrintf(pHlp, " IRTA_REG = %#RX64\n", uIrtaReg); + pHlp->pfnPrintf(pHlp, " PQH_REG = %#RX64\n", uPqhReg); + pHlp->pfnPrintf(pHlp, " PQT_REG = %#RX64\n", uPqtReg); + pHlp->pfnPrintf(pHlp, " PQA_REG = %#RX64\n", uPqaReg); + pHlp->pfnPrintf(pHlp, " PRS_REG = %#RX32\n", uPrsReg); + pHlp->pfnPrintf(pHlp, " PECTL_REG = %#RX32\n", uPectlReg); + pHlp->pfnPrintf(pHlp, " PEDATA_REG = %#RX32\n", uPedataReg); + pHlp->pfnPrintf(pHlp, " PEADDR_REG = %#RX32\n", uPeaddrReg); + pHlp->pfnPrintf(pHlp, " PEUADDR_REG = %#RX32\n", uPeuaddrReg); + pHlp->pfnPrintf(pHlp, " MTRRCAP_REG = %#RX64\n", uMtrrcapReg); + pHlp->pfnPrintf(pHlp, " MTRRDEF_REG = %#RX64\n", uMtrrdefReg); + pHlp->pfnPrintf(pHlp, "\n"); + return; + } + + /* + * Verbose output. + */ + pHlp->pfnPrintf(pHlp, " VER_REG = %#RX32\n", uVerReg); + { + pHlp->pfnPrintf(pHlp, " MAJ = %#x\n", RT_BF_GET(uVerReg, VTD_BF_VER_REG_MAX)); + pHlp->pfnPrintf(pHlp, " MIN = %#x\n", RT_BF_GET(uVerReg, VTD_BF_VER_REG_MIN)); + } + pHlp->pfnPrintf(pHlp, " CAP_REG = %#RX64\n", uCapReg); + { + uint8_t const uMgaw = RT_BF_GET(uCapReg, VTD_BF_CAP_REG_MGAW); + uint8_t const uNfr = RT_BF_GET(uCapReg, VTD_BF_CAP_REG_NFR); + pHlp->pfnPrintf(pHlp, " ND = %u\n", RT_BF_GET(uCapReg, VTD_BF_CAP_REG_ND)); + pHlp->pfnPrintf(pHlp, " AFL = %RTbool\n", RT_BF_GET(uCapReg, VTD_BF_CAP_REG_AFL)); + pHlp->pfnPrintf(pHlp, " RWBF = %RTbool\n", RT_BF_GET(uCapReg, VTD_BF_CAP_REG_RWBF)); + pHlp->pfnPrintf(pHlp, " PLMR = %RTbool\n", RT_BF_GET(uCapReg, VTD_BF_CAP_REG_PLMR)); + pHlp->pfnPrintf(pHlp, " PHMR = %RTbool\n", RT_BF_GET(uCapReg, VTD_BF_CAP_REG_PHMR)); + pHlp->pfnPrintf(pHlp, " CM = %RTbool\n", RT_BF_GET(uCapReg, VTD_BF_CAP_REG_CM)); + pHlp->pfnPrintf(pHlp, " SAGAW = %#x\n", RT_BF_GET(uCapReg, VTD_BF_CAP_REG_SAGAW)); + pHlp->pfnPrintf(pHlp, " MGAW = %#x (%u bits)\n", uMgaw, uMgaw + 1); + pHlp->pfnPrintf(pHlp, " ZLR = %RTbool\n", RT_BF_GET(uCapReg, VTD_BF_CAP_REG_ZLR)); + pHlp->pfnPrintf(pHlp, " FRO = %#x bytes\n", RT_BF_GET(uCapReg, VTD_BF_CAP_REG_FRO)); + pHlp->pfnPrintf(pHlp, " SLLPS = %#x\n", RT_BF_GET(uCapReg, VTD_BF_CAP_REG_SLLPS)); + pHlp->pfnPrintf(pHlp, " PSI = %RTbool\n", RT_BF_GET(uCapReg, VTD_BF_CAP_REG_PSI)); + pHlp->pfnPrintf(pHlp, " NFR = %u (%u FRCD register%s)\n", uNfr, uNfr + 1, uNfr > 0 ? "s" : ""); + pHlp->pfnPrintf(pHlp, " MAMV = %#x\n", RT_BF_GET(uCapReg, VTD_BF_CAP_REG_MAMV)); + pHlp->pfnPrintf(pHlp, " DWD = %RTbool\n", RT_BF_GET(uCapReg, VTD_BF_CAP_REG_DWD)); + pHlp->pfnPrintf(pHlp, " DRD = %RTbool\n", RT_BF_GET(uCapReg, VTD_BF_CAP_REG_DRD)); + pHlp->pfnPrintf(pHlp, " FL1GP = %RTbool\n", RT_BF_GET(uCapReg, VTD_BF_CAP_REG_FL1GP)); + pHlp->pfnPrintf(pHlp, " PI = %RTbool\n", RT_BF_GET(uCapReg, VTD_BF_CAP_REG_PI)); + pHlp->pfnPrintf(pHlp, " FL5LP = %RTbool\n", RT_BF_GET(uCapReg, VTD_BF_CAP_REG_FL5LP)); + pHlp->pfnPrintf(pHlp, " ESIRTPS = %RTbool\n", RT_BF_GET(uCapReg, VTD_BF_CAP_REG_ESIRTPS)); + pHlp->pfnPrintf(pHlp, " ESRTPS = %RTbool\n", RT_BF_GET(uCapReg, VTD_BF_CAP_REG_ESRTPS)); + } + pHlp->pfnPrintf(pHlp, " ECAP_REG = %#RX64\n", uEcapReg); + { + uint8_t const uPss = RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_PSS); + pHlp->pfnPrintf(pHlp, " C = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_C)); + pHlp->pfnPrintf(pHlp, " QI = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_QI)); + pHlp->pfnPrintf(pHlp, " DT = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_DT)); + pHlp->pfnPrintf(pHlp, " IR = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_IR)); + pHlp->pfnPrintf(pHlp, " EIM = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_EIM)); + pHlp->pfnPrintf(pHlp, " PT = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_PT)); + pHlp->pfnPrintf(pHlp, " SC = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_SC)); + pHlp->pfnPrintf(pHlp, " IRO = %#x bytes\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_IRO)); + pHlp->pfnPrintf(pHlp, " MHMV = %#x\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_MHMV)); + pHlp->pfnPrintf(pHlp, " MTS = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_MTS)); + pHlp->pfnPrintf(pHlp, " NEST = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_NEST)); + pHlp->pfnPrintf(pHlp, " PRS = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_PRS)); + pHlp->pfnPrintf(pHlp, " ERS = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_ERS)); + pHlp->pfnPrintf(pHlp, " SRS = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_SRS)); + pHlp->pfnPrintf(pHlp, " NWFS = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_NWFS)); + pHlp->pfnPrintf(pHlp, " EAFS = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_EAFS)); + pHlp->pfnPrintf(pHlp, " PSS = %u (%u bits)\n", uPss, uPss > 0 ? uPss + 1 : 0); + pHlp->pfnPrintf(pHlp, " PASID = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_PASID)); + pHlp->pfnPrintf(pHlp, " DIT = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_DIT)); + pHlp->pfnPrintf(pHlp, " PDS = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_PDS)); + pHlp->pfnPrintf(pHlp, " SMTS = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_SMTS)); + pHlp->pfnPrintf(pHlp, " VCS = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_VCS)); + pHlp->pfnPrintf(pHlp, " SLADS = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_SLADS)); + pHlp->pfnPrintf(pHlp, " SLTS = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_SLTS)); + pHlp->pfnPrintf(pHlp, " FLTS = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_FLTS)); + pHlp->pfnPrintf(pHlp, " SMPWCS = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_SMPWCS)); + pHlp->pfnPrintf(pHlp, " RPS = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_RPS)); + pHlp->pfnPrintf(pHlp, " ADMS = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_ADMS)); + pHlp->pfnPrintf(pHlp, " RPRIVS = %RTbool\n", RT_BF_GET(uEcapReg, VTD_BF_ECAP_REG_RPRIVS)); + } + pHlp->pfnPrintf(pHlp, " GCMD_REG = %#RX32\n", uGcmdReg); + { + uint8_t const fCfi = RT_BF_GET(uGcmdReg, VTD_BF_GCMD_REG_CFI); + pHlp->pfnPrintf(pHlp, " CFI = %u (%s)\n", fCfi, fCfi ? "Passthrough" : "Blocked"); + pHlp->pfnPrintf(pHlp, " SIRTP = %u\n", RT_BF_GET(uGcmdReg, VTD_BF_GCMD_REG_SIRTP)); + pHlp->pfnPrintf(pHlp, " IRE = %u\n", RT_BF_GET(uGcmdReg, VTD_BF_GCMD_REG_IRE)); + pHlp->pfnPrintf(pHlp, " QIE = %u\n", RT_BF_GET(uGcmdReg, VTD_BF_GCMD_REG_QIE)); + pHlp->pfnPrintf(pHlp, " WBF = %u\n", RT_BF_GET(uGcmdReg, VTD_BF_GCMD_REG_WBF)); + pHlp->pfnPrintf(pHlp, " EAFL = %u\n", RT_BF_GET(uGcmdReg, VTD_BF_GCMD_REG_SFL)); + pHlp->pfnPrintf(pHlp, " SFL = %u\n", RT_BF_GET(uGcmdReg, VTD_BF_GCMD_REG_SFL)); + pHlp->pfnPrintf(pHlp, " SRTP = %u\n", RT_BF_GET(uGcmdReg, VTD_BF_GCMD_REG_SRTP)); + pHlp->pfnPrintf(pHlp, " TE = %u\n", RT_BF_GET(uGcmdReg, VTD_BF_GCMD_REG_TE)); + } + pHlp->pfnPrintf(pHlp, " GSTS_REG = %#RX32\n", uGstsReg); + { + uint8_t const fCfis = RT_BF_GET(uGstsReg, VTD_BF_GSTS_REG_CFIS); + pHlp->pfnPrintf(pHlp, " CFIS = %u (%s)\n", fCfis, fCfis ? "Passthrough" : "Blocked"); + pHlp->pfnPrintf(pHlp, " IRTPS = %u\n", RT_BF_GET(uGstsReg, VTD_BF_GSTS_REG_IRTPS)); + pHlp->pfnPrintf(pHlp, " IRES = %u\n", RT_BF_GET(uGstsReg, VTD_BF_GSTS_REG_IRES)); + pHlp->pfnPrintf(pHlp, " QIES = %u\n", RT_BF_GET(uGstsReg, VTD_BF_GSTS_REG_QIES)); + pHlp->pfnPrintf(pHlp, " WBFS = %u\n", RT_BF_GET(uGstsReg, VTD_BF_GSTS_REG_WBFS)); + pHlp->pfnPrintf(pHlp, " AFLS = %u\n", RT_BF_GET(uGstsReg, VTD_BF_GSTS_REG_AFLS)); + pHlp->pfnPrintf(pHlp, " FLS = %u\n", RT_BF_GET(uGstsReg, VTD_BF_GSTS_REG_FLS)); + pHlp->pfnPrintf(pHlp, " RTPS = %u\n", RT_BF_GET(uGstsReg, VTD_BF_GSTS_REG_RTPS)); + pHlp->pfnPrintf(pHlp, " TES = %u\n", RT_BF_GET(uGstsReg, VTD_BF_GSTS_REG_TES)); + } + pHlp->pfnPrintf(pHlp, " RTADDR_REG = %#RX64\n", uRtaddrReg); + { + uint8_t const uTtm = RT_BF_GET(uRtaddrReg, VTD_BF_RTADDR_REG_TTM); + pHlp->pfnPrintf(pHlp, " RTA = %#RX64\n", uRtaddrReg & VTD_BF_RTADDR_REG_RTA_MASK); + pHlp->pfnPrintf(pHlp, " TTM = %u (%s)\n", uTtm, vtdRtaddrRegGetTtmDesc(uTtm)); + } + pHlp->pfnPrintf(pHlp, " CCMD_REG = %#RX64\n", uCcmdReg); + pHlp->pfnPrintf(pHlp, " FSTS_REG = %#RX32\n", uFstsReg); + { + pHlp->pfnPrintf(pHlp, " PFO = %u\n", RT_BF_GET(uFstsReg, VTD_BF_FSTS_REG_PFO)); + pHlp->pfnPrintf(pHlp, " PPF = %u\n", RT_BF_GET(uFstsReg, VTD_BF_FSTS_REG_PPF)); + pHlp->pfnPrintf(pHlp, " AFO = %u\n", RT_BF_GET(uFstsReg, VTD_BF_FSTS_REG_AFO)); + pHlp->pfnPrintf(pHlp, " APF = %u\n", RT_BF_GET(uFstsReg, VTD_BF_FSTS_REG_APF)); + pHlp->pfnPrintf(pHlp, " IQE = %u\n", RT_BF_GET(uFstsReg, VTD_BF_FSTS_REG_IQE)); + pHlp->pfnPrintf(pHlp, " ICS = %u\n", RT_BF_GET(uFstsReg, VTD_BF_FSTS_REG_ICE)); + pHlp->pfnPrintf(pHlp, " ITE = %u\n", RT_BF_GET(uFstsReg, VTD_BF_FSTS_REG_ITE)); + pHlp->pfnPrintf(pHlp, " FRI = %u\n", RT_BF_GET(uFstsReg, VTD_BF_FSTS_REG_FRI)); + } + pHlp->pfnPrintf(pHlp, " FECTL_REG = %#RX32\n", uFectlReg); + { + pHlp->pfnPrintf(pHlp, " IM = %RTbool\n", RT_BF_GET(uFectlReg, VTD_BF_FECTL_REG_IM)); + pHlp->pfnPrintf(pHlp, " IP = %RTbool\n", RT_BF_GET(uFectlReg, VTD_BF_FECTL_REG_IP)); + } + pHlp->pfnPrintf(pHlp, " FEDATA_REG = %#RX32\n", uFedataReg); + pHlp->pfnPrintf(pHlp, " FEADDR_REG = %#RX32\n", uFeaddrReg); + pHlp->pfnPrintf(pHlp, " FEUADDR_REG = %#RX32\n", uFeuaddrReg); + pHlp->pfnPrintf(pHlp, " AFLOG_REG = %#RX64\n", uAflogReg); + pHlp->pfnPrintf(pHlp, " PMEN_REG = %#RX32\n", uPmenReg); + pHlp->pfnPrintf(pHlp, " PLMBASE_REG = %#RX32\n", uPlmbaseReg); + pHlp->pfnPrintf(pHlp, " PLMLIMIT_REG = %#RX32\n", uPlmlimitReg); + pHlp->pfnPrintf(pHlp, " PHMBASE_REG = %#RX64\n", uPhmbaseReg); + pHlp->pfnPrintf(pHlp, " PHMLIMIT_REG = %#RX64\n", uPhmlimitReg); + pHlp->pfnPrintf(pHlp, " IQH_REG = %#RX64\n", uIqhReg); + pHlp->pfnPrintf(pHlp, " IQT_REG = %#RX64\n", uIqtReg); + pHlp->pfnPrintf(pHlp, " IQA_REG = %#RX64\n", uIqaReg); + { + uint8_t const fDw = RT_BF_GET(uIqaReg, VTD_BF_IQA_REG_DW); + uint8_t const fQs = RT_BF_GET(uIqaReg, VTD_BF_IQA_REG_QS); + uint8_t const cQueuePages = 1 << fQs; + pHlp->pfnPrintf(pHlp, " DW = %u (%s)\n", fDw, fDw == VTD_IQA_REG_DW_128_BIT ? "128-bit" : "256-bit"); + pHlp->pfnPrintf(pHlp, " QS = %u (%u page%s)\n", fQs, cQueuePages, cQueuePages > 1 ? "s" : ""); + } + pHlp->pfnPrintf(pHlp, " ICS_REG = %#RX32\n", uIcsReg); + { + pHlp->pfnPrintf(pHlp, " IWC = %u\n", RT_BF_GET(uIcsReg, VTD_BF_ICS_REG_IWC)); + } + pHlp->pfnPrintf(pHlp, " IECTL_REG = %#RX32\n", uIectlReg); + { + pHlp->pfnPrintf(pHlp, " IM = %RTbool\n", RT_BF_GET(uIectlReg, VTD_BF_IECTL_REG_IM)); + pHlp->pfnPrintf(pHlp, " IP = %RTbool\n", RT_BF_GET(uIectlReg, VTD_BF_IECTL_REG_IP)); + } + pHlp->pfnPrintf(pHlp, " IEDATA_REG = %#RX32\n", uIedataReg); + pHlp->pfnPrintf(pHlp, " IEADDR_REG = %#RX32\n", uIeaddrReg); + pHlp->pfnPrintf(pHlp, " IEUADDR_REG = %#RX32\n", uIeuaddrReg); + pHlp->pfnPrintf(pHlp, " IQERCD_REG = %#RX64\n", uIqercdReg); + { + pHlp->pfnPrintf(pHlp, " ICESID = %#RX32\n", RT_BF_GET(uIqercdReg, VTD_BF_IQERCD_REG_ICESID)); + pHlp->pfnPrintf(pHlp, " ITESID = %#RX32\n", RT_BF_GET(uIqercdReg, VTD_BF_IQERCD_REG_ITESID)); + pHlp->pfnPrintf(pHlp, " IQEI = %#RX32\n", RT_BF_GET(uIqercdReg, VTD_BF_IQERCD_REG_IQEI)); + } + pHlp->pfnPrintf(pHlp, " IRTA_REG = %#RX64\n", uIrtaReg); + { + uint32_t const cIrtEntries = VTD_IRTA_REG_GET_ENTRY_COUNT(uIrtaReg); + uint32_t const cbIrt = sizeof(VTD_IRTE_T) * cIrtEntries; + pHlp->pfnPrintf(pHlp, " IRTA = %#RX64\n", uIrtaReg & VTD_BF_IRTA_REG_IRTA_MASK); + pHlp->pfnPrintf(pHlp, " EIME = %RTbool\n", RT_BF_GET(uIrtaReg, VTD_BF_IRTA_REG_EIME)); + pHlp->pfnPrintf(pHlp, " S = %u entries (%u bytes)\n", cIrtEntries, cbIrt); + } + pHlp->pfnPrintf(pHlp, " PQH_REG = %#RX64\n", uPqhReg); + pHlp->pfnPrintf(pHlp, " PQT_REG = %#RX64\n", uPqtReg); + pHlp->pfnPrintf(pHlp, " PQA_REG = %#RX64\n", uPqaReg); + pHlp->pfnPrintf(pHlp, " PRS_REG = %#RX32\n", uPrsReg); + pHlp->pfnPrintf(pHlp, " PECTL_REG = %#RX32\n", uPectlReg); + pHlp->pfnPrintf(pHlp, " PEDATA_REG = %#RX32\n", uPedataReg); + pHlp->pfnPrintf(pHlp, " PEADDR_REG = %#RX32\n", uPeaddrReg); + pHlp->pfnPrintf(pHlp, " PEUADDR_REG = %#RX32\n", uPeuaddrReg); + pHlp->pfnPrintf(pHlp, " MTRRCAP_REG = %#RX64\n", uMtrrcapReg); + pHlp->pfnPrintf(pHlp, " MTRRDEF_REG = %#RX64\n", uMtrrdefReg); + pHlp->pfnPrintf(pHlp, "\n"); +} + + +/** + * Initializes all registers in the DMAR unit. + * + * @param pDevIns The IOMMU device instance. + */ +static void dmarR3RegsInit(PPDMDEVINS pDevIns) +{ + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + LogFlowFunc(("\n")); + + /* + * Wipe all registers (required on reset). + */ + RT_ZERO(pThis->abRegs0); + RT_ZERO(pThis->abRegs1); + + /* + * Initialize registers not mutable by software prior to initializing other registers. + */ + /* VER_REG */ + { + pThis->uVerReg = RT_BF_MAKE(VTD_BF_VER_REG_MIN, DMAR_VER_MINOR) + | RT_BF_MAKE(VTD_BF_VER_REG_MAX, DMAR_VER_MAJOR); + dmarRegWriteRaw64(pThis, VTD_MMIO_OFF_VER_REG, pThis->uVerReg); + } + + uint8_t const fFlts = 0; /* First-level translation support. */ + uint8_t const fSlts = 1; /* Second-level translation support. */ + uint8_t const fPt = 1; /* Pass-Through support. */ + uint8_t const fSmts = fFlts & fSlts & fPt; /* Scalable mode translation support.*/ + uint8_t const fNest = 0; /* Nested translation support. */ + + /* CAP_REG */ + { + uint8_t cGstPhysAddrBits; + uint8_t cGstLinearAddrBits; + PDMDevHlpCpuGetGuestAddrWidths(pDevIns, &cGstPhysAddrBits, &cGstLinearAddrBits); + + uint8_t const fFl1gp = 1; /* First-level 1GB pages support. */ + uint8_t const fFl5lp = 1; /* First-level 5-level paging support (PML5E). */ + uint8_t const fSl2mp = 1; /* Second-level 2MB pages support. */ + uint8_t const fSl2gp = fSl2mp & 1; /* Second-level 1GB pages support. */ + uint8_t const fSllps = fSl2mp | (fSl2gp << 1); /* Second-level large page support. */ + uint8_t const fMamv = (fSl2gp ? X86_PAGE_1G_SHIFT /* Maximum address mask value (for 2nd-level invalidations). */ + : X86_PAGE_2M_SHIFT) + - X86_PAGE_4K_SHIFT; + uint8_t const fNd = DMAR_ND; /* Number of domains supported. */ + uint8_t const fPsi = 1; /* Page selective invalidation. */ + uint8_t const uMgaw = cGstPhysAddrBits - 1; /* Maximum guest address width. */ + uint8_t const fSagaw = vtdCapRegGetSagaw(uMgaw); /* Supported adjust guest address width. */ + uint16_t const offFro = DMAR_MMIO_OFF_FRCD_LO_REG >> 4; /* MMIO offset of FRCD registers. */ + uint8_t const fEsrtps = 1; /* Enhanced SRTPS (auto invalidate cache on SRTP). */ + uint8_t const fEsirtps = 1; /* Enhanced SIRTPS (auto invalidate cache on SIRTP). */ + + pThis->fCapReg = RT_BF_MAKE(VTD_BF_CAP_REG_ND, fNd) + | RT_BF_MAKE(VTD_BF_CAP_REG_AFL, 0) /* Advanced fault logging not supported. */ + | RT_BF_MAKE(VTD_BF_CAP_REG_RWBF, 0) /* Software need not flush write-buffers. */ + | RT_BF_MAKE(VTD_BF_CAP_REG_PLMR, 0) /* Protected Low-Memory Region not supported. */ + | RT_BF_MAKE(VTD_BF_CAP_REG_PHMR, 0) /* Protected High-Memory Region not supported. */ + | RT_BF_MAKE(VTD_BF_CAP_REG_CM, 1) /* Software should invalidate on mapping structure changes. */ + | RT_BF_MAKE(VTD_BF_CAP_REG_SAGAW, fSlts ? fSagaw : 0) + | RT_BF_MAKE(VTD_BF_CAP_REG_MGAW, uMgaw) + | RT_BF_MAKE(VTD_BF_CAP_REG_ZLR, 1) /** @todo Figure out if/how to support zero-length reads. */ + | RT_BF_MAKE(VTD_BF_CAP_REG_FRO, offFro) + | RT_BF_MAKE(VTD_BF_CAP_REG_SLLPS, fSlts & fSllps) + | RT_BF_MAKE(VTD_BF_CAP_REG_PSI, fPsi) + | RT_BF_MAKE(VTD_BF_CAP_REG_NFR, DMAR_FRCD_REG_COUNT - 1) + | RT_BF_MAKE(VTD_BF_CAP_REG_MAMV, fPsi & fMamv) + | RT_BF_MAKE(VTD_BF_CAP_REG_DWD, 1) + | RT_BF_MAKE(VTD_BF_CAP_REG_DRD, 1) + | RT_BF_MAKE(VTD_BF_CAP_REG_FL1GP, fFlts & fFl1gp) + | RT_BF_MAKE(VTD_BF_CAP_REG_PI, 0) /* Posted Interrupts not supported. */ + | RT_BF_MAKE(VTD_BF_CAP_REG_FL5LP, fFlts & fFl5lp) + | RT_BF_MAKE(VTD_BF_CAP_REG_ESIRTPS, fEsirtps) + | RT_BF_MAKE(VTD_BF_CAP_REG_ESRTPS, fEsrtps); + dmarRegWriteRaw64(pThis, VTD_MMIO_OFF_CAP_REG, pThis->fCapReg); + + AssertCompile(fNd <= RT_ELEMENTS(g_auNdMask)); + pThis->fHawBaseMask = ~(UINT64_MAX << cGstPhysAddrBits) & X86_PAGE_4K_BASE_MASK; + pThis->fMgawInvMask = UINT64_MAX << cGstPhysAddrBits; + pThis->cMaxPagingLevel = vtdCapRegGetMaxPagingLevel(fSagaw); + pThis->fCtxEntryQw1ValidMask = VTD_BF_1_CONTEXT_ENTRY_AW_MASK + | VTD_BF_1_CONTEXT_ENTRY_IGN_6_3_MASK + | RT_BF_MAKE(VTD_BF_1_CONTEXT_ENTRY_DID, g_auNdMask[fNd]); + } + + /* ECAP_REG */ + { + uint8_t const fQi = 1; /* Queued-invalidations. */ + uint8_t const fIr = !!(DMAR_ACPI_DMAR_FLAGS & ACPI_DMAR_F_INTR_REMAP); /* Interrupt remapping support. */ + uint8_t const fMhmv = 0xf; /* Maximum handle mask value. */ + uint16_t const offIro = DMAR_MMIO_OFF_IVA_REG >> 4; /* MMIO offset of IOTLB registers. */ + uint8_t const fEim = 1; /* Extended interrupt mode.*/ + uint8_t const fAdms = 1; /* Abort DMA mode support. */ + uint8_t const fErs = 0; /* Execute Request (not supported). */ + + pThis->fExtCapReg = RT_BF_MAKE(VTD_BF_ECAP_REG_C, 0) /* Accesses don't snoop CPU cache. */ + | RT_BF_MAKE(VTD_BF_ECAP_REG_QI, fQi) + | RT_BF_MAKE(VTD_BF_ECAP_REG_DT, 0) /* Device-TLBs not supported. */ + | RT_BF_MAKE(VTD_BF_ECAP_REG_IR, fQi & fIr) + | RT_BF_MAKE(VTD_BF_ECAP_REG_EIM, fIr & fEim) + | RT_BF_MAKE(VTD_BF_ECAP_REG_PT, fPt) + | RT_BF_MAKE(VTD_BF_ECAP_REG_SC, 0) /* Snoop control not supported. */ + | RT_BF_MAKE(VTD_BF_ECAP_REG_IRO, offIro) + | RT_BF_MAKE(VTD_BF_ECAP_REG_MHMV, fIr & fMhmv) + | RT_BF_MAKE(VTD_BF_ECAP_REG_MTS, 0) /* Memory type not supported. */ + | RT_BF_MAKE(VTD_BF_ECAP_REG_NEST, fNest) + | RT_BF_MAKE(VTD_BF_ECAP_REG_PRS, 0) /* 0 as DT not supported. */ + | RT_BF_MAKE(VTD_BF_ECAP_REG_ERS, fErs) + | RT_BF_MAKE(VTD_BF_ECAP_REG_SRS, 0) /* Supervisor request not supported. */ + | RT_BF_MAKE(VTD_BF_ECAP_REG_NWFS, 0) /* 0 as DT not supported. */ + | RT_BF_MAKE(VTD_BF_ECAP_REG_EAFS, 0) /* 0 as SMPWCS not supported. */ + | RT_BF_MAKE(VTD_BF_ECAP_REG_PSS, 0) /* 0 as PASID not supported. */ + | RT_BF_MAKE(VTD_BF_ECAP_REG_PASID, 0) /* PASID not supported. */ + | RT_BF_MAKE(VTD_BF_ECAP_REG_DIT, 0) /* 0 as DT not supported. */ + | RT_BF_MAKE(VTD_BF_ECAP_REG_PDS, 0) /* 0 as DT not supported. */ + | RT_BF_MAKE(VTD_BF_ECAP_REG_SMTS, fSmts) + | RT_BF_MAKE(VTD_BF_ECAP_REG_VCS, 0) /* 0 as PASID not supported (commands seem PASID specific). */ + | RT_BF_MAKE(VTD_BF_ECAP_REG_SLADS, 0) /* Second-level accessed/dirty not supported. */ + | RT_BF_MAKE(VTD_BF_ECAP_REG_SLTS, fSlts) + | RT_BF_MAKE(VTD_BF_ECAP_REG_FLTS, fFlts) + | RT_BF_MAKE(VTD_BF_ECAP_REG_SMPWCS, 0) /* 0 as PASID not supported. */ + | RT_BF_MAKE(VTD_BF_ECAP_REG_RPS, 0) /* We don't support RID_PASID field in SM context entry. */ + | RT_BF_MAKE(VTD_BF_ECAP_REG_ADMS, fAdms) + | RT_BF_MAKE(VTD_BF_ECAP_REG_RPRIVS, 0); /* 0 as SRS not supported. */ + dmarRegWriteRaw64(pThis, VTD_MMIO_OFF_ECAP_REG, pThis->fExtCapReg); + + pThis->fPermValidMask = DMAR_PERM_READ | DMAR_PERM_WRITE; + if (fErs) + pThis->fPermValidMask = DMAR_PERM_EXE; + } + + /* + * Initialize registers mutable by software. + */ + /* FECTL_REG */ + { + uint32_t const uCtl = RT_BF_MAKE(VTD_BF_FECTL_REG_IM, 1); + dmarRegWriteRaw32(pThis, VTD_MMIO_OFF_FECTL_REG, uCtl); + } + + /* ICETL_REG */ + { + uint32_t const uCtl = RT_BF_MAKE(VTD_BF_IECTL_REG_IM, 1); + dmarRegWriteRaw32(pThis, VTD_MMIO_OFF_IECTL_REG, uCtl); + } + +#ifdef VBOX_STRICT + Assert(!RT_BF_GET(pThis->fExtCapReg, VTD_BF_ECAP_REG_PRS)); /* PECTL_REG - Reserved if don't support PRS. */ + Assert(!RT_BF_GET(pThis->fExtCapReg, VTD_BF_ECAP_REG_MTS)); /* MTRRCAP_REG - Reserved if we don't support MTS. */ +#endif +} + + +/** + * @callback_method_impl{FNSSMDEVSAVEEXEC} + */ +static DECLCALLBACK(int) dmarR3SaveExec(PPDMDEVINS pDevIns, PSSMHANDLE pSSM) +{ + PCDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PCDMAR); + PCPDMDEVHLPR3 pHlp = pDevIns->pHlpR3; + LogFlowFunc(("\n")); + + /* First, save software-immutable registers that we validate on state load. */ + pHlp->pfnSSMPutU32(pSSM, pThis->uVerReg); + pHlp->pfnSSMPutU64(pSSM, pThis->fCapReg); + pHlp->pfnSSMPutU64(pSSM, pThis->fExtCapReg); + + /* Save MMIO registers. */ + pHlp->pfnSSMPutU32(pSSM, DMAR_MMIO_GROUP_COUNT); + pHlp->pfnSSMPutU32(pSSM, sizeof(pThis->abRegs0)); + pHlp->pfnSSMPutMem(pSSM, &pThis->abRegs0[0], sizeof(pThis->abRegs0)); + pHlp->pfnSSMPutU32(pSSM, sizeof(pThis->abRegs1)); + pHlp->pfnSSMPutMem(pSSM, &pThis->abRegs1[0], sizeof(pThis->abRegs1)); + + /* + * Save our implemention-defined MMIO registers offsets. + * The register themselves are currently all part of group 1 (saved above). + * We save these to ensure they're located where the code expects them while loading state. + */ + pHlp->pfnSSMPutU16(pSSM, DMAR_MMIO_OFF_IMPL_COUNT); + AssertCompile(DMAR_MMIO_OFF_IMPL_COUNT == 2); + pHlp->pfnSSMPutU16(pSSM, DMAR_MMIO_OFF_IVA_REG); + pHlp->pfnSSMPutU16(pSSM, DMAR_MMIO_OFF_FRCD_LO_REG); + + /* Save lazily activated registers. */ + pHlp->pfnSSMPutU64(pSSM, pThis->uIrtaReg); + pHlp->pfnSSMPutU64(pSSM, pThis->uRtaddrReg); + + /* Save terminator marker and return status. */ + return pHlp->pfnSSMPutU32(pSSM, UINT32_MAX); +} + + +/** + * @callback_method_impl{FNSSMDEVLOADEXEC} + */ +static DECLCALLBACK(int) dmarR3LoadExec(PPDMDEVINS pDevIns, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + PCPDMDEVHLPR3 pHlp = pDevIns->pHlpR3; + int const rcDataErr = VERR_SSM_UNEXPECTED_DATA; + int const rcFmtErr = VERR_SSM_DATA_UNIT_FORMAT_CHANGED; + LogFlowFunc(("\n")); + + /* + * Validate saved-state version. + */ + AssertReturn(uPass == SSM_PASS_FINAL, VERR_WRONG_ORDER); + if (uVersion != DMAR_SAVED_STATE_VERSION) + { + LogRel(("%s: Invalid saved-state version %#x\n", DMAR_LOG_PFX, uVersion)); + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + } + + /* + * Load and validate software-immutable registers. + * The features we had exposed to the guest (in the saved state) must be identical + * to what is currently emulated. + */ + { + /* VER_REG */ + uint32_t uVerReg = 0; + int rc = pHlp->pfnSSMGetU32(pSSM, &uVerReg); + AssertRCReturn(rc, rc); + AssertLogRelMsgReturn(uVerReg == pThis->uVerReg, + ("%s: VER_REG mismatch (expected %#RX32 got %#RX32)\n", DMAR_LOG_PFX, pThis->uVerReg, uVerReg), + rcDataErr); + /* CAP_REG */ + uint64_t fCapReg = 0; + pHlp->pfnSSMGetU64(pSSM, &fCapReg); + AssertLogRelMsgReturn(fCapReg == pThis->fCapReg, + ("%s: CAP_REG mismatch (expected %#RX64 got %#RX64)\n", DMAR_LOG_PFX, pThis->fCapReg, fCapReg), + rcDataErr); + /* ECAP_REG */ + uint64_t fExtCapReg = 0; + pHlp->pfnSSMGetU64(pSSM, &fExtCapReg); + AssertLogRelMsgReturn(fExtCapReg == pThis->fExtCapReg, + ("%s: ECAP_REG mismatch (expected %#RX64 got %#RX64)\n", DMAR_LOG_PFX, pThis->fExtCapReg, + fExtCapReg), rcDataErr); + } + + /* + * Load MMIO registers. + */ + { + /* Group count. */ + uint32_t cRegGroups = 0; + pHlp->pfnSSMGetU32(pSSM, &cRegGroups); + AssertLogRelMsgReturn(cRegGroups == DMAR_MMIO_GROUP_COUNT, + ("%s: MMIO group count mismatch (expected %u got %u)\n", DMAR_LOG_PFX, DMAR_MMIO_GROUP_COUNT, + cRegGroups), rcFmtErr); + /* Group 0. */ + uint32_t cbRegs0 = 0; + pHlp->pfnSSMGetU32(pSSM, &cbRegs0); + AssertLogRelMsgReturn(cbRegs0 == sizeof(pThis->abRegs0), + ("%s: MMIO group 0 size mismatch (expected %u got %u)\n", DMAR_LOG_PFX, sizeof(pThis->abRegs0), + cbRegs0), rcFmtErr); + pHlp->pfnSSMGetMem(pSSM, &pThis->abRegs0[0], cbRegs0); + /* Group 1. */ + uint32_t cbRegs1 = 0; + pHlp->pfnSSMGetU32(pSSM, &cbRegs1); + AssertLogRelMsgReturn(cbRegs1 == sizeof(pThis->abRegs1), + ("%s: MMIO group 1 size mismatch (expected %u got %u)\n", DMAR_LOG_PFX, sizeof(pThis->abRegs1), + cbRegs1), rcFmtErr); + pHlp->pfnSSMGetMem(pSSM, &pThis->abRegs1[0], cbRegs1); + } + + /* + * Validate implementation-defined MMIO register offsets. + */ + { + /* Offset count. */ + uint16_t cOffsets = 0; + pHlp->pfnSSMGetU16(pSSM, &cOffsets); + AssertLogRelMsgReturn(cOffsets == DMAR_MMIO_OFF_IMPL_COUNT, + ("%s: MMIO offset count mismatch (expected %u got %u)\n", DMAR_LOG_PFX, DMAR_MMIO_OFF_IMPL_COUNT, + cOffsets), rcFmtErr); + /* IVA_REG. */ + uint16_t offReg = 0; + pHlp->pfnSSMGetU16(pSSM, &offReg); + AssertLogRelMsgReturn(offReg == DMAR_MMIO_OFF_IVA_REG, + ("%s: IVA_REG offset mismatch (expected %u got %u)\n", DMAR_LOG_PFX, DMAR_MMIO_OFF_IVA_REG, + offReg), rcFmtErr); + /* IOTLB_REG. */ + AssertLogRelMsgReturn(offReg + 8 == DMAR_MMIO_OFF_IOTLB_REG, + ("%s: IOTLB_REG offset mismatch (expected %u got %u)\n", DMAR_LOG_PFX, DMAR_MMIO_OFF_IOTLB_REG, + offReg), rcFmtErr); + /* FRCD_LO_REG. */ + pHlp->pfnSSMGetU16(pSSM, &offReg); + AssertLogRelMsgReturn(offReg == DMAR_MMIO_OFF_FRCD_LO_REG, + ("%s: FRCD_LO_REG offset mismatch (expected %u got %u)\n", DMAR_LOG_PFX, DMAR_MMIO_OFF_FRCD_LO_REG, + offReg), rcFmtErr); + /* FRCD_HI_REG. */ + AssertLogRelMsgReturn(offReg + 8 == DMAR_MMIO_OFF_FRCD_HI_REG, + ("%s: FRCD_HI_REG offset mismatch (expected %u got %u)\n", DMAR_LOG_PFX, DMAR_MMIO_OFF_FRCD_HI_REG, + offReg), rcFmtErr); + } + + /* + * Load lazily activated registers. + */ + { + /* Active IRTA_REG. */ + pHlp->pfnSSMGetU64(pSSM, &pThis->uIrtaReg); + AssertLogRelMsgReturn(!(pThis->uIrtaReg & ~VTD_IRTA_REG_RW_MASK), + ("%s: IRTA_REG reserved bits set %#RX64\n", DMAR_LOG_PFX, pThis->uIrtaReg), rcDataErr); + /* Active RTADDR_REG. */ + pHlp->pfnSSMGetU64(pSSM, &pThis->uRtaddrReg); + AssertLogRelMsgReturn(!(pThis->uRtaddrReg & ~VTD_RTADDR_REG_RW_MASK), + ("%s: RTADDR_REG reserved bits set %#RX64\n", DMAR_LOG_PFX, pThis->uRtaddrReg), rcDataErr); + } + + /* + * Verify terminator marker. + */ + { + uint32_t uEndMarker = 0; + int const rc = pHlp->pfnSSMGetU32(pSSM, &uEndMarker); + AssertRCReturn(rc, rc); + AssertLogRelMsgReturn(uEndMarker == UINT32_MAX, + ("%s: End marker mismatch (expected %#RX32 got %#RX32)\n", DMAR_LOG_PFX, UINT32_MAX, uEndMarker), + rcFmtErr); + } + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNSSMDEVLOADDONE} + */ +static DECLCALLBACK(int) dmarR3LoadDone(PPDMDEVINS pDevIns, PSSMHANDLE pSSM) +{ + PDMARR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PDMARR3); + LogFlowFunc(("\n")); + RT_NOREF(pSSM); + AssertPtrReturn(pThisR3, VERR_INVALID_POINTER); + + DMAR_LOCK(pDevIns, pThisR3); + dmarInvQueueThreadWakeUpIfNeeded(pDevIns); + DMAR_UNLOCK(pDevIns, pThisR3); + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{PDMDEVREG,pfnReset} + */ +static DECLCALLBACK(void) iommuIntelR3Reset(PPDMDEVINS pDevIns) +{ + PCDMARR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PCDMARR3); + LogFlowFunc(("\n")); + + DMAR_LOCK(pDevIns, pThisR3); + dmarR3RegsInit(pDevIns); + DMAR_UNLOCK(pDevIns, pThisR3); +} + + +/** + * @interface_method_impl{PDMDEVREG,pfnDestruct} + */ +static DECLCALLBACK(int) iommuIntelR3Destruct(PPDMDEVINS pDevIns) +{ + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + PCDMARR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PCDMARR3); + LogFlowFunc(("\n")); + + DMAR_LOCK(pDevIns, pThisR3); + + if (pThis->hEvtInvQueue != NIL_SUPSEMEVENT) + { + PDMDevHlpSUPSemEventClose(pDevIns, pThis->hEvtInvQueue); + pThis->hEvtInvQueue = NIL_SUPSEMEVENT; + } + + DMAR_UNLOCK(pDevIns, pThisR3); + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{PDMDEVREG,pfnConstruct} + */ +static DECLCALLBACK(int) iommuIntelR3Construct(PPDMDEVINS pDevIns, int iInstance, PCFGMNODE pCfg) +{ + RT_NOREF(pCfg); + + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + PDMARR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PDMARR3); + pThisR3->pDevInsR3 = pDevIns; + + LogFlowFunc(("iInstance=%d\n", iInstance)); + NOREF(iInstance); + + /* + * Register the IOMMU with PDM. + */ + PDMIOMMUREGR3 IommuReg; + RT_ZERO(IommuReg); + IommuReg.u32Version = PDM_IOMMUREGCC_VERSION; + IommuReg.pfnMemAccess = iommuIntelMemAccess; + IommuReg.pfnMemBulkAccess = iommuIntelMemBulkAccess; + IommuReg.pfnMsiRemap = iommuIntelMsiRemap; + IommuReg.u32TheEnd = PDM_IOMMUREGCC_VERSION; + int rc = PDMDevHlpIommuRegister(pDevIns, &IommuReg, &pThisR3->CTX_SUFF(pIommuHlp), &pThis->idxIommu); + if (RT_FAILURE(rc)) + return PDMDEV_SET_ERROR(pDevIns, rc, N_("Failed to register ourselves as an IOMMU device")); + if (pThisR3->CTX_SUFF(pIommuHlp)->u32Version != PDM_IOMMUHLPR3_VERSION) + return PDMDevHlpVMSetError(pDevIns, VERR_VERSION_MISMATCH, RT_SRC_POS, + N_("IOMMU helper version mismatch; got %#x expected %#x"), + pThisR3->CTX_SUFF(pIommuHlp)->u32Version, PDM_IOMMUHLPR3_VERSION); + if (pThisR3->CTX_SUFF(pIommuHlp)->u32TheEnd != PDM_IOMMUHLPR3_VERSION) + return PDMDevHlpVMSetError(pDevIns, VERR_VERSION_MISMATCH, RT_SRC_POS, + N_("IOMMU helper end-version mismatch; got %#x expected %#x"), + pThisR3->CTX_SUFF(pIommuHlp)->u32TheEnd, PDM_IOMMUHLPR3_VERSION); + AssertPtr(pThisR3->pIommuHlpR3->pfnLock); + AssertPtr(pThisR3->pIommuHlpR3->pfnUnlock); + AssertPtr(pThisR3->pIommuHlpR3->pfnLockIsOwner); + AssertPtr(pThisR3->pIommuHlpR3->pfnSendMsi); + + /* + * Use PDM's critical section (via helpers) for the IOMMU device. + */ + rc = PDMDevHlpSetDeviceCritSect(pDevIns, PDMDevHlpCritSectGetNop(pDevIns)); + AssertRCReturn(rc, rc); + + /* + * Initialize PCI configuration registers. + */ + PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0]; + PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev); + + /* Header. */ + PDMPciDevSetVendorId(pPciDev, DMAR_PCI_VENDOR_ID); /* Intel */ + PDMPciDevSetDeviceId(pPciDev, DMAR_PCI_DEVICE_ID); /* VirtualBox DMAR device */ + PDMPciDevSetRevisionId(pPciDev, DMAR_PCI_REVISION_ID); /* VirtualBox specific device implementation revision */ + PDMPciDevSetClassBase(pPciDev, VBOX_PCI_CLASS_SYSTEM); /* System Base Peripheral */ + PDMPciDevSetClassSub(pPciDev, VBOX_PCI_SUB_SYSTEM_OTHER); /* Other */ + PDMPciDevSetHeaderType(pPciDev, 0); /* Single function, type 0 */ + PDMPciDevSetSubSystemId(pPciDev, DMAR_PCI_DEVICE_ID); /* VirtualBox DMAR device */ + PDMPciDevSetSubSystemVendorId(pPciDev, DMAR_PCI_VENDOR_ID); /* Intel */ + + /** @todo Chipset spec says PCI Express Capability Id. Relevant for us? */ + PDMPciDevSetStatus(pPciDev, 0); + PDMPciDevSetCapabilityList(pPciDev, 0); + /** @todo VTBAR at 0x180? */ + + /* + * Register the PCI function with PDM. + */ + rc = PDMDevHlpPCIRegister(pDevIns, pPciDev); + AssertLogRelRCReturn(rc, rc); + + /* + * Register MMIO region. + */ + AssertCompile(!(DMAR_MMIO_BASE_PHYSADDR & X86_PAGE_4K_OFFSET_MASK)); + rc = PDMDevHlpMmioCreateAndMap(pDevIns, DMAR_MMIO_BASE_PHYSADDR, DMAR_MMIO_SIZE, dmarMmioWrite, dmarMmioRead, + IOMMMIO_FLAGS_READ_DWORD_QWORD | IOMMMIO_FLAGS_WRITE_DWORD_QWORD_ZEROED, "Intel-IOMMU", + &pThis->hMmio); + AssertLogRelRCReturn(rc, rc); + + /* + * Register saved state handlers. + */ + rc = PDMDevHlpSSMRegisterEx(pDevIns, DMAR_SAVED_STATE_VERSION, sizeof(DMAR), NULL /* pszBefore */, + NULL /* pfnLivePrep */, NULL /* pfnLiveExec */, NULL /* pfnLiveVote */, + NULL /* pfnSavePrep */, dmarR3SaveExec, NULL /* pfnSaveDone */, + NULL /* pfnLoadPrep */, dmarR3LoadExec, dmarR3LoadDone); + AssertLogRelRCReturn(rc, rc); + + /* + * Register debugger info items. + */ + rc = PDMDevHlpDBGFInfoRegister(pDevIns, "iommu", "Display IOMMU state.", dmarR3DbgInfo); + AssertLogRelRCReturn(rc, rc); + +#ifdef VBOX_WITH_STATISTICS + /* + * Statistics. + */ + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioReadR3, STAMTYPE_COUNTER, "R3/MmioRead", STAMUNIT_OCCURENCES, "Number of MMIO reads in R3"); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioReadRZ, STAMTYPE_COUNTER, "RZ/MmioRead", STAMUNIT_OCCURENCES, "Number of MMIO reads in RZ."); + + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioWriteR3, STAMTYPE_COUNTER, "R3/MmioWrite", STAMUNIT_OCCURENCES, "Number of MMIO writes in R3."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioWriteRZ, STAMTYPE_COUNTER, "RZ/MmioWrite", STAMUNIT_OCCURENCES, "Number of MMIO writes in RZ."); + + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMsiRemapCfiR3, STAMTYPE_COUNTER, "R3/MsiRemapCfi", STAMUNIT_OCCURENCES, "Number of compatibility-format interrupt remap requests in R3."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMsiRemapCfiRZ, STAMTYPE_COUNTER, "RZ/MsiRemapCfi", STAMUNIT_OCCURENCES, "Number of compatibility-format interrupt remap requests in RZ."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMsiRemapRfiR3, STAMTYPE_COUNTER, "R3/MsiRemapRfi", STAMUNIT_OCCURENCES, "Number of remappable-format interrupt remap requests in R3."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMsiRemapRfiRZ, STAMTYPE_COUNTER, "RZ/MsiRemapRfi", STAMUNIT_OCCURENCES, "Number of remappable-format interrupt remap requests in RZ."); + + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemReadR3, STAMTYPE_COUNTER, "R3/MemRead", STAMUNIT_OCCURENCES, "Number of memory read translation requests in R3."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemReadRZ, STAMTYPE_COUNTER, "RZ/MemRead", STAMUNIT_OCCURENCES, "Number of memory read translation requests in RZ."); + + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemWriteR3, STAMTYPE_COUNTER, "R3/MemWrite", STAMUNIT_OCCURENCES, "Number of memory write translation requests in R3."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemWriteRZ, STAMTYPE_COUNTER, "RZ/MemWrite", STAMUNIT_OCCURENCES, "Number of memory write translation requests in RZ."); + + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkReadR3, STAMTYPE_COUNTER, "R3/MemBulkRead", STAMUNIT_OCCURENCES, "Number of memory bulk read translation requests in R3."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkReadRZ, STAMTYPE_COUNTER, "RZ/MemBulkRead", STAMUNIT_OCCURENCES, "Number of memory bulk read translation requests in RZ."); + + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkWriteR3, STAMTYPE_COUNTER, "R3/MemBulkWrite", STAMUNIT_OCCURENCES, "Number of memory bulk write translation requests in R3."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkWriteRZ, STAMTYPE_COUNTER, "RZ/MemBulkWrite", STAMUNIT_OCCURENCES, "Number of memory bulk write translation requests in RZ."); + + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCcInvDsc, STAMTYPE_COUNTER, "R3/QI/CcInv", STAMUNIT_OCCURENCES, "Number of cc_inv_dsc processed."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIotlbInvDsc, STAMTYPE_COUNTER, "R3/QI/IotlbInv", STAMUNIT_OCCURENCES, "Number of iotlb_inv_dsc processed."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatDevtlbInvDsc, STAMTYPE_COUNTER, "R3/QI/DevtlbInv", STAMUNIT_OCCURENCES, "Number of dev_tlb_inv_dsc processed."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIecInvDsc, STAMTYPE_COUNTER, "R3/QI/IecInv", STAMUNIT_OCCURENCES, "Number of iec_inv processed."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatInvWaitDsc, STAMTYPE_COUNTER, "R3/QI/InvWait", STAMUNIT_OCCURENCES, "Number of inv_wait_dsc processed."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatPasidIotlbInvDsc, STAMTYPE_COUNTER, "R3/QI/PasidIotlbInv", STAMUNIT_OCCURENCES, "Number of p_iotlb_inv_dsc processed."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatPasidCacheInvDsc, STAMTYPE_COUNTER, "R3/QI/PasidCacheInv", STAMUNIT_OCCURENCES, "Number of pc_inv_dsc pprocessed."); + PDMDevHlpSTAMRegister(pDevIns, &pThis->StatPasidDevtlbInvDsc, STAMTYPE_COUNTER, "R3/QI/PasidDevtlbInv", STAMUNIT_OCCURENCES, "Number of p_dev_tlb_inv_dsc processed."); +#endif + + /* + * Initialize registers. + */ + dmarR3RegsInit(pDevIns); + + /* + * Create invalidation-queue thread and semaphore. + */ + char szInvQueueThread[32]; + RT_ZERO(szInvQueueThread); + RTStrPrintf(szInvQueueThread, sizeof(szInvQueueThread), "IOMMU-QI-%u", iInstance); + rc = PDMDevHlpThreadCreate(pDevIns, &pThisR3->pInvQueueThread, pThis, dmarR3InvQueueThread, dmarR3InvQueueThreadWakeUp, + 0 /* cbStack */, RTTHREADTYPE_IO, szInvQueueThread); + AssertLogRelRCReturn(rc, rc); + + rc = PDMDevHlpSUPSemEventCreate(pDevIns, &pThis->hEvtInvQueue); + AssertLogRelRCReturn(rc, rc); + + /* + * Log some of the features exposed to software. + */ + uint8_t const uVerMax = RT_BF_GET(pThis->uVerReg, VTD_BF_VER_REG_MAX); + uint8_t const uVerMin = RT_BF_GET(pThis->uVerReg, VTD_BF_VER_REG_MIN); + uint8_t const cMgawBits = RT_BF_GET(pThis->fCapReg, VTD_BF_CAP_REG_MGAW) + 1; + uint8_t const fSagaw = RT_BF_GET(pThis->fCapReg, VTD_BF_CAP_REG_SAGAW); + uint16_t const offFrcd = RT_BF_GET(pThis->fCapReg, VTD_BF_CAP_REG_FRO); + uint16_t const offIva = RT_BF_GET(pThis->fExtCapReg, VTD_BF_ECAP_REG_IRO); + LogRel(("%s: Mapped at %#RGp (%u-level page-table supported)\n", + DMAR_LOG_PFX, DMAR_MMIO_BASE_PHYSADDR, pThis->cMaxPagingLevel)); + LogRel(("%s: Version=%u.%u Cap=%#RX64 ExtCap=%#RX64 Mgaw=%u bits Sagaw=%#x HawBaseMask=%#RX64 MgawInvMask=%#RX64 FRO=%#x IRO=%#x\n", + DMAR_LOG_PFX, uVerMax, uVerMin, pThis->fCapReg, pThis->fExtCapReg, cMgawBits, fSagaw, pThis->fHawBaseMask, + pThis->fMgawInvMask, offFrcd, offIva)); + return VINF_SUCCESS; +} + +#else + +/** + * @callback_method_impl{PDMDEVREGR0,pfnConstruct} + */ +static DECLCALLBACK(int) iommuIntelRZConstruct(PPDMDEVINS pDevIns) +{ + PDMDEV_CHECK_VERSIONS_RETURN(pDevIns); + PDMAR pThis = PDMDEVINS_2_DATA(pDevIns, PDMAR); + PDMARCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PDMARCC); + pThisCC->CTX_SUFF(pDevIns) = pDevIns; + + /* We will use PDM's critical section (via helpers) for the IOMMU device. */ + int rc = PDMDevHlpSetDeviceCritSect(pDevIns, PDMDevHlpCritSectGetNop(pDevIns)); + AssertRCReturn(rc, rc); + + /* Set up the MMIO RZ handlers. */ + rc = PDMDevHlpMmioSetUpContext(pDevIns, pThis->hMmio, dmarMmioWrite, dmarMmioRead, NULL /* pvUser */); + AssertRCReturn(rc, rc); + + /* Set up the IOMMU RZ callbacks. */ + PDMIOMMUREGCC IommuReg; + RT_ZERO(IommuReg); + IommuReg.u32Version = PDM_IOMMUREGCC_VERSION; + IommuReg.idxIommu = pThis->idxIommu; + IommuReg.pfnMemAccess = iommuIntelMemAccess; + IommuReg.pfnMemBulkAccess = iommuIntelMemBulkAccess; + IommuReg.pfnMsiRemap = iommuIntelMsiRemap; + IommuReg.u32TheEnd = PDM_IOMMUREGCC_VERSION; + + rc = PDMDevHlpIommuSetUpContext(pDevIns, &IommuReg, &pThisCC->CTX_SUFF(pIommuHlp)); + AssertRCReturn(rc, rc); + AssertPtrReturn(pThisCC->CTX_SUFF(pIommuHlp), VERR_IOMMU_IPE_1); + AssertReturn(pThisCC->CTX_SUFF(pIommuHlp)->u32Version == CTX_MID(PDM_IOMMUHLP,_VERSION), VERR_VERSION_MISMATCH); + AssertReturn(pThisCC->CTX_SUFF(pIommuHlp)->u32TheEnd == CTX_MID(PDM_IOMMUHLP,_VERSION), VERR_VERSION_MISMATCH); + AssertPtr(pThisCC->CTX_SUFF(pIommuHlp)->pfnLock); + AssertPtr(pThisCC->CTX_SUFF(pIommuHlp)->pfnUnlock); + AssertPtr(pThisCC->CTX_SUFF(pIommuHlp)->pfnLockIsOwner); + AssertPtr(pThisCC->CTX_SUFF(pIommuHlp)->pfnSendMsi); + + return VINF_SUCCESS; +} + +#endif + + +/** + * The device registration structure. + */ +PDMDEVREG const g_DeviceIommuIntel = +{ + /* .u32Version = */ PDM_DEVREG_VERSION, + /* .uReserved0 = */ 0, + /* .szName = */ "iommu-intel", + /* .fFlags = */ PDM_DEVREG_FLAGS_DEFAULT_BITS | PDM_DEVREG_FLAGS_RZ | PDM_DEVREG_FLAGS_NEW_STYLE, + /* .fClass = */ PDM_DEVREG_CLASS_PCI_BUILTIN, + /* .cMaxInstances = */ 1, + /* .uSharedVersion = */ 42, + /* .cbInstanceShared = */ sizeof(DMAR), + /* .cbInstanceCC = */ sizeof(DMARCC), + /* .cbInstanceRC = */ sizeof(DMARRC), + /* .cMaxPciDevices = */ 1, + /* .cMaxMsixVectors = */ 0, + /* .pszDescription = */ "IOMMU (Intel)", +#if defined(IN_RING3) + /* .pszRCMod = */ "VBoxDDRC.rc", + /* .pszR0Mod = */ "VBoxDDR0.r0", + /* .pfnConstruct = */ iommuIntelR3Construct, + /* .pfnDestruct = */ iommuIntelR3Destruct, + /* .pfnRelocate = */ NULL, + /* .pfnMemSetup = */ NULL, + /* .pfnPowerOn = */ NULL, + /* .pfnReset = */ iommuIntelR3Reset, + /* .pfnSuspend = */ NULL, + /* .pfnResume = */ NULL, + /* .pfnAttach = */ NULL, + /* .pfnDetach = */ NULL, + /* .pfnQueryInterface = */ NULL, + /* .pfnInitComplete = */ NULL, + /* .pfnPowerOff = */ NULL, + /* .pfnSoftReset = */ NULL, + /* .pfnReserved0 = */ NULL, + /* .pfnReserved1 = */ NULL, + /* .pfnReserved2 = */ NULL, + /* .pfnReserved3 = */ NULL, + /* .pfnReserved4 = */ NULL, + /* .pfnReserved5 = */ NULL, + /* .pfnReserved6 = */ NULL, + /* .pfnReserved7 = */ NULL, +#elif defined(IN_RING0) + /* .pfnEarlyConstruct = */ NULL, + /* .pfnConstruct = */ iommuIntelRZConstruct, + /* .pfnDestruct = */ NULL, + /* .pfnFinalDestruct = */ NULL, + /* .pfnRequest = */ NULL, + /* .pfnReserved0 = */ NULL, + /* .pfnReserved1 = */ NULL, + /* .pfnReserved2 = */ NULL, + /* .pfnReserved3 = */ NULL, + /* .pfnReserved4 = */ NULL, + /* .pfnReserved5 = */ NULL, + /* .pfnReserved6 = */ NULL, + /* .pfnReserved7 = */ NULL, +#elif defined(IN_RC) + /* .pfnConstruct = */ iommuIntelRZConstruct, + /* .pfnReserved0 = */ NULL, + /* .pfnReserved1 = */ NULL, + /* .pfnReserved2 = */ NULL, + /* .pfnReserved3 = */ NULL, + /* .pfnReserved4 = */ NULL, + /* .pfnReserved5 = */ NULL, + /* .pfnReserved6 = */ NULL, + /* .pfnReserved7 = */ NULL, +#else +# error "Not in IN_RING3, IN_RING0 or IN_RC!" +#endif + /* .u32VersionEnd = */ PDM_DEVREG_VERSION +}; + +#endif /* !VBOX_DEVICE_STRUCT_TESTCASE */ + diff --git a/src/VBox/Devices/Bus/DevIommuIntel.h b/src/VBox/Devices/Bus/DevIommuIntel.h new file mode 100644 index 00000000..9b2e301c --- /dev/null +++ b/src/VBox/Devices/Bus/DevIommuIntel.h @@ -0,0 +1,49 @@ +/* $Id: DevIommuIntel.h $ */ +/** @file + * DevIommuIntel - I/O Memory Management Unit (Intel), header shared with the IOMMU, ACPI, chipset/firmware code. + */ + +/* + * Copyright (C) 2021-2022 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef VBOX_INCLUDED_SRC_Bus_DevIommuIntel_h +#define VBOX_INCLUDED_SRC_Bus_DevIommuIntel_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +/** Intel vendor ID for the DMAR unit. */ +#define DMAR_PCI_VENDOR_ID 0x8086 +/** VirtualBox DMAR unit's device ID. */ +#define DMAR_PCI_DEVICE_ID 0xc0de +/** VirtualBox DMAR unit's device revision ID. */ +#define DMAR_PCI_REVISION_ID 0x01 + +/** Feature/capability flags exposed to the guest. */ +#define DMAR_ACPI_DMAR_FLAGS ACPI_DMAR_F_INTR_REMAP + +/** The MMIO base address of the DMAR unit (taken from real hardware). */ +#define DMAR_MMIO_BASE_PHYSADDR UINT64_C(0xfed90000) +/** The size of the MMIO region (in bytes). */ +#define DMAR_MMIO_SIZE 4096 + +#endif /* !VBOX_INCLUDED_SRC_Bus_DevIommuIntel_h */ diff --git a/src/VBox/Devices/Bus/DevPCI.cpp b/src/VBox/Devices/Bus/DevPCI.cpp new file mode 100644 index 00000000..ee87bf15 --- /dev/null +++ b/src/VBox/Devices/Bus/DevPCI.cpp @@ -0,0 +1,1867 @@ +/* $Id: DevPCI.cpp $ */ +/** @file + * DevPCI - PCI BUS Device. + * + * @remarks New code shall be added to DevPciIch9.cpp as that will become + * the common PCI bus code soon. Don't fix code in both DevPCI.cpp + * and DevPciIch9.cpp when it's possible to just make the latter + * version common. Common code uses the 'devpci' prefix, is + * prototyped in DevPciInternal.h, and is defined in DevPciIch9.cpp. + */ + +/* + * Copyright (C) 2006-2022 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + * -------------------------------------------------------------------- + * + * This code is based on: + * + * QEMU PCI bus manager + * + * Copyright (c) 2004 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DEV_PCI +#define PDMPCIDEV_INCLUDE_PRIVATE /* Hack to get pdmpcidevint.h included at the right point. */ +#include <VBox/vmm/pdmpcidev.h> +#include <VBox/vmm/pdmdev.h> +#include <VBox/vmm/mm.h> +#include <iprt/asm.h> +#include <iprt/assert.h> +#include <iprt/string.h> + +#include "PciInline.h" +#include "VBoxDD.h" +#include "DevPciInternal.h" + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** Saved state version of the PCI bus device. */ +#define VBOX_PCI_SAVED_STATE_VERSION VBOX_PCI_SAVED_STATE_VERSION_REGION_SIZES +/** Adds I/O region types and sizes for dealing changes in resource regions. */ +#define VBOX_PCI_SAVED_STATE_VERSION_REGION_SIZES 4 +/** Before region sizes, the first named one. + * Looking at the code though, we support even older version. */ +#define VBOX_PCI_SAVED_STATE_VERSION_IRQ_STATES 3 +/** Notes whether we use the I/O APIC. */ +#define VBOX_PCI_SAVED_STATE_VERSION_USE_IO_APIC 2 + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +RT_C_DECLS_BEGIN + +static DECLCALLBACK(void) pcibridgeSetIrq(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, int iIrq, int iLevel, uint32_t uTag); + +#ifdef IN_RING3 +DECLINLINE(PPDMPCIDEV) pciR3FindBridge(PDEVPCIBUS pBus, uint8_t iBus); +#endif + +RT_C_DECLS_END + +#define DEBUG_PCI + +#define PCI_VENDOR_ID 0x00 /* 16 bits */ +#define PCI_DEVICE_ID 0x02 /* 16 bits */ +#define PCI_COMMAND 0x04 /* 16 bits */ +#define PCI_COMMAND_IO 0x01 /* Enable response in I/O space */ +#define PCI_COMMAND_MEMORY 0x02 /* Enable response in Memory space */ +#define PCI_CLASS_DEVICE 0x0a /* Device class */ +#define PCI_INTERRUPT_LINE 0x3c /* 8 bits */ +#define PCI_INTERRUPT_PIN 0x3d /* 8 bits */ +#define PCI_MIN_GNT 0x3e /* 8 bits */ +#define PCI_MAX_LAT 0x3f /* 8 bits */ + + +static VBOXSTRICTRC pci_data_write(PPDMDEVINS pDevIns, PDEVPCIROOT pGlobals, uint32_t addr, uint32_t u32Value, int cb) +{ + LogFunc(("addr=%08x u32Value=%08x cb=%d\n", pGlobals->uConfigReg, u32Value, cb)); + + if (!(pGlobals->uConfigReg & (1 << 31))) + return VINF_SUCCESS; + if ((pGlobals->uConfigReg & 0x3) != 0) + return VINF_SUCCESS; + + uint8_t const iBus = (pGlobals->uConfigReg >> 16) & 0xff; + uint8_t const iDevice = (pGlobals->uConfigReg >> 8) & 0xff; +#ifdef IN_RING3 + uint32_t const config_addr = (pGlobals->uConfigReg & 0xfc) | (addr & 3); +#endif + RT_UNTRUSTED_VALIDATED_FENCE(); /* paranoia */ + + VBOXSTRICTRC rcStrict = VINF_SUCCESS; + if (iBus != 0) + { + if (pGlobals->PciBus.cBridges) + { +#ifdef IN_RING3 /** @todo do lookup in R0/RC too! */ + PPDMPCIDEV pBridgeDevice = pciR3FindBridge(&pGlobals->PciBus, iBus); + if (pBridgeDevice) + { + AssertPtr(pBridgeDevice->Int.s.pfnBridgeConfigWrite); + rcStrict = pBridgeDevice->Int.s.pfnBridgeConfigWrite(pBridgeDevice->Int.s.CTX_SUFF(pDevIns), iBus, + iDevice, config_addr, cb, u32Value); + } +#else + RT_NOREF(pDevIns, addr, u32Value, cb); + rcStrict = VINF_IOM_R3_IOPORT_WRITE; +#endif + } + } + else + { + R3PTRTYPE(PDMPCIDEV *) pPciDev = pGlobals->PciBus.apDevices[iDevice]; + if (pPciDev) + { +#ifdef IN_RING3 + LogFunc(("%s: addr=%02x u32Value=%08x cb=%d\n", pPciDev->pszNameR3, config_addr, u32Value, cb)); + rcStrict = VINF_PDM_PCI_DO_DEFAULT; + if (pPciDev->Int.s.pfnConfigWrite) + rcStrict = pPciDev->Int.s.pfnConfigWrite(pPciDev->Int.s.CTX_SUFF(pDevIns), pPciDev, config_addr, cb, u32Value); + if (rcStrict == VINF_PDM_PCI_DO_DEFAULT) + rcStrict = devpciR3CommonConfigWriteWorker(pDevIns, PDMINS_2_DATA_CC(pDevIns, PDEVPCIBUSCC), + pPciDev, config_addr, cb, u32Value); +#else + rcStrict = VINF_IOM_R3_IOPORT_WRITE; +#endif + } + } + return rcStrict; +} + +static VBOXSTRICTRC pci_data_read(PDEVPCIROOT pGlobals, uint32_t addr, int cb, uint32_t *pu32Value) +{ + *pu32Value = UINT32_MAX; + + if (!(pGlobals->uConfigReg & (1 << 31))) + return VINF_SUCCESS; + if ((pGlobals->uConfigReg & 0x3) != 0) + return VINF_SUCCESS; + uint8_t const iBus = (pGlobals->uConfigReg >> 16) & 0xff; + uint8_t const iDevice = (pGlobals->uConfigReg >> 8) & 0xff; +#ifdef IN_RING3 + uint32_t const config_addr = (pGlobals->uConfigReg & 0xfc) | (addr & 3); +#endif + RT_UNTRUSTED_VALIDATED_FENCE(); + + VBOXSTRICTRC rcStrict = VINF_SUCCESS; + if (iBus != 0) + { + if (pGlobals->PciBus.cBridges) + { +#ifdef IN_RING3 /** @todo do lookup in R0/RC too! */ + PPDMPCIDEV pBridgeDevice = pciR3FindBridge(&pGlobals->PciBus, iBus); + if (pBridgeDevice) + { + AssertPtr(pBridgeDevice->Int.s.pfnBridgeConfigRead); + rcStrict = pBridgeDevice->Int.s.pfnBridgeConfigRead(pBridgeDevice->Int.s.CTX_SUFF(pDevIns), + iBus, iDevice, config_addr, cb, pu32Value); + } +#else + RT_NOREF(addr, cb); + rcStrict = VINF_IOM_R3_IOPORT_READ; +#endif + } + } + else + { + R3PTRTYPE(PDMPCIDEV *) pPciDev = pGlobals->PciBus.apDevices[iDevice]; + if (pPciDev) + { +#ifdef IN_RING3 + rcStrict = VINF_PDM_PCI_DO_DEFAULT; + if (pPciDev->Int.s.pfnConfigRead) + rcStrict = pPciDev->Int.s.pfnConfigRead(pPciDev->Int.s.CTX_SUFF(pDevIns), pPciDev, config_addr, cb, pu32Value); + if (rcStrict == VINF_PDM_PCI_DO_DEFAULT) + rcStrict = devpciR3CommonConfigReadWorker(pPciDev, config_addr, cb, pu32Value); + LogFunc(("%s: addr=%02x val=%08x cb=%d\n", pPciDev->pszNameR3, config_addr, *pu32Value, cb)); +#else + NOREF(cb); + rcStrict = VINF_IOM_R3_IOPORT_READ; +#endif + } + } + + return rcStrict; +} + + + +/* return the global irq number corresponding to a given device irq + pin. We could also use the bus number to have a more precise + mapping. + This is the implementation note described in the PCI spec chapter 2.2.6 */ +static inline int pci_slot_get_pirq(uint8_t uDevFn, int irq_num) +{ + int slot_addend; + slot_addend = (uDevFn >> 3) - 1; + return (irq_num + slot_addend) & 3; +} + +static inline int pci_slot_get_apic_pirq(uint8_t uDevFn, int irq_num) +{ + return (irq_num + (uDevFn >> 3)) & 7; +} + +static inline int get_pci_irq_apic_level(PDEVPCIROOT pGlobals, int irq_num) +{ + return (pGlobals->auPciApicIrqLevels[irq_num] != 0); +} + +static void apic_set_irq(PPDMDEVINS pDevIns, PDEVPCIBUS pBus, PDEVPCIBUSCC pBusCC, + uint8_t uDevFn, PDMPCIDEV *pPciDev, int irq_num1, int iLevel, int iAcpiIrq, uint32_t uTagSrc) +{ + /* This is only allowed to be called with a pointer to the host bus. */ + AssertMsg(pBus->iBus == 0, ("iBus=%u\n", pBus->iBus)); + uint16_t const uBusDevFn = PCIBDF_MAKE(pBus->iBus, uDevFn); + + if (iAcpiIrq == -1) { + int apic_irq, apic_level; + PDEVPCIROOT pGlobals = DEVPCIBUS_2_DEVPCIROOT(pBus); + int irq_num = pci_slot_get_apic_pirq(uDevFn, irq_num1); + + if ((iLevel & PDM_IRQ_LEVEL_HIGH) == PDM_IRQ_LEVEL_HIGH) + ASMAtomicIncU32(&pGlobals->auPciApicIrqLevels[irq_num]); + else if ((iLevel & PDM_IRQ_LEVEL_HIGH) == PDM_IRQ_LEVEL_LOW) + ASMAtomicDecU32(&pGlobals->auPciApicIrqLevels[irq_num]); + + apic_irq = irq_num + 0x10; + apic_level = get_pci_irq_apic_level(pGlobals, irq_num); + Log3Func(("%s: irq_num1=%d level=%d apic_irq=%d apic_level=%d irq_num1=%d\n", + R3STRING(pPciDev->pszNameR3), irq_num1, iLevel, apic_irq, apic_level, irq_num)); + pBusCC->CTX_SUFF(pPciHlp)->pfnIoApicSetIrq(pDevIns, uBusDevFn, apic_irq, apic_level, uTagSrc); + + if ((iLevel & PDM_IRQ_LEVEL_FLIP_FLOP) == PDM_IRQ_LEVEL_FLIP_FLOP) { + ASMAtomicDecU32(&pGlobals->auPciApicIrqLevels[irq_num]); + pPciDev->Int.s.uIrqPinState = PDM_IRQ_LEVEL_LOW; + apic_level = get_pci_irq_apic_level(pGlobals, irq_num); + Log3Func(("%s: irq_num1=%d level=%d apic_irq=%d apic_level=%d irq_num1=%d (flop)\n", + R3STRING(pPciDev->pszNameR3), irq_num1, iLevel, apic_irq, apic_level, irq_num)); + pBusCC->CTX_SUFF(pPciHlp)->pfnIoApicSetIrq(pDevIns, uBusDevFn, apic_irq, apic_level, uTagSrc); + } + } else { + Log3Func(("%s: irq_num1=%d level=%d iAcpiIrq=%d\n", R3STRING(pPciDev->pszNameR3), irq_num1, iLevel, iAcpiIrq)); + pBusCC->CTX_SUFF(pPciHlp)->pfnIoApicSetIrq(pDevIns, uBusDevFn, iAcpiIrq, iLevel, uTagSrc); + } +} + +DECLINLINE(int) get_pci_irq_level(PDEVPCIROOT pGlobals, int irq_num) +{ + return (pGlobals->Piix3.auPciLegacyIrqLevels[irq_num] != 0); +} + +/** + * Set the IRQ for a PCI device on the host bus - shared by host bus and bridge. + * + * @param pDevIns The PDM device instance for the PCI bus. + * @param pGlobals Device instance of the host PCI bus. + * @param pBusCC Context specific data for the PCI bus. + * @param uDevFn The device number on the host bus which will raise the IRQ + * @param pPciDev The PCI device structure which raised the interrupt. + * @param iIrq IRQ number to set. + * @param iLevel IRQ level. + * @param uTagSrc The IRQ tag and source ID (for tracing). + * @remark uDevFn and pPciDev->uDevFn are not the same if the device is behind + * a bridge. In that case uDevFn will be the slot of the bridge which + * is needed to calculate the PIRQ value. + */ +static void pciSetIrqInternal(PPDMDEVINS pDevIns, PDEVPCIROOT pGlobals, PDEVPCIBUSCC pBusCC, + uint8_t uDevFn, PPDMPCIDEV pPciDev, int iIrq, int iLevel, uint32_t uTagSrc) +{ + PDEVPCIBUS pBus = &pGlobals->PciBus; + uint8_t *pbCfg = pDevIns->apPciDevs[1]->abConfig; + const bool fIsAcpiDevice = pPciDev->abConfig[2] == 0x13 && pPciDev->abConfig[3] == 0x71; + /* If the two configuration space bytes at 0xde, 0xad are set to 0xbe, 0xef, a back door + * is opened to route PCI interrupts directly to the I/O APIC and bypass the PIC. + * See the \_SB_.PCI0._PRT method in vbox.dsl. + */ + const bool fIsApicEnabled = pGlobals->fUseIoApic && pbCfg[0xde] == 0xbe && pbCfg[0xad] == 0xef; + int pic_irq, pic_level; + + /* Check if the state changed. */ + if (pPciDev->Int.s.uIrqPinState != iLevel) + { + pPciDev->Int.s.uIrqPinState = (iLevel & PDM_IRQ_LEVEL_HIGH); + + /* Send interrupt to I/O APIC only. */ + if (fIsApicEnabled) + { + if (fIsAcpiDevice) + /* + * ACPI needs special treatment since SCI is hardwired and + * should not be affected by PCI IRQ routing tables at the + * same time SCI IRQ is shared in PCI sense hence this + * kludge (i.e. we fetch the hardwired value from ACPIs + * PCI device configuration space). + */ + apic_set_irq(pDevIns, pBus, pBusCC, uDevFn, pPciDev, -1, iLevel, pPciDev->abConfig[PCI_INTERRUPT_LINE], uTagSrc); + else + apic_set_irq(pDevIns, pBus, pBusCC, uDevFn, pPciDev, iIrq, iLevel, -1, uTagSrc); + return; + } + + if (fIsAcpiDevice) + { + /* As per above treat ACPI in a special way */ + pic_irq = pPciDev->abConfig[PCI_INTERRUPT_LINE]; + pGlobals->Piix3.iAcpiIrq = pic_irq; + pGlobals->Piix3.iAcpiIrqLevel = iLevel & PDM_IRQ_LEVEL_HIGH; + } + else + { + int irq_num; + irq_num = pci_slot_get_pirq(uDevFn, iIrq); + + if (pPciDev->Int.s.uIrqPinState == PDM_IRQ_LEVEL_HIGH) + ASMAtomicIncU32(&pGlobals->Piix3.auPciLegacyIrqLevels[irq_num]); + else if (pPciDev->Int.s.uIrqPinState == PDM_IRQ_LEVEL_LOW) + ASMAtomicDecU32(&pGlobals->Piix3.auPciLegacyIrqLevels[irq_num]); + + /* now we change the pic irq level according to the piix irq mappings */ + pic_irq = pbCfg[0x60 + irq_num]; + if (pic_irq >= 16) + { + if ((iLevel & PDM_IRQ_LEVEL_FLIP_FLOP) == PDM_IRQ_LEVEL_FLIP_FLOP) + { + ASMAtomicDecU32(&pGlobals->Piix3.auPciLegacyIrqLevels[irq_num]); + pPciDev->Int.s.uIrqPinState = PDM_IRQ_LEVEL_LOW; + } + + return; + } + } + + /* the pic level is the logical OR of all the PCI irqs mapped to it */ + pic_level = 0; + if (pic_irq == pbCfg[0x60]) + pic_level |= get_pci_irq_level(pGlobals, 0); /* PIRQA */ + if (pic_irq == pbCfg[0x61]) + pic_level |= get_pci_irq_level(pGlobals, 1); /* PIRQB */ + if (pic_irq == pbCfg[0x62]) + pic_level |= get_pci_irq_level(pGlobals, 2); /* PIRQC */ + if (pic_irq == pbCfg[0x63]) + pic_level |= get_pci_irq_level(pGlobals, 3); /* PIRQD */ + if (pic_irq == pGlobals->Piix3.iAcpiIrq) + pic_level |= pGlobals->Piix3.iAcpiIrqLevel; + + Log3Func(("%s: iLevel=%d iIrq=%d pic_irq=%d pic_level=%d uTagSrc=%#x\n", + R3STRING(pPciDev->pszNameR3), iLevel, iIrq, pic_irq, pic_level, uTagSrc)); + pBusCC->CTX_SUFF(pPciHlp)->pfnIsaSetIrq(pDevIns, pic_irq, pic_level, uTagSrc); + + /** @todo optimize pci irq flip-flop some rainy day. */ + if ((iLevel & PDM_IRQ_LEVEL_FLIP_FLOP) == PDM_IRQ_LEVEL_FLIP_FLOP) + pciSetIrqInternal(pDevIns, pGlobals, pBusCC, uDevFn, pPciDev, iIrq, PDM_IRQ_LEVEL_LOW, uTagSrc); + } +} + + +/** + * @interface_method_impl{PDMPCIBUSREGR3,pfnSetIrqR3} + */ +static DECLCALLBACK(void) pciSetIrq(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, int iIrq, int iLevel, uint32_t uTagSrc) +{ + PDEVPCIROOT pBus = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + PDEVPCIBUSCC pBusCC = PDMINS_2_DATA_CC(pDevIns, PDEVPCIBUSCC); + LogFlow(("pciSetIrq: %p %u %u %#x\n", pPciDev, iIrq, iLevel, uTagSrc)); + pciSetIrqInternal(pDevIns, pBus, pBusCC, pPciDev->uDevFn, pPciDev, iIrq, iLevel, uTagSrc); +} + +#ifdef IN_RING3 + +/** + * Finds a bridge on the bus which contains the destination bus. + * + * @return Pointer to the device instance data of the bus or + * NULL if no bridge was found. + * @param pBus Pointer to the bus to search on. + * @param iBus Destination bus number. + */ +DECLINLINE(PPDMPCIDEV) pciR3FindBridge(PDEVPCIBUS pBus, uint8_t iBus) +{ + /* Search for a fitting bridge. */ + for (uint32_t iBridge = 0; iBridge < pBus->cBridges; iBridge++) + { + /* + * Examine secondary and subordinate bus number. + * If the target bus is in the range we pass the request on to the bridge. + */ + PPDMPCIDEV pBridgeTemp = pBus->papBridgesR3[iBridge]; + AssertMsg(pBridgeTemp && pciDevIsPci2PciBridge(pBridgeTemp), + ("Device is not a PCI bridge but on the list of PCI bridges\n")); + + if ( iBus >= pBridgeTemp->abConfig[VBOX_PCI_SECONDARY_BUS] + && iBus <= pBridgeTemp->abConfig[VBOX_PCI_SUBORDINATE_BUS]) + return pBridgeTemp; + } + + /* Nothing found. */ + return NULL; +} + +static void pciR3Piix3Reset(PPDMPCIDEV pPiix3PciDev) +{ + uint8_t *pci_conf = pPiix3PciDev->abConfig; + + pci_conf[0x04] = 0x07; /* master, memory and I/O */ + pci_conf[0x05] = 0x00; + pci_conf[0x06] = 0x00; + pci_conf[0x07] = 0x02; /* PCI_status_devsel_medium */ + pci_conf[0x4c] = 0x4d; + pci_conf[0x4e] = 0x03; + pci_conf[0x4f] = 0x00; + pci_conf[0x60] = 0x80; + pci_conf[0x69] = 0x02; + pci_conf[0x70] = 0x80; + pci_conf[0x76] = 0x0c; + pci_conf[0x77] = 0x0c; + pci_conf[0x78] = 0x02; + pci_conf[0x79] = 0x00; + pci_conf[0x80] = 0x00; + pci_conf[0x82] = 0x02; /* Get rid of the Linux guest "Enabling Passive Release" PCI quirk warning. */ + pci_conf[0xa0] = 0x08; + pci_conf[0xa2] = 0x00; + pci_conf[0xa3] = 0x00; + pci_conf[0xa4] = 0x00; + pci_conf[0xa5] = 0x00; + pci_conf[0xa6] = 0x00; + pci_conf[0xa7] = 0x00; + pci_conf[0xa8] = 0x0f; + pci_conf[0xaa] = 0x00; + pci_conf[0xab] = 0x00; + pci_conf[0xac] = 0x00; + pci_conf[0xae] = 0x00; +} + +/* host irqs corresponding to PCI irqs A-D */ +static const uint8_t pci_irqs[4] = { 11, 10, 9, 11 }; /* bird: added const */ + +static void pci_bios_init_device(PPDMDEVINS pDevIns, PDEVPCIROOT pGlobals, PDEVPCIBUS pBus, + PPDMPCIDEV pPciDev, uint8_t cBridgeDepth, uint8_t *paBridgePositions) +{ + uint32_t uPciBiosSpecialVRAM = 0xe0000000; + uint32_t *paddr; + int pin, pic_irq; + uint16_t devclass, vendor_id, device_id; + + devclass = devpciR3GetWord(pPciDev, PCI_CLASS_DEVICE); + vendor_id = devpciR3GetWord(pPciDev, PCI_VENDOR_ID); + device_id = devpciR3GetWord(pPciDev, PCI_DEVICE_ID); + + /* Check if device is present. */ + if (vendor_id != 0xffff) + { + switch (devclass) + { + case 0x0101: + if ( (vendor_id == 0x8086) + && (device_id == 0x7010 || device_id == 0x7111 || device_id == 0x269e)) + { + /* PIIX3, PIIX4 or ICH6 IDE */ + devpciR3SetWord(pDevIns, pPciDev, 0x40, 0x8011); /* enable IDE0 + fast timing */ + devpciR3SetWord(pDevIns, pPciDev, 0x42, 0x8011); /* enable IDE1 + fast timing */ + goto default_map; + } + else + { + /* IDE: we map it as in ISA mode */ + devpciR3BiosInitSetRegionAddress(pDevIns, pBus, pPciDev, 0, 0x1f0); + devpciR3BiosInitSetRegionAddress(pDevIns, pBus, pPciDev, 1, 0x3f4); + devpciR3BiosInitSetRegionAddress(pDevIns, pBus, pPciDev, 2, 0x170); + devpciR3BiosInitSetRegionAddress(pDevIns, pBus, pPciDev, 3, 0x374); + devpciR3SetWord(pDevIns, pPciDev, PCI_COMMAND, + devpciR3GetWord(pPciDev, PCI_COMMAND) + | PCI_COMMAND_IOACCESS); + } + break; + case 0x0800: + /* PIC */ + vendor_id = devpciR3GetWord(pPciDev, PCI_VENDOR_ID); + device_id = devpciR3GetWord(pPciDev, PCI_DEVICE_ID); + if (vendor_id == 0x1014) + { + /* IBM */ + if (device_id == 0x0046 || device_id == 0xFFFF) + { + /* MPIC & MPIC2 */ + devpciR3BiosInitSetRegionAddress(pDevIns, pBus, pPciDev, 0, 0x80800000 + 0x00040000); + devpciR3SetWord(pDevIns, pPciDev, PCI_COMMAND, + devpciR3GetWord(pPciDev, PCI_COMMAND) + | PCI_COMMAND_MEMACCESS); + } + } + break; + case 0xff00: + if ( (vendor_id == 0x0106b) + && (device_id == 0x0017 || device_id == 0x0022)) + { + /* macio bridge */ + devpciR3BiosInitSetRegionAddress(pDevIns, pBus, pPciDev, 0, 0x80800000); + devpciR3SetWord(pDevIns, pPciDev, PCI_COMMAND, + devpciR3GetWord(pPciDev, PCI_COMMAND) + | PCI_COMMAND_MEMACCESS); + } + break; + case 0x0604: + { + /* Init PCI-to-PCI bridge. */ + devpciR3SetByte(pDevIns, pPciDev, VBOX_PCI_PRIMARY_BUS, pBus->iBus); + + AssertMsg(pGlobals->uPciBiosBus < 255, ("Too many bridges on the bus\n")); + pGlobals->uPciBiosBus++; + devpciR3SetByte(pDevIns, pPciDev, VBOX_PCI_SECONDARY_BUS, pGlobals->uPciBiosBus); + devpciR3SetByte(pDevIns, pPciDev, VBOX_PCI_SUBORDINATE_BUS, 0xff); /* Temporary until we know how many other bridges are behind this one. */ + + /* Add position of this bridge into the array. */ + paBridgePositions[cBridgeDepth+1] = (pPciDev->uDevFn >> 3); + + /* + * The I/O range for the bridge must be aligned to a 4KB boundary. + * This does not change anything really as the access to the device is not going + * through the bridge but we want to be compliant to the spec. + */ + if ((pGlobals->uPciBiosIo % _4K) != 0) + pGlobals->uPciBiosIo = RT_ALIGN_32(pGlobals->uPciBiosIo, _4K); + LogFunc(("Aligned I/O start address. New address %#x\n", pGlobals->uPciBiosIo)); + devpciR3SetByte(pDevIns, pPciDev, VBOX_PCI_IO_BASE, (pGlobals->uPciBiosIo >> 8) & 0xf0); + + /* The MMIO range for the bridge must be aligned to a 1MB boundary. */ + if ((pGlobals->uPciBiosMmio % _1M) != 0) + pGlobals->uPciBiosMmio = RT_ALIGN_32(pGlobals->uPciBiosMmio, _1M); + LogFunc(("Aligned MMIO start address. New address %#x\n", pGlobals->uPciBiosMmio)); + devpciR3SetWord(pDevIns, pPciDev, VBOX_PCI_MEMORY_BASE, (pGlobals->uPciBiosMmio >> 16) & UINT32_C(0xffff0)); + + /* Save values to compare later to. */ + uint32_t u32IoAddressBase = pGlobals->uPciBiosIo; + uint32_t u32MMIOAddressBase = pGlobals->uPciBiosMmio; + + /* Init devices behind the bridge and possibly other bridges as well. */ + PDEVPCIBUS pChildBus = PDMINS_2_DATA(pPciDev->Int.s.CTX_SUFF(pDevIns), PDEVPCIBUS); + for (uint32_t uDevFn = 0; uDevFn < RT_ELEMENTS(pChildBus->apDevices); uDevFn++) + { + PPDMPCIDEV pChildPciDev = pChildBus->apDevices[uDevFn]; + if (pChildPciDev) + pci_bios_init_device(pDevIns, pGlobals, pChildBus, pChildPciDev, cBridgeDepth + 1, paBridgePositions); + } + + /* The number of bridges behind the this one is now available. */ + devpciR3SetByte(pDevIns, pPciDev, VBOX_PCI_SUBORDINATE_BUS, pGlobals->uPciBiosBus); + + /* + * Set I/O limit register. If there is no device with I/O space behind the bridge + * we set a lower value than in the base register. + * The result with a real bridge is that no I/O transactions are passed to the secondary + * interface. Again this doesn't really matter here but we want to be compliant to the spec. + */ + if ((u32IoAddressBase != pGlobals->uPciBiosIo) && ((pGlobals->uPciBiosIo % _4K) != 0)) + { + /* The upper boundary must be one byte less than a 4KB boundary. */ + pGlobals->uPciBiosIo = RT_ALIGN_32(pGlobals->uPciBiosIo, _4K); + } + devpciR3SetByte(pDevIns, pPciDev, VBOX_PCI_IO_LIMIT, ((pGlobals->uPciBiosIo >> 8) & 0xf0) - 1); + + /* Same with the MMIO limit register but with 1MB boundary here. */ + if ((u32MMIOAddressBase != pGlobals->uPciBiosMmio) && ((pGlobals->uPciBiosMmio % _1M) != 0)) + { + /* The upper boundary must be one byte less than a 1MB boundary. */ + pGlobals->uPciBiosMmio = RT_ALIGN_32(pGlobals->uPciBiosMmio, _1M); + } + devpciR3SetWord(pDevIns, pPciDev, VBOX_PCI_MEMORY_LIMIT, ((pGlobals->uPciBiosMmio >> 16) & UINT32_C(0xfff0)) - 1); + + /* + * Set the prefetch base and limit registers. We currently have no device with a prefetchable region + * which may be behind a bridge. That's why it is unconditionally disabled here atm by writing a higher value into + * the base register than in the limit register. + */ + devpciR3SetWord(pDevIns, pPciDev, VBOX_PCI_PREF_MEMORY_BASE, 0xfff0); + devpciR3SetWord(pDevIns, pPciDev, VBOX_PCI_PREF_MEMORY_LIMIT, 0x0); + devpciR3SetDWord(pDevIns, pPciDev, VBOX_PCI_PREF_BASE_UPPER32, 0x00); + devpciR3SetDWord(pDevIns, pPciDev, VBOX_PCI_PREF_LIMIT_UPPER32, 0x00); + break; + } + default: + default_map: + { + /* default memory mappings */ + bool fActiveMemRegion = false; + bool fActiveIORegion = false; + /* + * PCI_NUM_REGIONS is 7 because of the rom region but there are only 6 base address register defined by the PCI spec. + * Leaving only PCI_NUM_REGIONS would cause reading another and enabling a memory region which does not exist. + */ + for (unsigned i = 0; i < (PCI_NUM_REGIONS-1); i++) + { + uint32_t u32Size; + uint8_t u8ResourceType; + uint32_t u32Address = 0x10 + i * 4; + + /* Calculate size. */ + u8ResourceType = devpciR3GetByte(pPciDev, u32Address); + devpciR3SetDWord(pDevIns, pPciDev, u32Address, UINT32_C(0xffffffff)); + u32Size = devpciR3GetDWord(pPciDev, u32Address); + bool fIsPio = ((u8ResourceType & PCI_COMMAND_IOACCESS) == PCI_COMMAND_IOACCESS); + /* Clear resource information depending on resource type. */ + if (fIsPio) /* I/O */ + u32Size &= ~(0x01); + else /* MMIO */ + u32Size &= ~(0x0f); + + /* + * Invert all bits and add 1 to get size of the region. + * (From PCI implementation note) + */ + if (fIsPio && (u32Size & UINT32_C(0xffff0000)) == 0) + u32Size = (~(u32Size | UINT32_C(0xffff0000))) + 1; + else + u32Size = (~u32Size) + 1; + + Log2Func(("Size of region %u for device %d on bus %d is %u\n", i, pPciDev->uDevFn, pBus->iBus, u32Size)); + + if (u32Size) + { + if (fIsPio) + paddr = &pGlobals->uPciBiosIo; + else + { + paddr = &pGlobals->uPciBiosMmio; + if (devclass == 0x0300) + { + /* + * Because legacy VGA I/O ports are implicitly decoded + * by a VGA class device without needing a BAR, we must + * enable I/O decoding for such devices. + */ + fActiveIORegion = true; + + if (vendor_id == 0x80ee || vendor_id == 0x15ad) + { + bool fPrefetch = (u8ResourceType & ((uint8_t)(PCI_ADDRESS_SPACE_MEM_PREFETCH | PCI_ADDRESS_SPACE_IO))) + == PCI_ADDRESS_SPACE_MEM_PREFETCH; + /* VGA: map frame buffer to default Bochs VBE address. Only + * needed for legacy guest drivers. */ + if (fPrefetch) + paddr = &uPciBiosSpecialVRAM; + } + } + } + uint32_t uNew = *paddr; + uNew = (uNew + u32Size - 1) & ~(u32Size - 1); + if (fIsPio) + uNew &= UINT32_C(0xffff); + /* Unconditionally exclude I/O-APIC/HPET/ROM. Pessimistic, but better than causing a mess. */ + if (!uNew || (uNew <= UINT32_C(0xffffffff) && uNew + u32Size - 1 >= UINT32_C(0xfec00000))) + { + LogRel(("PCI: no space left for BAR%u of device %u/%u/%u (vendor=%#06x device=%#06x)\n", + i, pBus->iBus, pPciDev->uDevFn >> 3, pPciDev->uDevFn & 7, vendor_id, device_id)); /** @todo make this a VM start failure later. */ + /* Undo the mapping mess caused by the size probing. */ + devpciR3SetDWord(pDevIns, pPciDev, u32Address, UINT32_C(0)); + } + else + { + LogFunc(("Start address of %s region %u is %#x\n", (fIsPio ? "I/O" : "MMIO"), i, uNew)); + devpciR3BiosInitSetRegionAddress(pDevIns, pBus, pPciDev, i, uNew); + if (fIsPio) + fActiveIORegion = true; + else + fActiveMemRegion = true; + *paddr = uNew + u32Size; + Log2Func(("New address is %#x\n", *paddr)); + } + } + } + + /* Update the command word appropriately. */ + devpciR3SetWord(pDevIns, pPciDev, PCI_COMMAND, + devpciR3GetWord(pPciDev, PCI_COMMAND) + | (fActiveMemRegion ? PCI_COMMAND_MEMACCESS : 0) + | (fActiveIORegion ? PCI_COMMAND_IOACCESS : 0)); + + break; + } + } + + /* map the interrupt */ + pin = devpciR3GetByte(pPciDev, PCI_INTERRUPT_PIN); + if (pin != 0) + { + uint8_t uBridgeDevFn = pPciDev->uDevFn; + pin--; + + /* We need to go up to the host bus to see which irq this device will assert there. */ + while (cBridgeDepth != 0) + { + /* Get the pin the device would assert on the bridge. */ + pin = ((uBridgeDevFn >> 3) + pin) & 3; + uBridgeDevFn = paBridgePositions[cBridgeDepth]; + cBridgeDepth--; + } + + pin = pci_slot_get_pirq(pPciDev->uDevFn, pin); + pic_irq = pci_irqs[pin]; + devpciR3SetByte(pDevIns, pPciDev, PCI_INTERRUPT_LINE, pic_irq); + } + } +} + +/** + * Worker for Fake PCI BIOS config, triggered by magic port access by BIOS. + * + * @returns VBox status code. + * + * @param pDevIns i440FX device instance. + */ +static int pciR3FakePCIBIOS(PPDMDEVINS pDevIns) +{ + uint8_t elcr[2] = {0, 0}; + PDEVPCIROOT pGlobals = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + + LogRel(("PCI: Setting up resources and interrupts\n")); + + /* + * Set the start addresses. + */ + pGlobals->uPciBiosBus = 0; + pGlobals->uPciBiosIo = 0xd000; + pGlobals->uPciBiosMmio = UINT32_C(0xf0000000); + + /* + * Activate IRQ mappings. + */ + PPDMPCIDEV pPIIX3 = pDevIns->apPciDevs[1]; + for (unsigned i = 0; i < 4; i++) + { + uint8_t irq = pci_irqs[i]; + /* Set to trigger level. */ + elcr[irq >> 3] |= (1 << (irq & 7)); + /* Activate irq remapping in PIIX3. */ + devpciR3SetByte(pDevIns, pPIIX3, 0x60 + i, irq); + } + + /* Tell to the PIC. */ + /** @todo r=aeichner We should really move this to the BIOS code and get rid of this fake PCI BIOS thing, + * DevPciIch9.cpp lacks this code and has a todo for this. + */ + VBOXSTRICTRC rcStrict = pDevIns->pHlpR3->pfnIoPortWrite(pDevIns, 0x4d0, elcr[0], sizeof(uint8_t)); + if (rcStrict == VINF_SUCCESS) + rcStrict = pDevIns->pHlpR3->pfnIoPortWrite(pDevIns, 0x4d1, elcr[1], sizeof(uint8_t)); + if (rcStrict != VINF_SUCCESS) + { + AssertMsgFailed(("Writing to PIC failed! rcStrict=%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); + return RT_SUCCESS(rcStrict) ? VERR_INTERNAL_ERROR : VBOXSTRICTRC_VAL(rcStrict); + } + + /* + * Init the devices. + */ + PDEVPCIBUS pBus = &pGlobals->PciBus; + for (uint32_t uDevFn = 0; uDevFn < RT_ELEMENTS(pBus->apDevices); uDevFn++) + { + PPDMPCIDEV pPciDev = pBus->apDevices[uDevFn]; + if (pPciDev) + { + Log2(("PCI: Initializing device %d (%#x)\n", uDevFn, 0x80000000 | (uDevFn << 8))); + uint8_t aBridgePositions[256]; + RT_ZERO(aBridgePositions); + pci_bios_init_device(pDevIns, pGlobals, pBus, pPciDev, 0, aBridgePositions); + } + } + + return VINF_SUCCESS; +} + +#endif /* IN_RING3 */ + + +/* -=-=-=-=-=- I/O ports -=-=-=-=-=- */ + +/** + * @callback_method_impl{FNIOMIOPORTNEWOUT, PCI address} + */ +static DECLCALLBACK(VBOXSTRICTRC) +pciIOPortAddressWrite(PPDMDEVINS pDevIns, void *pvUser, RTIOPORT offPort, uint32_t u32, unsigned cb) +{ + LogFunc(("offPort=%#x u32=%#x cb=%d\n", offPort, u32, cb)); + Assert(offPort == 0); RT_NOREF2(offPort, pvUser); + if (cb == 4) + { + PDEVPCIROOT pThis = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + PCI_LOCK_RET(pDevIns, VINF_IOM_R3_IOPORT_WRITE); + pThis->uConfigReg = u32 & ~3; /* Bits 0-1 are reserved and we silently clear them */ + PCI_UNLOCK(pDevIns); + } + /* else: 440FX does "pass through to the bus" for other writes, what ever that means. + * Linux probes for cmd640 using byte writes/reads during ide init. We'll just ignore it. */ + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNIOMIOPORTNEWIN, PCI address} + */ +static DECLCALLBACK(VBOXSTRICTRC) +pciIOPortAddressRead(PPDMDEVINS pDevIns, void *pvUser, RTIOPORT offPort, uint32_t *pu32, unsigned cb) +{ + Assert(offPort == 0); RT_NOREF2(offPort, pvUser); + if (cb == 4) + { + PDEVPCIROOT pThis = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + PCI_LOCK_RET(pDevIns, VINF_IOM_R3_IOPORT_READ); + *pu32 = pThis->uConfigReg; + PCI_UNLOCK(pDevIns); + LogFunc(("offPort=%#x cb=%d -> %#x\n", offPort, cb, *pu32)); + return VINF_SUCCESS; + } + /* else: 440FX does "pass through to the bus" for other writes, what ever that means. + * Linux probes for cmd640 using byte writes/reads during ide init. We'll just ignore it. */ + LogFunc(("offPort=%#x cb=%d VERR_IOM_IOPORT_UNUSED\n", offPort, cb)); + return VERR_IOM_IOPORT_UNUSED; +} + + +/** + * @callback_method_impl{FNIOMIOPORTNEWOUT, PCI data} + */ +static DECLCALLBACK(VBOXSTRICTRC) +pciIOPortDataWrite(PPDMDEVINS pDevIns, void *pvUser, RTIOPORT offPort, uint32_t u32, unsigned cb) +{ + LogFunc(("offPort=%#x u32=%#x cb=%d\n", offPort, u32, cb)); + Assert(offPort < 4); NOREF(pvUser); + VBOXSTRICTRC rcStrict = VINF_SUCCESS; + if (!(offPort % cb)) + { + PCI_LOCK_RET(pDevIns, VINF_IOM_R3_IOPORT_WRITE); + rcStrict = pci_data_write(pDevIns, PDMINS_2_DATA(pDevIns, PDEVPCIROOT), offPort, u32, cb); + PCI_UNLOCK(pDevIns); + } + else + AssertMsgFailed(("Write to port %#x u32=%#x cb=%d\n", offPort, u32, cb)); + return rcStrict; +} + + +/** + * @callback_method_impl{FNIOMIOPORTNEWIN, PCI data} + */ +static DECLCALLBACK(VBOXSTRICTRC) +pciIOPortDataRead(PPDMDEVINS pDevIns, void *pvUser, RTIOPORT offPort, uint32_t *pu32, unsigned cb) +{ + Assert(offPort < 4); NOREF(pvUser); + if (!(offPort % cb)) + { + PCI_LOCK_RET(pDevIns, VINF_IOM_R3_IOPORT_READ); + VBOXSTRICTRC rcStrict = pci_data_read(PDMINS_2_DATA(pDevIns, PDEVPCIROOT), offPort, cb, pu32); + PCI_UNLOCK(pDevIns); + LogFunc(("offPort=%#x cb=%#x -> %#x (%Rrc)\n", offPort, cb, *pu32, VBOXSTRICTRC_VAL(rcStrict))); + return rcStrict; + } + AssertMsgFailed(("Read from port %#x cb=%d\n", offPort, cb)); + return VERR_IOM_IOPORT_UNUSED; +} + +#ifdef IN_RING3 + +/** + * @callback_method_impl{FNIOMIOPORTNEWOUT, PCI data} + */ +static DECLCALLBACK(VBOXSTRICTRC) +pciR3IOPortMagicPCIWrite(PPDMDEVINS pDevIns, void *pvUser, RTIOPORT offPort, uint32_t u32, unsigned cb) +{ + Assert(offPort == 0); RT_NOREF2(pvUser, offPort); + LogFunc(("offPort=%#x u32=%#x cb=%d\n", offPort, u32, cb)); + if (cb == 4) + { + if (u32 == UINT32_C(19200509)) // Richard Adams - Note! In decimal rather hex. + { + int rc = pciR3FakePCIBIOS(pDevIns); + AssertRC(rc); + } + } + + return VINF_SUCCESS; +} + +/** + * @callback_method_impl{FNIOMIOPORTNEWIN, PCI data} + */ +static DECLCALLBACK(VBOXSTRICTRC) +pciR3IOPortMagicPCIRead(PPDMDEVINS pDevIns, void *pvUser, RTIOPORT offPort, uint32_t *pu32, unsigned cb) +{ + Assert(offPort == 0); RT_NOREF5(pDevIns, pvUser, offPort, pu32, cb); + LogFunc(("offPort=%#x cb=%d VERR_IOM_IOPORT_UNUSED\n", offPort, cb)); + return VERR_IOM_IOPORT_UNUSED; +} + + +/* -=-=-=-=-=- Saved state -=-=-=-=-=- */ + +/** + * Common worker for pciR3SaveExec and pcibridgeR3SaveExec. + * + * @returns VBox status code. + * @param pHlp The device helpers. + * @param pBus The bus to save. + * @param pSSM The saved state handle. + */ +static int pciR3CommonSaveExec(PCPDMDEVHLPR3 pHlp, PDEVPCIBUS pBus, PSSMHANDLE pSSM) +{ + /* + * Iterate thru all the devices. + */ + for (uint32_t uDevFn = 0; uDevFn < RT_ELEMENTS(pBus->apDevices); uDevFn++) + { + PPDMPCIDEV pDev = pBus->apDevices[uDevFn]; + if (pDev) + { + pHlp->pfnSSMPutU32(pSSM, uDevFn); + pHlp->pfnSSMPutMem(pSSM, pDev->abConfig, 256); /* Only save 256 bytes here! */ + + pHlp->pfnSSMPutS32(pSSM, pDev->Int.s.uIrqPinState); + + /* Save the type an size of all the regions. */ + for (uint32_t iRegion = 0; iRegion < VBOX_PCI_NUM_REGIONS; iRegion++) + { + pHlp->pfnSSMPutU8(pSSM, pDev->Int.s.aIORegions[iRegion].type); + pHlp->pfnSSMPutU64(pSSM, pDev->Int.s.aIORegions[iRegion].size); + } + } + } + return pHlp->pfnSSMPutU32(pSSM, UINT32_MAX); /* terminator */ +} + + +/** + * @callback_method_impl{FNSSMDEVSAVEEXEC} + */ +static DECLCALLBACK(int) pciR3SaveExec(PPDMDEVINS pDevIns, PSSMHANDLE pSSM) +{ + PDEVPCIROOT pThis = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + PCPDMDEVHLPR3 pHlp = pDevIns->pHlpR3; + + /* + * Bus state data. + */ + pHlp->pfnSSMPutU32(pSSM, pThis->uConfigReg); + pHlp->pfnSSMPutBool(pSSM, pThis->fUseIoApic); + + /* + * Save IRQ states. + */ + for (unsigned i = 0; i < RT_ELEMENTS(pThis->Piix3.auPciLegacyIrqLevels); i++) + pHlp->pfnSSMPutU32(pSSM, pThis->Piix3.auPciLegacyIrqLevels[i]); + for (unsigned i = 0; i < RT_ELEMENTS(pThis->auPciApicIrqLevels); i++) + pHlp->pfnSSMPutU32(pSSM, pThis->auPciApicIrqLevels[i]); + + pHlp->pfnSSMPutU32(pSSM, pThis->Piix3.iAcpiIrqLevel); + pHlp->pfnSSMPutS32(pSSM, pThis->Piix3.iAcpiIrq); + + pHlp->pfnSSMPutU32(pSSM, UINT32_MAX); /* separator */ + + /* + * Join paths with pcibridgeR3SaveExec. + */ + return pciR3CommonSaveExec(pHlp, &pThis->PciBus, pSSM); +} + + +/** + * Common worker for pciR3LoadExec and pcibridgeR3LoadExec. + * + * @returns VBox status code. + * @param pDevIns The device instance. + * @param pBus The bus which data is being loaded. + * @param pSSM The saved state handle. + * @param uVersion The data version. + * @param uPass The pass. + */ +static int pciR3CommonLoadExec(PPDMDEVINS pDevIns, PDEVPCIBUS pBus, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + PCPDMDEVHLPR3 pHlp = pDevIns->pHlpR3; + uint32_t u32; + int rc; + + Assert(uPass == SSM_PASS_FINAL); NOREF(uPass); + + /* + * Iterate thru all the devices and write 0 to the COMMAND register so + * that all the memory is unmapped before we start restoring the saved + * mapping locations. + * + * The register value is restored afterwards so we can do proper + * LogRels in devpciR3CommonRestoreConfig. + */ + for (uint32_t uDevFn = 0; uDevFn < RT_ELEMENTS(pBus->apDevices); uDevFn++) + { + PPDMPCIDEV pDev = pBus->apDevices[uDevFn]; + if (pDev) + { + uint16_t u16 = PCIDevGetCommand(pDev); + devpciR3SetCfg(pDevIns, pDev, VBOX_PCI_COMMAND, 0 /*u32Value*/, 2 /*cb*/); + PCIDevSetCommand(pDev, u16); + Assert(PCIDevGetCommand(pDev) == u16); + } + } + + /* + * Iterate all the devices. + */ + for (uint32_t uDevFn = 0;; uDevFn++) + { + /* index / terminator */ + rc = pHlp->pfnSSMGetU32(pSSM, &u32); + if (RT_FAILURE(rc)) + return rc; + if (u32 == UINT32_MAX) + break; + if ( u32 >= RT_ELEMENTS(pBus->apDevices) + || u32 < uDevFn) + { + AssertMsgFailed(("u32=%#x uDevFn=%#x\n", u32, uDevFn)); + return rc; + } + + /* skip forward to the device checking that no new devices are present. */ + for (; uDevFn < u32; uDevFn++) + { + if (pBus->apDevices[uDevFn]) + { + LogRel(("PCI: New device in slot %#x, %s (vendor=%#06x device=%#06x)\n", uDevFn, pBus->apDevices[uDevFn]->pszNameR3, + PCIDevGetVendorId(pBus->apDevices[uDevFn]), PCIDevGetDeviceId(pBus->apDevices[uDevFn]))); + if (pHlp->pfnSSMHandleGetAfter(pSSM) != SSMAFTER_DEBUG_IT) + return pHlp->pfnSSMSetCfgError(pSSM, RT_SRC_POS, N_("New device in slot %#x, %s (vendor=%#06x device=%#06x)"), + uDevFn, pBus->apDevices[uDevFn]->pszNameR3, PCIDevGetVendorId(pBus->apDevices[uDevFn]), PCIDevGetDeviceId(pBus->apDevices[uDevFn])); + } + } + + /* get the data */ + union + { + PDMPCIDEV DevTmp; + uint8_t abDevTmpPadding[RT_UOFFSETOF(PDMPCIDEV, abMsixState)]; + } u; + RT_ZERO(u.DevTmp); + u.DevTmp.Int.s.uIrqPinState = ~0; /* Invalid value in case we have an older saved state to force a state change in pciSetIrq. */ + pHlp->pfnSSMGetMem(pSSM, u.DevTmp.abConfig, 256); + if (uVersion < VBOX_PCI_SAVED_STATE_VERSION_IRQ_STATES) + { + int32_t i32Temp; + /* Irq value not needed anymore. */ + rc = pHlp->pfnSSMGetS32(pSSM, &i32Temp); + if (RT_FAILURE(rc)) + return rc; + } + else + { + rc = pHlp->pfnSSMGetS32(pSSM, &u.DevTmp.Int.s.uIrqPinState); + if (RT_FAILURE(rc)) + return rc; + } + + /* Load the region types and sizes. */ + if (uVersion >= VBOX_PCI_SAVED_STATE_VERSION_REGION_SIZES) + { + for (uint32_t iRegion = 0; iRegion < VBOX_PCI_NUM_REGIONS; iRegion++) + { + pHlp->pfnSSMGetU8(pSSM, &u.DevTmp.Int.s.aIORegions[iRegion].type); + rc = pHlp->pfnSSMGetU64(pSSM, &u.DevTmp.Int.s.aIORegions[iRegion].size); + AssertLogRelRCReturn(rc, rc); + } + } + + /* check that it's still around. */ + PPDMPCIDEV pDev = pBus->apDevices[uDevFn]; + if (!pDev) + { + LogRel(("PCI: Device in slot %#x has been removed! vendor=%#06x device=%#06x\n", uDevFn, + PCIDevGetVendorId(&u.DevTmp), PCIDevGetDeviceId(&u.DevTmp))); + if (pHlp->pfnSSMHandleGetAfter(pSSM) != SSMAFTER_DEBUG_IT) + return pHlp->pfnSSMSetCfgError(pSSM, RT_SRC_POS, N_("Device in slot %#x has been removed! vendor=%#06x device=%#06x"), + uDevFn, PCIDevGetVendorId(&u.DevTmp), PCIDevGetDeviceId(&u.DevTmp)); + continue; + } + + /* match the vendor id assuming that this will never be changed. */ + if ( u.DevTmp.abConfig[0] != pDev->abConfig[0] + || u.DevTmp.abConfig[1] != pDev->abConfig[1]) + return pHlp->pfnSSMSetCfgError(pSSM, RT_SRC_POS, + N_("Device in slot %#x (%s) vendor id mismatch! saved=%.4Rhxs current=%.4Rhxs"), + uDevFn, pDev->pszNameR3, u.DevTmp.abConfig, pDev->abConfig); + + /* commit the loaded device config. */ + rc = devpciR3CommonRestoreRegions(pHlp, pSSM, pDev, u.DevTmp.Int.s.aIORegions, + uVersion >= VBOX_PCI_SAVED_STATE_VERSION_REGION_SIZES); + if (RT_FAILURE(rc)) + break; + devpciR3CommonRestoreConfig(pDevIns, pDev, &u.DevTmp.abConfig[0]); + + pDev->Int.s.uIrqPinState = u.DevTmp.Int.s.uIrqPinState; + } + + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNSSMDEVLOADEXEC} + */ +static DECLCALLBACK(int) pciR3LoadExec(PPDMDEVINS pDevIns, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + PDEVPCIROOT pThis = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + PDEVPCIBUS pBus = &pThis->PciBus; + PCPDMDEVHLPR3 pHlp = pDevIns->pHlpR3; + uint32_t u32; + int rc; + + /* + * Check the version. + */ + if (uVersion > VBOX_PCI_SAVED_STATE_VERSION) + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + Assert(uPass == SSM_PASS_FINAL); NOREF(uPass); + + /* + * Bus state data. + */ + pHlp->pfnSSMGetU32(pSSM, &pThis->uConfigReg); + if (uVersion >= VBOX_PCI_SAVED_STATE_VERSION_USE_IO_APIC) + pHlp->pfnSSMGetBool(pSSM, &pThis->fUseIoApic); + + /* Load IRQ states. */ + if (uVersion >= VBOX_PCI_SAVED_STATE_VERSION_IRQ_STATES) + { + for (uint8_t i = 0; i < RT_ELEMENTS(pThis->Piix3.auPciLegacyIrqLevels); i++) + pHlp->pfnSSMGetU32V(pSSM, &pThis->Piix3.auPciLegacyIrqLevels[i]); + for (uint8_t i = 0; i < RT_ELEMENTS(pThis->auPciApicIrqLevels); i++) + pHlp->pfnSSMGetU32V(pSSM, &pThis->auPciApicIrqLevels[i]); + + pHlp->pfnSSMGetU32(pSSM, &pThis->Piix3.iAcpiIrqLevel); + pHlp->pfnSSMGetS32(pSSM, &pThis->Piix3.iAcpiIrq); + } + + /* separator */ + rc = pHlp->pfnSSMGetU32(pSSM, &u32); + if (RT_FAILURE(rc)) + return rc; + if (u32 != UINT32_MAX) + AssertMsgFailedReturn(("u32=%#x\n", u32), rc); + + /* + * The devices. + */ + return pciR3CommonLoadExec(pDevIns, pBus, pSSM, uVersion, uPass); +} + + +/* -=-=-=-=-=- PCI Bus Interface Methods (PDMPCIBUSREG) -=-=-=-=-=- */ + + +/* -=-=-=-=-=- Debug Info Handlers -=-=-=-=-=- */ + +/** + * @callback_method_impl{FNDBGFHANDLERDEV} + */ +static DECLCALLBACK(void) pciR3IrqRouteInfo(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + PPDMPCIDEV pPIIX3 = pDevIns->apPciDevs[1]; + NOREF(pszArgs); + + uint16_t router = pPIIX3->uDevFn; + pHlp->pfnPrintf(pHlp, "PCI interrupt router at: %02X:%02X:%X\n", + router >> 8, (router >> 3) & 0x1f, router & 0x7); + + for (int i = 0; i < 4; ++i) + { + uint8_t irq_map = devpciR3GetByte(pPIIX3, 0x60 + i); + if (irq_map & 0x80) + pHlp->pfnPrintf(pHlp, "PIRQ%c disabled\n", 'A' + i); + else + pHlp->pfnPrintf(pHlp, "PIRQ%c -> IRQ%d\n", 'A' + i, irq_map & 0xf); + } +} + +/** + * @callback_method_impl{FNDBGFHANDLERDEV, 'pirq'} + */ +DECLCALLBACK(void) devpciR3InfoPIRQ(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + PDEVPCIROOT pGlobals = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + NOREF(pszArgs); + + pHlp->pfnPrintf(pHlp, "PCI IRQ levels:\n"); + for (int i = 0; i < DEVPCI_LEGACY_IRQ_PINS; ++i) + pHlp->pfnPrintf(pHlp, " IRQ%c: %u\n", 'A' + i, pGlobals->Piix3.auPciLegacyIrqLevels[i]); +} + + +/* -=-=-=-=-=- PDMDEVREG -=-=-=-=-=- */ + +/** + * @interface_method_impl{PDMDEVREG,pfnReset} + */ +static DECLCALLBACK(void) pciR3Reset(PPDMDEVINS pDevIns) +{ + PDEVPCIROOT pGlobals = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + PDEVPCIBUS pBus = &pGlobals->PciBus; + + /* PCI-specific reset for each device. */ + for (uint32_t uDevFn = 0; uDevFn < RT_ELEMENTS(pBus->apDevices); uDevFn++) + { + if (pBus->apDevices[uDevFn]) + devpciR3ResetDevice(pDevIns, pBus->apDevices[uDevFn]); + } + + pciR3Piix3Reset(pDevIns->apPciDevs[1]); +} + + +/** + * @interface_method_impl{PDMDEVREG,pfnDestruct} + */ +static DECLCALLBACK(int) pciR3Destruct(PPDMDEVINS pDevIns) +{ + PDEVPCIROOT pGlobals = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + if (pGlobals->PciBus.papBridgesR3) + { + PDMDevHlpMMHeapFree(pDevIns, pGlobals->PciBus.papBridgesR3); + pGlobals->PciBus.papBridgesR3 = NULL; + } + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{PDMDEVREG,pfnConstruct} + */ +static DECLCALLBACK(int) pciR3Construct(PPDMDEVINS pDevIns, int iInstance, PCFGMNODE pCfg) +{ + PDMDEV_CHECK_VERSIONS_RETURN(pDevIns); + PCPDMDEVHLPR3 pHlp = pDevIns->pHlpR3; + PDEVPCIBUSCC pBusCC = PDMINS_2_DATA_CC(pDevIns, PDEVPCIBUSCC); + PDEVPCIROOT pGlobals = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + RT_NOREF(iInstance); + Assert(iInstance == 0); + + /* + * Validate and read configuration. + */ + PDMDEV_VALIDATE_CONFIG_RETURN(pDevIns, "IOAPIC", ""); + + /* query whether we got an IOAPIC */ + bool fUseIoApic; + int rc = pHlp->pfnCFGMQueryBoolDef(pCfg, "IOAPIC", &fUseIoApic, false); + if (RT_FAILURE(rc)) + return PDMDEV_SET_ERROR(pDevIns, rc, + N_("Configuration error: Failed to query boolean value \"IOAPIC\"")); + + Log(("PCI: fUseIoApic=%RTbool fR0Enabled=%RTbool fRCEnabled=%RTbool\n", fUseIoApic, pDevIns->fR0Enabled, pDevIns->fRCEnabled)); + + /* + * Init data and register the PCI bus. + */ + pGlobals->uPciBiosIo = 0xc000; + pGlobals->uPciBiosMmio = 0xf0000000; + memset((void *)&pGlobals->Piix3.auPciLegacyIrqLevels, 0, sizeof(pGlobals->Piix3.auPciLegacyIrqLevels)); + pGlobals->fUseIoApic = fUseIoApic; + memset((void *)&pGlobals->auPciApicIrqLevels, 0, sizeof(pGlobals->auPciApicIrqLevels)); + + pGlobals->PciBus.fTypePiix3 = true; + pGlobals->PciBus.fTypeIch9 = false; + pGlobals->PciBus.fPureBridge = false; + pGlobals->PciBus.papBridgesR3 = (PPDMPCIDEV *)PDMDevHlpMMHeapAllocZ(pDevIns, + sizeof(PPDMPCIDEV) + * RT_ELEMENTS(pGlobals->PciBus.apDevices)); + AssertLogRelReturn(pGlobals->PciBus.papBridgesR3, VERR_NO_MEMORY); + + PDEVPCIBUS pBus = &pGlobals->PciBus; + PDMPCIBUSREGCC PciBusReg; + PciBusReg.u32Version = PDM_PCIBUSREGCC_VERSION; + PciBusReg.pfnRegisterR3 = devpciR3CommonRegisterDevice; + PciBusReg.pfnRegisterMsiR3 = NULL; + PciBusReg.pfnIORegionRegisterR3 = devpciR3CommonIORegionRegister; + PciBusReg.pfnInterceptConfigAccesses = devpciR3CommonInterceptConfigAccesses; + PciBusReg.pfnConfigRead = devpciR3CommonConfigRead; + PciBusReg.pfnConfigWrite = devpciR3CommonConfigWrite; + PciBusReg.pfnSetIrqR3 = pciSetIrq; + PciBusReg.u32EndVersion = PDM_PCIBUSREGCC_VERSION; + rc = PDMDevHlpPCIBusRegister(pDevIns, &PciBusReg, &pBusCC->pPciHlpR3, &pBus->iBus); + if (RT_FAILURE(rc)) + return PDMDEV_SET_ERROR(pDevIns, rc, N_("Failed to register ourselves as a PCI Bus")); + Assert(pBus->iBus == 0); + if (pBusCC->pPciHlpR3->u32Version != PDM_PCIHLPR3_VERSION) + return PDMDevHlpVMSetError(pDevIns, VERR_VERSION_MISMATCH, RT_SRC_POS, + N_("PCI helper version mismatch; got %#x expected %#x"), + pBusCC->pPciHlpR3->u32Version, PDM_PCIHLPR3_VERSION); + + /* Disable default device locking. */ + rc = PDMDevHlpSetDeviceCritSect(pDevIns, PDMDevHlpCritSectGetNop(pDevIns)); + AssertRCReturn(rc, rc); + + /* + * Fill in PCI configs and add them to the bus. + */ + PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0]; + AssertPtr(pPciDev); + + /* i440FX */ + PCIDevSetVendorId( pPciDev, 0x8086); /* Intel */ + PCIDevSetDeviceId( pPciDev, 0x1237); + PCIDevSetRevisionId(pPciDev, 0x02); + PCIDevSetClassSub( pPciDev, 0x00); /* host2pci */ + PCIDevSetClassBase( pPciDev, 0x06); /* PCI_bridge */ + PCIDevSetHeaderType(pPciDev, 0x00); + rc = PDMDevHlpPCIRegisterEx(pDevIns, pPciDev, 0 /*fFlags*/, 0 /*uPciDevNo*/, 0 /*uPciFunNo*/, "i440FX"); + AssertLogRelRCReturn(rc, rc); + + /* PIIX3 */ + PPDMPCIDEV pPiix3PciDev = pDevIns->apPciDevs[1]; + PCIDevSetVendorId( pPiix3PciDev, 0x8086); /* Intel */ + PCIDevSetDeviceId( pPiix3PciDev, 0x7000); /* 82371SB PIIX3 PCI-to-ISA bridge (Step A1) */ + PCIDevSetClassSub( pPiix3PciDev, 0x01); /* PCI_ISA */ + PCIDevSetClassBase( pPiix3PciDev, 0x06); /* PCI_bridge */ + PCIDevSetHeaderType(pPiix3PciDev, 0x80); /* PCI_multifunction, generic */ + rc = PDMDevHlpPCIRegisterEx(pDevIns, pPiix3PciDev, 0 /*fFlags*/, 1 /*uPciDevNo*/, 0 /*uPciFunNo*/, "PIIX3"); + AssertLogRelRCReturn(rc, rc); + pciR3Piix3Reset(pDevIns->apPciDevs[1]); + + pBus->iDevSearch = 16; + + /* + * Register I/O ports and save state. + */ + static const IOMIOPORTDESC s_aAddrDesc[] = { { "PCI address", "PCI address", NULL, NULL }, { NULL, NULL, NULL, NULL } }; + rc = PDMDevHlpIoPortCreateAndMap(pDevIns, 0x0cf8, 1, pciIOPortAddressWrite, pciIOPortAddressRead, "i440FX (PCI)", s_aAddrDesc, + &pGlobals->hIoPortAddress); + AssertLogRelRCReturn(rc, rc); + + static const IOMIOPORTDESC s_aDataDesc[] = { { "PCI data", "PCI data", NULL, NULL }, { NULL, NULL, NULL, NULL } }; + rc = PDMDevHlpIoPortCreateAndMap(pDevIns, 0x0cfc, 4, pciIOPortDataWrite, pciIOPortDataRead, "i440FX (PCI)", s_aDataDesc, + &pGlobals->hIoPortData); + AssertLogRelRCReturn(rc, rc); + + static const IOMIOPORTDESC s_aMagicDesc[] = { { "PCI magic", NULL, NULL, NULL }, { NULL, NULL, NULL, NULL } }; + rc = PDMDevHlpIoPortCreateAndMap(pDevIns, 0x0410, 1, pciR3IOPortMagicPCIWrite, pciR3IOPortMagicPCIRead, + "i440FX (Fake PCI BIOS trigger)", s_aMagicDesc, &pGlobals->hIoPortMagic); + AssertLogRelRCReturn(rc, rc); + + + rc = PDMDevHlpSSMRegisterEx(pDevIns, VBOX_PCI_SAVED_STATE_VERSION, sizeof(*pBus) + 16*128, "pgm", + NULL, NULL, NULL, + NULL, pciR3SaveExec, NULL, + NULL, pciR3LoadExec, NULL); + AssertLogRelRCReturn(rc, rc); + + PDMDevHlpDBGFInfoRegister(pDevIns, "pci", + "Display PCI bus status. Recognizes 'basic' or 'verbose' as arguments, defaults to 'basic'.", + devpciR3InfoPci); + PDMDevHlpDBGFInfoRegister(pDevIns, "pciirq", "Display PCI IRQ state. (no arguments)", devpciR3InfoPciIrq); + PDMDevHlpDBGFInfoRegister(pDevIns, "pirq", "Display PIRQ state. (no arguments)", devpciR3InfoPIRQ); + PDMDevHlpDBGFInfoRegister(pDevIns, "irqroute", "Display PCI IRQ routing. (no arguments)", pciR3IrqRouteInfo); + + return VINF_SUCCESS; +} + +#else /* !IN_RING3 */ + +/** + * @interface_method_impl{PDMDEVREGR0,pfnConstruct} + */ +static DECLCALLBACK(int) pciRZRootConstruct(PPDMDEVINS pDevIns) +{ + PDMDEV_CHECK_VERSIONS_RETURN(pDevIns); + PDEVPCIROOT pGlobals = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + PDEVPCIBUSCC pBusCC = PDMINS_2_DATA_CC(pDevIns, PDEVPCIBUSCC); + + /* Mirror the ring-3 device lock disabling: */ + int rc = PDMDevHlpSetDeviceCritSect(pDevIns, PDMDevHlpCritSectGetNop(pDevIns)); + AssertRCReturn(rc, rc); + + /* Set up the RZ PCI bus callbacks: */ + PDMPCIBUSREGCC PciBusReg; + PciBusReg.u32Version = PDM_PCIBUSREGCC_VERSION; + PciBusReg.iBus = pGlobals->PciBus.iBus; + PciBusReg.pfnSetIrq = pciSetIrq; + PciBusReg.u32EndVersion = PDM_PCIBUSREGCC_VERSION; + rc = PDMDevHlpPCIBusSetUpContext(pDevIns, &PciBusReg, &pBusCC->CTX_SUFF(pPciHlp)); + AssertRCReturn(rc, rc); + + /* Set up I/O port callbacks, except for the magic port: */ + rc = PDMDevHlpIoPortSetUpContext(pDevIns, pGlobals->hIoPortAddress, pciIOPortAddressWrite, pciIOPortAddressRead, NULL); + AssertLogRelRCReturn(rc, rc); + + rc = PDMDevHlpIoPortSetUpContext(pDevIns, pGlobals->hIoPortData, pciIOPortDataWrite, pciIOPortDataRead, NULL); + AssertLogRelRCReturn(rc, rc); + + return rc; +} + +#endif /* !IN_RING3 */ + +/** + * The device registration structure. + */ +const PDMDEVREG g_DevicePCI = +{ + /* .u32Version = */ PDM_DEVREG_VERSION, + /* .uReserved0 = */ 0, + /* .szName = */ "pci", + /* .fFlags = */ PDM_DEVREG_FLAGS_DEFAULT_BITS | PDM_DEVREG_FLAGS_RZ | PDM_DEVREG_FLAGS_NEW_STYLE, + /* .fClass = */ PDM_DEVREG_CLASS_BUS_PCI, + /* .cMaxInstances = */ 1, + /* .uSharedVersion = */ 42, + /* .cbInstanceShared = */ sizeof(DEVPCIROOT), + /* .cbInstanceCC = */ sizeof(CTX_SUFF(DEVPCIBUS)), + /* .cbInstanceRC = */ sizeof(DEVPCIBUSRC), + /* .cMaxPciDevices = */ 2, + /* .cMaxMsixVectors = */ 0, + /* .pszDescription = */ "i440FX PCI bridge and PIIX3 ISA bridge.", +#if defined(IN_RING3) + /* .pszRCMod = */ "VBoxDDRC.rc", + /* .pszR0Mod = */ "VBoxDDR0.r0", + /* .pfnConstruct = */ pciR3Construct, + /* .pfnDestruct = */ pciR3Destruct, + /* .pfnRelocate = */ NULL, + /* .pfnMemSetup = */ NULL, + /* .pfnPowerOn = */ NULL, + /* .pfnReset = */ pciR3Reset, + /* .pfnSuspend = */ NULL, + /* .pfnResume = */ NULL, + /* .pfnAttach = */ NULL, + /* .pfnDetach = */ NULL, + /* .pfnQueryInterface = */ NULL, + /* .pfnInitComplete = */ NULL, + /* .pfnPowerOff = */ NULL, + /* .pfnSoftReset = */ NULL, + /* .pfnReserved0 = */ NULL, + /* .pfnReserved1 = */ NULL, + /* .pfnReserved2 = */ NULL, + /* .pfnReserved3 = */ NULL, + /* .pfnReserved4 = */ NULL, + /* .pfnReserved5 = */ NULL, + /* .pfnReserved6 = */ NULL, + /* .pfnReserved7 = */ NULL, +#elif defined(IN_RING0) + /* .pfnEarlyConstruct = */ NULL, + /* .pfnConstruct = */ pciRZRootConstruct, + /* .pfnDestruct = */ NULL, + /* .pfnFinalDestruct = */ NULL, + /* .pfnRequest = */ NULL, + /* .pfnReserved0 = */ NULL, + /* .pfnReserved1 = */ NULL, + /* .pfnReserved2 = */ NULL, + /* .pfnReserved3 = */ NULL, + /* .pfnReserved4 = */ NULL, + /* .pfnReserved5 = */ NULL, + /* .pfnReserved6 = */ NULL, + /* .pfnReserved7 = */ NULL, +#elif defined(IN_RC) + /* .pfnConstruct = */ pciRZRootConstruct, + /* .pfnReserved0 = */ NULL, + /* .pfnReserved1 = */ NULL, + /* .pfnReserved2 = */ NULL, + /* .pfnReserved3 = */ NULL, + /* .pfnReserved4 = */ NULL, + /* .pfnReserved5 = */ NULL, + /* .pfnReserved6 = */ NULL, + /* .pfnReserved7 = */ NULL, +#else +# error "Not in IN_RING3, IN_RING0 or IN_RC!" +#endif + /* .u32VersionEnd = */ PDM_DEVREG_VERSION + +}; + + + +/* -=-=-=-=-=- The PCI bridge specific bits -=-=-=-=-=- */ + +/** + * @interface_method_impl{PDMPCIBUSREGR3,pfnSetIrqR3} + */ +static DECLCALLBACK(void) pcibridgeSetIrq(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, int iIrq, int iLevel, uint32_t uTagSrc) +{ + LogFlow(("pcibridgeSetIrq: %p %u %u %#x\n", pPciDev, iIrq, iLevel, uTagSrc)); + + /* + * The PCI-to-PCI bridge specification defines how the interrupt pins + * are routed from the secondary to the primary bus (see chapter 9). + * iIrq gives the interrupt pin the pci device asserted. + * We change iIrq here according to the spec and call the SetIrq function + * of our parent passing the device which asserted the interrupt instead of the device of the bridge. + */ + PDEVPCIBUS pBus; + uint8_t uDevFnBridge; + int iIrqPinBridge; + PPDMDEVINS pDevInsBus = devpcibridgeCommonSetIrqRootWalk(pDevIns, pPciDev, iIrq, &pBus, &uDevFnBridge, &iIrqPinBridge); + AssertReturnVoid(pDevInsBus); + AssertMsg(pBus->iBus == 0, ("This is not the host pci bus iBus=%d\n", pBus->iBus)); + Assert(pDevInsBus->pReg == &g_DevicePCI); + + pciSetIrqInternal(pDevInsBus, DEVPCIBUS_2_DEVPCIROOT(pBus), PDMINS_2_DATA_CC(pDevInsBus, PDEVPCIBUSCC), + uDevFnBridge, pPciDev, iIrqPinBridge, iLevel, uTagSrc); +} + +#ifdef IN_RING3 + +/** + * @callback_method_impl{FNPCIBRIDGECONFIGWRITE} + */ +static DECLCALLBACK(VBOXSTRICTRC) pcibridgeR3ConfigWrite(PPDMDEVINSR3 pDevIns, uint8_t iBus, uint8_t iDevice, + uint32_t u32Address, unsigned cb, uint32_t u32Value) +{ + LogFlowFunc(("pDevIns=%p iBus=%d iDevice=%d u32Address=%u cb=%d u32Value=%u\n", pDevIns, iBus, iDevice, u32Address, cb, u32Value)); + PDEVPCIBUS pBus = PDMINS_2_DATA(pDevIns, PDEVPCIBUS); + VBOXSTRICTRC rcStrict = VINF_SUCCESS; + + /* If the current bus is not the target bus search for the bus which contains the device. */ + if (iBus != pDevIns->apPciDevs[0]->abConfig[VBOX_PCI_SECONDARY_BUS]) + { + PPDMPCIDEV pBridgeDevice = pciR3FindBridge(pBus, iBus); + if (pBridgeDevice) + { + AssertPtr(pBridgeDevice->Int.s.pfnBridgeConfigWrite); + rcStrict = pBridgeDevice->Int.s.pfnBridgeConfigWrite(pBridgeDevice->Int.s.CTX_SUFF(pDevIns), iBus, iDevice, + u32Address, cb, u32Value); + } + } + else + { + /* This is the target bus, pass the write to the device. */ + PPDMPCIDEV pPciDev = pBus->apDevices[iDevice]; + if (pPciDev) + { + LogFunc(("%s: addr=%02x val=%08x len=%d\n", pPciDev->pszNameR3, u32Address, u32Value, cb)); + rcStrict = VINF_PDM_PCI_DO_DEFAULT; + if (pPciDev->Int.s.pfnConfigWrite) + rcStrict = pPciDev->Int.s.pfnConfigWrite(pPciDev->Int.s.CTX_SUFF(pDevIns), pPciDev, u32Address, cb, u32Value); + if (rcStrict == VINF_PDM_PCI_DO_DEFAULT) + rcStrict = devpciR3CommonConfigWriteWorker(pDevIns, PDMINS_2_DATA_CC(pDevIns, PDEVPCIBUSCC), + pPciDev, u32Address, cb, u32Value); + } + } + return rcStrict; +} + + +/** + * @callback_method_impl{FNPCIBRIDGECONFIGREAD} + */ +static DECLCALLBACK(VBOXSTRICTRC) pcibridgeR3ConfigRead(PPDMDEVINSR3 pDevIns, uint8_t iBus, uint8_t iDevice, + uint32_t u32Address, unsigned cb, uint32_t *pu32Value) +{ + LogFlowFunc(("pDevIns=%p iBus=%d iDevice=%d u32Address=%u cb=%d\n", pDevIns, iBus, iDevice, u32Address, cb)); + PDEVPCIBUS pBus = PDMINS_2_DATA(pDevIns, PDEVPCIBUS); + VBOXSTRICTRC rcStrict = VINF_SUCCESS; + + /* If the current bus is not the target bus search for the bus which contains the device. */ + if (iBus != pDevIns->apPciDevs[0]->abConfig[VBOX_PCI_SECONDARY_BUS]) + { + PPDMPCIDEV pBridgeDevice = pciR3FindBridge(pBus, iBus); + if (pBridgeDevice) + { + AssertPtr(pBridgeDevice->Int.s.pfnBridgeConfigRead); + rcStrict = pBridgeDevice->Int.s.pfnBridgeConfigRead(pBridgeDevice->Int.s.CTX_SUFF(pDevIns), + iBus, iDevice, u32Address, cb, pu32Value); + } + else + *pu32Value = UINT32_MAX; + } + else + { + /* This is the target bus, pass the read to the device. */ + PPDMPCIDEV pPciDev = pBus->apDevices[iDevice]; + if (pPciDev) + { + rcStrict = VINF_PDM_PCI_DO_DEFAULT; + if (pPciDev->Int.s.pfnConfigRead) + rcStrict = pPciDev->Int.s.pfnConfigRead(pPciDev->Int.s.CTX_SUFF(pDevIns), pPciDev, u32Address, cb, pu32Value); + if (rcStrict == VINF_PDM_PCI_DO_DEFAULT) + rcStrict = devpciR3CommonConfigReadWorker(pPciDev, u32Address, cb, pu32Value); + + LogFunc(("%s: u32Address=%02x u32Value=%08x cb=%d\n", pPciDev->pszNameR3, u32Address, *pu32Value, cb)); + } + else + *pu32Value = UINT32_MAX; + } + + return rcStrict; +} + + +/** + * @callback_method_impl{FNSSMDEVSAVEEXEC} + */ +static DECLCALLBACK(int) pcibridgeR3SaveExec(PPDMDEVINS pDevIns, PSSMHANDLE pSSM) +{ + return pciR3CommonSaveExec(pDevIns->pHlpR3, PDMINS_2_DATA(pDevIns, PDEVPCIBUS), pSSM); +} + + +/** + * @callback_method_impl{FNSSMDEVLOADEXEC} + */ +static DECLCALLBACK(int) pcibridgeR3LoadExec(PPDMDEVINS pDevIns, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + PDEVPCIBUS pThis = PDMINS_2_DATA(pDevIns, PDEVPCIBUS); + if (uVersion > VBOX_PCI_SAVED_STATE_VERSION) + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + return pciR3CommonLoadExec(pDevIns, pThis, pSSM, uVersion, uPass); +} + + +/** + * @interface_method_impl{PDMDEVREG,pfnReset} + */ +static DECLCALLBACK(void) pcibridgeR3Reset(PPDMDEVINS pDevIns) +{ + /* Reset config space to default values. */ + PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0]; + pPciDev->abConfig[VBOX_PCI_PRIMARY_BUS] = 0; + pPciDev->abConfig[VBOX_PCI_SECONDARY_BUS] = 0; + pPciDev->abConfig[VBOX_PCI_SUBORDINATE_BUS] = 0; +} + + +/** + * @interface_method_impl{PDMDEVREG,pfnDestruct} + */ +static DECLCALLBACK(int) pcibridgeR3Destruct(PPDMDEVINS pDevIns) +{ + PDEVPCIBUS pBus = PDMINS_2_DATA(pDevIns, PDEVPCIBUS); + if (pBus->papBridgesR3) + { + PDMDevHlpMMHeapFree(pDevIns, pBus->papBridgesR3); + pBus->papBridgesR3 = NULL; + } + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{PDMDEVREG,pfnConstruct} + */ +static DECLCALLBACK(int) pcibridgeR3Construct(PPDMDEVINS pDevIns, int iInstance, PCFGMNODE pCfg) +{ + PDMDEV_CHECK_VERSIONS_RETURN(pDevIns); + RT_NOREF(iInstance, pCfg); + PDEVPCIBUS pBus = PDMINS_2_DATA(pDevIns, PDEVPCIBUS); + PDEVPCIBUSCC pBusCC = PDMINS_2_DATA_CC(pDevIns, PDEVPCIBUSCC); + + /* + * Validate and read configuration (none left). + */ + PDMDEV_VALIDATE_CONFIG_RETURN(pDevIns, "", ""); + Log(("PCI: fRCEnabled=%RTbool fR0Enabled=%RTbool\n", pDevIns->fRCEnabled, pDevIns->fR0Enabled)); + + /* + * Init data and register the PCI bus. + */ + pBus->fTypePiix3 = true; + pBus->fTypeIch9 = false; + pBus->fPureBridge = true; + pBus->papBridgesR3 = (PPDMPCIDEV *)PDMDevHlpMMHeapAllocZ(pDevIns, sizeof(PPDMPCIDEV) * RT_ELEMENTS(pBus->apDevices)); + AssertLogRelReturn(pBus->papBridgesR3, VERR_NO_MEMORY); + + PDMPCIBUSREGCC PciBusReg; + PciBusReg.u32Version = PDM_PCIBUSREGCC_VERSION; + PciBusReg.pfnRegisterR3 = devpcibridgeR3CommonRegisterDevice; + PciBusReg.pfnRegisterMsiR3 = NULL; + PciBusReg.pfnIORegionRegisterR3 = devpciR3CommonIORegionRegister; + PciBusReg.pfnInterceptConfigAccesses = devpciR3CommonInterceptConfigAccesses; + PciBusReg.pfnConfigWrite = devpciR3CommonConfigWrite; + PciBusReg.pfnConfigRead = devpciR3CommonConfigRead; + PciBusReg.pfnSetIrqR3 = pcibridgeSetIrq; + PciBusReg.u32EndVersion = PDM_PCIBUSREGCC_VERSION; + int rc = PDMDevHlpPCIBusRegister(pDevIns, &PciBusReg, &pBusCC->pPciHlpR3, &pBus->iBus); + if (RT_FAILURE(rc)) + return PDMDEV_SET_ERROR(pDevIns, rc, N_("Failed to register ourselves as a PCI Bus")); + Assert(pBus->iBus == (uint32_t)iInstance + 1); /* Can be removed when adding support for multiple bridge implementations. */ + if (pBusCC->pPciHlpR3->u32Version != PDM_PCIHLPR3_VERSION) + return PDMDevHlpVMSetError(pDevIns, VERR_VERSION_MISMATCH, RT_SRC_POS, + N_("PCI helper version mismatch; got %#x expected %#x"), + pBusCC->pPciHlpR3->u32Version, PDM_PCIHLPR3_VERSION); + + /* + * Fill in PCI configs and add them to the bus. + */ + PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0]; + PCIDevSetVendorId( pPciDev, 0x8086); /* Intel */ + PCIDevSetDeviceId( pPciDev, 0x2448); /* 82801 Mobile PCI bridge. */ + PCIDevSetRevisionId(pPciDev, 0xf2); + PCIDevSetClassSub( pPciDev, 0x04); /* pci2pci */ + PCIDevSetClassBase( pPciDev, 0x06); /* PCI_bridge */ + PCIDevSetClassProg( pPciDev, 0x01); /* Supports subtractive decoding. */ + PCIDevSetHeaderType(pPciDev, 0x01); /* Single function device which adheres to the PCI-to-PCI bridge spec. */ + PCIDevSetCommand( pPciDev, 0x0000); + PCIDevSetStatus( pPciDev, 0x0020); /* 66MHz Capable. */ + PCIDevSetInterruptLine(pPciDev, 0x00); /* This device does not assert interrupts. */ + + /* + * This device does not generate interrupts. Interrupt delivery from + * devices attached to the bus is unaffected. + */ + PCIDevSetInterruptPin(pPciDev, 0x00); + + /* + * Register this PCI bridge. The called function will take care on which bus we will get registered. + */ + rc = PDMDevHlpPCIRegisterEx(pDevIns, pPciDev, PDMPCIDEVREG_F_PCI_BRIDGE, PDMPCIDEVREG_DEV_NO_FIRST_UNUSED, + PDMPCIDEVREG_FUN_NO_FIRST_UNUSED, "pcibridge"); + if (RT_FAILURE(rc)) + return rc; + pPciDev->Int.s.pfnBridgeConfigRead = pcibridgeR3ConfigRead; + pPciDev->Int.s.pfnBridgeConfigWrite = pcibridgeR3ConfigWrite; + + pBus->iDevSearch = 0; + + /* + * Register SSM handlers. We use the same saved state version as for the host bridge + * to make changes easier. + */ + rc = PDMDevHlpSSMRegisterEx(pDevIns, VBOX_PCI_SAVED_STATE_VERSION, sizeof(*pBus) + 16*128, "pgm", + NULL, NULL, NULL, + NULL, pcibridgeR3SaveExec, NULL, + NULL, pcibridgeR3LoadExec, NULL); + if (RT_FAILURE(rc)) + return rc; + + return VINF_SUCCESS; +} + +#else /* !IN_RING3 */ + +/** + * @interface_method_impl{PDMDEVREGR0,pfnConstruct} + */ +static DECLCALLBACK(int) pcibridgeRZConstruct(PPDMDEVINS pDevIns) +{ + PDMDEV_CHECK_VERSIONS_RETURN(pDevIns); + PDEVPCIBUS pBus = PDMINS_2_DATA(pDevIns, PDEVPCIBUS); + PDEVPCIBUSCC pBusCC = PDMINS_2_DATA_CC(pDevIns, PDEVPCIBUSCC); + + PDMPCIBUSREGCC PciBusReg; + PciBusReg.u32Version = PDM_PCIBUSREGCC_VERSION; + PciBusReg.iBus = pBus->iBus; + PciBusReg.pfnSetIrq = pcibridgeSetIrq; + PciBusReg.u32EndVersion = PDM_PCIBUSREGCC_VERSION; + int rc = PDMDevHlpPCIBusSetUpContext(pDevIns, &PciBusReg, &pBusCC->CTX_SUFF(pPciHlp)); + AssertRC(rc); + + return rc; +} + +#endif /* !IN_RING3 */ + +/** + * The device registration structure + * for the PCI-to-PCI bridge. + */ +const PDMDEVREG g_DevicePCIBridge = +{ + /* .u32Version = */ PDM_DEVREG_VERSION, + /* .uReserved0 = */ 0, + /* .szName = */ "pcibridge", + /* .fFlags = */ PDM_DEVREG_FLAGS_DEFAULT_BITS | PDM_DEVREG_FLAGS_RZ | PDM_DEVREG_FLAGS_NEW_STYLE, + /* .fClass = */ PDM_DEVREG_CLASS_BUS_PCI, + /* .cMaxInstances = */ ~0U, + /* .uSharedVersion = */ 42, + /* .cbInstanceShared = */ sizeof(DEVPCIBUS), + /* .cbInstanceCC = */ sizeof(CTX_SUFF(DEVPCIBUS)), + /* .cbInstanceRC = */ 0, + /* .cMaxPciDevices = */ 1, + /* .cMaxMsixVectors = */ 0, + /* .pszDescription = */ "82801 Mobile PCI to PCI bridge", +#if defined(IN_RING3) + /* .pszRCMod = */ "VBoxDDRC.rc", + /* .pszR0Mod = */ "VBoxDDR0.r0", + /* .pfnConstruct = */ pcibridgeR3Construct, + /* .pfnDestruct = */ pcibridgeR3Destruct, + /* .pfnRelocate = */ NULL, + /* .pfnMemSetup = */ NULL, + /* .pfnPowerOn = */ NULL, + /* .pfnReset = */ pcibridgeR3Reset, + /* .pfnSuspend = */ NULL, + /* .pfnResume = */ NULL, + /* .pfnAttach = */ NULL, + /* .pfnDetach = */ NULL, + /* .pfnQueryInterface = */ NULL, + /* .pfnInitComplete = */ NULL, + /* .pfnPowerOff = */ NULL, + /* .pfnSoftReset = */ NULL, + /* .pfnReserved0 = */ NULL, + /* .pfnReserved1 = */ NULL, + /* .pfnReserved2 = */ NULL, + /* .pfnReserved3 = */ NULL, + /* .pfnReserved4 = */ NULL, + /* .pfnReserved5 = */ NULL, + /* .pfnReserved6 = */ NULL, + /* .pfnReserved7 = */ NULL, +#elif defined(IN_RING0) + /* .pfnEarlyConstruct = */ NULL, + /* .pfnConstruct = */ pcibridgeRZConstruct, + /* .pfnDestruct = */ NULL, + /* .pfnFinalDestruct = */ NULL, + /* .pfnRequest = */ NULL, + /* .pfnReserved0 = */ NULL, + /* .pfnReserved1 = */ NULL, + /* .pfnReserved2 = */ NULL, + /* .pfnReserved3 = */ NULL, + /* .pfnReserved4 = */ NULL, + /* .pfnReserved5 = */ NULL, + /* .pfnReserved6 = */ NULL, + /* .pfnReserved7 = */ NULL, +#elif defined(IN_RC) + /* .pfnConstruct = */ pcibridgeRZConstruct, + /* .pfnReserved0 = */ NULL, + /* .pfnReserved1 = */ NULL, + /* .pfnReserved2 = */ NULL, + /* .pfnReserved3 = */ NULL, + /* .pfnReserved4 = */ NULL, + /* .pfnReserved5 = */ NULL, + /* .pfnReserved6 = */ NULL, + /* .pfnReserved7 = */ NULL, +#else +# error "Not in IN_RING3, IN_RING0 or IN_RC!" +#endif + /* .u32VersionEnd = */ PDM_DEVREG_VERSION +}; + diff --git a/src/VBox/Devices/Bus/DevPciIch9.cpp b/src/VBox/Devices/Bus/DevPciIch9.cpp new file mode 100644 index 00000000..e12a63d2 --- /dev/null +++ b/src/VBox/Devices/Bus/DevPciIch9.cpp @@ -0,0 +1,4042 @@ +/* $Id: DevPciIch9.cpp $ */ +/** @file + * DevPCI - ICH9 southbridge PCI bus emulation device. + * + * @remarks We'll be slowly promoting the code in this file to common PCI bus + * code. Function without 'static' and using 'devpci' as prefix is + * also used by DevPCI.cpp and have a prototype in DevPciInternal.h. + * + * For the time being the DevPciMerge1.cpp.h file will remain separate, + * due to 5.1. We can merge it into this one later in the dev cycle. + * + * DO NOT use the PDMPciDev* or PCIDev* family of functions in this + * file except in the two callbacks for config space access (and the + * functions which are used exclusively by that code) and the two + * device constructors when setting up the config space for the + * bridges. Everything else need extremely careful review. Using + * them elsewhere (especially in the init code) causes weird failures + * with PCI passthrough, as it would only update the array of + * (emulated) config space, but not talk to the actual device (needs + * invoking the respective callback). + */ + +/* + * Copyright (C) 2010-2022 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DEV_PCI +#define PDMPCIDEV_INCLUDE_PRIVATE /* Hack to get pdmpcidevint.h included at the right point. */ +#include <VBox/vmm/pdmpcidev.h> + +#include <VBox/AssertGuest.h> +#include <VBox/msi.h> +#ifdef VBOX_WITH_IOMMU_AMD +# include <VBox/iommu-amd.h> +#endif +#include <VBox/vmm/pdmdev.h> +#include <VBox/vmm/mm.h> +#include <iprt/asm.h> +#include <iprt/assert.h> +#include <iprt/string.h> +#ifdef IN_RING3 +# include <iprt/mem.h> +# include <iprt/uuid.h> +#endif + +#include "PciInline.h" +#include "VBoxDD.h" +#include "MsiCommon.h" +#include "DevPciInternal.h" +#ifdef VBOX_WITH_IOMMU_AMD +# include "../Bus/DevIommuAmd.h" +#endif + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +/** + * PCI configuration space address. + */ +typedef struct +{ + uint8_t iBus; + uint8_t iDeviceFunc; + uint16_t iRegister; +} PciAddress; + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +/** Saved state version of the ICH9 PCI bus device. */ +#define VBOX_ICH9PCI_SAVED_STATE_VERSION VBOX_ICH9PCI_SAVED_STATE_VERSION_4KB_CFG_SPACE +/** 4KB config space */ +#define VBOX_ICH9PCI_SAVED_STATE_VERSION_4KB_CFG_SPACE 4 +/** Adds I/O region types and sizes for dealing changes in resource regions. */ +#define VBOX_ICH9PCI_SAVED_STATE_VERSION_REGION_SIZES 3 +/** This appears to be the first state we need to care about. */ +#define VBOX_ICH9PCI_SAVED_STATE_VERSION_MSI 2 +/** This is apparently not supported or has a grossly incomplete state, juding + * from hints in the code. */ +#define VBOX_ICH9PCI_SAVED_STATE_VERSION_NOMSI 1 + +/** Invalid PCI region mapping address. */ +#define INVALID_PCI_ADDRESS UINT32_MAX + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +/* Prototypes */ +static void ich9pciSetIrqInternal(PPDMDEVINS pDevIns, PDEVPCIROOT pPciRoot, PDEVPCIBUSCC pBusCC, + uint8_t uDevFn, PPDMPCIDEV pPciDev, int iIrq, int iLevel, uint32_t uTagSrc); +#ifdef IN_RING3 +static int ich9pciFakePCIBIOS(PPDMDEVINS pDevIns); +DECLINLINE(PPDMPCIDEV) ich9pciFindBridge(PDEVPCIBUS pBus, uint8_t uBus); +static void ich9pciBiosInitAllDevicesOnBus(PPDMDEVINS pDevIns, PDEVPCIROOT pPciRoot, PDEVPCIBUS pBus); +static bool ich9pciBiosInitAllDevicesPrefetchableOnBus(PPDMDEVINS pDevIns, PDEVPCIROOT pPciRoot, PDEVPCIBUS pBus, bool fUse64Bit, bool fDryrun); +#endif + + +/** + * See 7.2.2. PCI Express Enhanced Configuration Mechanism for details of address + * mapping, we take n=6 approach + */ +DECLINLINE(void) ich9pciPhysToPciAddr(PDEVPCIROOT pPciRoot, RTGCPHYS off, PciAddress *pPciAddr) +{ + NOREF(pPciRoot); + pPciAddr->iBus = (off >> 20) & ((1<<6) - 1); + pPciAddr->iDeviceFunc = (off >> 12) & ((1<<(5+3)) - 1); // 5 bits - device, 3 bits - function + pPciAddr->iRegister = (off >> 0) & ((1<<(6+4+2)) - 1); // 6 bits - register, 4 bits - extended register, 2 bits -Byte Enable + RT_UNTRUSTED_VALIDATED_FENCE(); /* paranoia */ +} + +DECLINLINE(void) ich9pciStateToPciAddr(PDEVPCIROOT pPciRoot, RTGCPHYS addr, PciAddress *pPciAddr) +{ + pPciAddr->iBus = (pPciRoot->uConfigReg >> 16) & 0xff; + pPciAddr->iDeviceFunc = (pPciRoot->uConfigReg >> 8) & 0xff; + pPciAddr->iRegister = (pPciRoot->uConfigReg & 0xfc) | (addr & 3); + RT_UNTRUSTED_VALIDATED_FENCE(); /* paranoia */ +} + +static DECLCALLBACK(void) ich9pciSetIrq(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, int iIrq, int iLevel, uint32_t uTagSrc) +{ + LogFlowFunc(("invoked by %p/%d: iIrq=%d iLevel=%d uTagSrc=%#x\n", pDevIns, pDevIns->iInstance, iIrq, iLevel, uTagSrc)); + ich9pciSetIrqInternal(pDevIns, PDMINS_2_DATA(pDevIns, PDEVPCIROOT), PDMINS_2_DATA_CC(pDevIns, PDEVPCIBUSCC), + pPciDev->uDevFn, pPciDev, iIrq, iLevel, uTagSrc); +} + +/** + * Worker for ich9pcibridgeSetIrq and pcibridgeSetIrq that walks up to the root + * bridges and permutates iIrq accordingly. + * + * See ich9pciBiosInitAllDevicesOnBus for corresponding configuration code. + */ +DECLHIDDEN(PPDMDEVINS) devpcibridgeCommonSetIrqRootWalk(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, int iIrq, + PDEVPCIBUS *ppBus, uint8_t *puDevFnBridge, int *piIrqPinBridge) +{ + PDEVPCIBUSCC const pBridgeBusCC = PDMINS_2_DATA_CC(pDevIns, PDEVPCIBUSCC); /* For keep using our own pcihlp. */ + PPDMDEVINS const pBridgeDevIns = pDevIns; /* ditto */ + + PDEVPCIBUS pBus = PDMINS_2_DATA(pDevIns, PDEVPCIBUS); + PPDMDEVINS pDevInsBus; + PPDMPCIDEV pPciDevBus = pDevIns->apPciDevs[0]; + uint8_t uDevFnBridge = pPciDevBus->uDevFn; + int iIrqPinBridge = ((pPciDev->uDevFn >> 3) + iIrq) & 3; + uint64_t bmSeen[256/64] = { 0, 0, 0, 0 }; + AssertCompile(sizeof(pPciDevBus->Int.s.idxPdmBus) == 1); + ASMBitSet(bmSeen, pPciDevBus->Int.s.idxPdmBus); + + /* Walk the chain until we reach the host bus. */ + Assert(pBus->iBus != 0); + for (;;) + { + /* Get the parent. */ + pDevInsBus = pBridgeBusCC->CTX_SUFF(pPciHlp)->pfnGetBusByNo(pBridgeDevIns, pPciDevBus->Int.s.idxPdmBus); + AssertLogRelReturn(pDevInsBus, NULL); + + pBus = PDMINS_2_DATA(pDevInsBus, PDEVPCIBUS); + pPciDevBus = pDevInsBus->apPciDevs[0]; + if (pBus->iBus == 0) + { + *ppBus = pBus; + *puDevFnBridge = uDevFnBridge; + *piIrqPinBridge = iIrqPinBridge; + return pDevInsBus; + } + + uDevFnBridge = pPciDevBus->uDevFn; + iIrqPinBridge = ((uDevFnBridge >> 3) + iIrqPinBridge) & 3; + + /* Make sure that we cannot end up in a loop here: */ + AssertCompile(sizeof(pPciDevBus->Int.s.idxPdmBus) == 1); + AssertMsgReturn(ASMBitTestAndSet(bmSeen, pPciDevBus->Int.s.idxPdmBus), + ("idxPdmBus=%u\n", pPciDevBus->Int.s.idxPdmBus), + NULL); + } + +} + +static DECLCALLBACK(void) ich9pcibridgeSetIrq(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, int iIrq, int iLevel, uint32_t uTagSrc) +{ + /* + * The PCI-to-PCI bridge specification defines how the interrupt pins + * are routed from the secondary to the primary bus (see chapter 9). + * iIrq gives the interrupt pin the pci device asserted. + * We change iIrq here according to the spec and call the SetIrq function + * of our parent passing the device which asserted the interrupt instead of the device of the bridge. + * + * See ich9pciBiosInitAllDevicesOnBus for corresponding configuration code. + */ + PDEVPCIBUS pBus; + uint8_t uDevFnBridge; + int iIrqPinBridge; + PPDMDEVINS pDevInsBus = devpcibridgeCommonSetIrqRootWalk(pDevIns, pPciDev, iIrq, &pBus, &uDevFnBridge, &iIrqPinBridge); + AssertReturnVoid(pDevInsBus); + AssertMsg(pBus->iBus == 0, ("This is not the host pci bus iBus=%d\n", pBus->iBus)); + Assert(pDevInsBus->pReg == &g_DevicePciIch9); /* ASSUMPTION: Same style root bus. Need callback interface to mix types. */ + + /* + * For MSI/MSI-X enabled devices the iIrq doesn't denote the pin but rather a vector which is completely + * orthogonal to the pin based approach. The vector is not subject to the pin based routing with PCI bridges. + */ + int iIrqPinVector = iIrqPinBridge; + if ( MsiIsEnabled(pPciDev) + || MsixIsEnabled(pPciDev)) + iIrqPinVector = iIrq; + ich9pciSetIrqInternal(pDevInsBus, DEVPCIBUS_2_DEVPCIROOT(pBus), PDMINS_2_DATA_CC(pDevInsBus, PDEVPCIBUSCC), + uDevFnBridge, pPciDev, iIrqPinVector, iLevel, uTagSrc); +} + +#ifdef IN_RING3 + +/** + * @callback_method_impl{FNIOMIOPORTNEWOUT, + * Port I/O Handler for Fake PCI BIOS trigger OUT operations at 0410h.} + */ +static DECLCALLBACK(VBOXSTRICTRC) +ich9pciR3IOPortMagicPCIWrite(PPDMDEVINS pDevIns, void *pvUser, RTIOPORT offPort, uint32_t u32, unsigned cb) +{ + Assert(offPort == 0); RT_NOREF2(pvUser, offPort); + LogFlowFunc(("offPort=%#x u32=%#x cb=%d\n", offPort, u32, cb)); + if (cb == 4) + { + if (u32 == UINT32_C(19200509)) // Richard Adams + { + int rc = ich9pciFakePCIBIOS(pDevIns); + AssertRC(rc); + } + } + + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNIOMIOPORTNEWIN, + * Port I/O Handler for Fake PCI BIOS trigger IN operations at 0410h.} + */ +static DECLCALLBACK(VBOXSTRICTRC) +ich9pciR3IOPortMagicPCIRead(PPDMDEVINS pDevIns, void *pvUser, RTIOPORT offPort, uint32_t *pu32, unsigned cb) +{ + Assert(offPort == 0); RT_NOREF5(pDevIns, pvUser, offPort, pu32, cb); + LogFunc(("offPort=%#x cb=%d VERR_IOM_IOPORT_UNUSED\n", offPort, cb)); + return VERR_IOM_IOPORT_UNUSED; +} + +#endif /* IN_RING3 */ + +/** + * @callback_method_impl{FNIOMIOPORTNEWOUT, + * Port I/O Handler for PCI address OUT operations.} + * + * Emulates writes to Configuration Address Port at 0CF8h for Configuration + * Mechanism \#1. + */ +static DECLCALLBACK(VBOXSTRICTRC) +ich9pciIOPortAddressWrite(PPDMDEVINS pDevIns, void *pvUser, RTIOPORT offPort, uint32_t u32, unsigned cb) +{ + LogFlowFunc(("offPort=%#x u32=%#x cb=%d\n", offPort, u32, cb)); + Assert(offPort == 0); RT_NOREF2(offPort, pvUser); + if (cb == 4) + { + PDEVPCIROOT pThis = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + + /* + * bits [1:0] are hard-wired, read-only and must return zeroes + * when read. + */ + u32 &= ~3; + + PCI_LOCK_RET(pDevIns, VINF_IOM_R3_IOPORT_WRITE); + pThis->uConfigReg = u32; + PCI_UNLOCK(pDevIns); + } + + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNIOMIOPORTNEWIN, + * Port I/O Handler for PCI data IN operations.} + * + * Emulates reads from Configuration Address Port at 0CF8h for Configuration + * Mechanism \#1. + */ +static DECLCALLBACK(VBOXSTRICTRC) +ich9pciIOPortAddressRead(PPDMDEVINS pDevIns, void *pvUser, RTIOPORT offPort, uint32_t *pu32, unsigned cb) +{ + Assert(offPort == 0); RT_NOREF2(offPort, pvUser); + if (cb == 4) + { + PDEVPCIROOT pThis = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + + PCI_LOCK_RET(pDevIns, VINF_IOM_R3_IOPORT_READ); + *pu32 = pThis->uConfigReg; + PCI_UNLOCK(pDevIns); + + LogFlowFunc(("offPort=%#x cb=%d -> %#x\n", offPort, cb, *pu32)); + return VINF_SUCCESS; + } + + LogFunc(("offPort=%#x cb=%d VERR_IOM_IOPORT_UNUSED\n", offPort, cb)); + return VERR_IOM_IOPORT_UNUSED; +} + + +/** + * Perform configuration space write. + */ +static VBOXSTRICTRC ich9pciConfigWrite(PPDMDEVINS pDevIns, PDEVPCIROOT pPciRoot, PciAddress const *pPciAddr, + uint32_t u32Value, int cb, int rcReschedule) +{ + VBOXSTRICTRC rcStrict = VINF_SUCCESS; + + if (pPciAddr->iBus != 0) /* forward to subordinate bus */ + { + if (pPciRoot->PciBus.cBridges) + { +#ifdef IN_RING3 /** @todo do lookup in R0/RC too! r=klaus don't think that it can work, since the config space access callback only works in R3 */ + PPDMPCIDEV pBridgeDevice = ich9pciFindBridge(&pPciRoot->PciBus, pPciAddr->iBus); + if (pBridgeDevice) + { + AssertPtr(pBridgeDevice->Int.s.pfnBridgeConfigWrite); + rcStrict = pBridgeDevice->Int.s.pfnBridgeConfigWrite(pBridgeDevice->Int.s.CTX_SUFF(pDevIns), pPciAddr->iBus, + pPciAddr->iDeviceFunc, pPciAddr->iRegister, cb, u32Value); + } +#else + rcStrict = rcReschedule; +#endif + } + } + else /* forward to directly connected device */ + { + R3PTRTYPE(PDMPCIDEV *) pPciDev = pPciRoot->PciBus.apDevices[pPciAddr->iDeviceFunc]; + if (pPciDev) + { +#ifdef IN_RING3 + rcStrict = VINF_PDM_PCI_DO_DEFAULT; + if (pPciDev->Int.s.pfnConfigWrite) + rcStrict = pPciDev->Int.s.pfnConfigWrite(pPciDev->Int.s.CTX_SUFF(pDevIns), pPciDev, + pPciAddr->iRegister, cb, u32Value); + if (rcStrict == VINF_PDM_PCI_DO_DEFAULT) + rcStrict = devpciR3CommonConfigWriteWorker(pDevIns, PDMINS_2_DATA_CC(pDevIns, PDEVPCIBUSCC), + pPciDev, pPciAddr->iRegister, cb, u32Value); + RT_NOREF(rcReschedule); +#else + rcStrict = rcReschedule; + RT_NOREF(pDevIns, u32Value, cb); +#endif + } + } + + Log2Func(("%02x:%02x.%u reg %x(%u) %x %Rrc\n", pPciAddr->iBus, pPciAddr->iDeviceFunc >> 3, pPciAddr->iDeviceFunc & 0x7, + pPciAddr->iRegister, cb, u32Value, VBOXSTRICTRC_VAL(rcStrict))); + return rcStrict; +} + + +/** + * @callback_method_impl{FNIOMIOPORTNEWOUT, + * Port I/O Handler for PCI data OUT operations.} + * + * Emulates writes to Configuration Data Port at 0CFCh for Configuration + * Mechanism \#1. + */ +static DECLCALLBACK(VBOXSTRICTRC) +ich9pciIOPortDataWrite(PPDMDEVINS pDevIns, void *pvUser, RTIOPORT offPort, uint32_t u32, unsigned cb) +{ + PDEVPCIROOT pThis = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + LogFlowFunc(("offPort=%u u32=%#x cb=%d (config=%#10x)\n", offPort, u32, cb, pThis->uConfigReg)); + Assert(offPort < 4); NOREF(pvUser); + + VBOXSTRICTRC rcStrict = VINF_SUCCESS; + if (!(offPort % cb)) + { + PCI_LOCK_RET(pDevIns, VINF_IOM_R3_IOPORT_WRITE); + + if (pThis->uConfigReg & (1 << 31)) + { + + /* Decode target device from Configuration Address Port */ + PciAddress aPciAddr; + ich9pciStateToPciAddr(pThis, offPort, &aPciAddr); + + /* Perform configuration space write */ + rcStrict = ich9pciConfigWrite(pDevIns, pThis, &aPciAddr, u32, cb, VINF_IOM_R3_IOPORT_WRITE); + } + + PCI_UNLOCK(pDevIns); + } + else + AssertMsgFailed(("Unaligned write to offPort=%u u32=%#x cb=%d\n", offPort, u32, cb)); + + return rcStrict; +} + + +/** + * Perform configuration space read. + */ +static VBOXSTRICTRC ich9pciConfigRead(PDEVPCIROOT pPciRoot, PciAddress* pPciAddr, int cb, uint32_t *pu32Value, int rcReschedule) +{ + VBOXSTRICTRC rcStrict = VINF_SUCCESS; +#ifdef IN_RING3 + NOREF(rcReschedule); +#else + NOREF(cb); +#endif + + if (pPciAddr->iBus != 0) /* forward to subordinate bus */ + { + if (pPciRoot->PciBus.cBridges) + { +#ifdef IN_RING3 /** @todo do lookup in R0/RC too! r=klaus don't think that it can work, since the config space access callback only works in R3 */ + PPDMPCIDEV pBridgeDevice = ich9pciFindBridge(&pPciRoot->PciBus, pPciAddr->iBus); + if (pBridgeDevice) + { + AssertPtr(pBridgeDevice->Int.s.pfnBridgeConfigRead); + rcStrict = pBridgeDevice->Int.s.pfnBridgeConfigRead(pBridgeDevice->Int.s.CTX_SUFF(pDevIns), pPciAddr->iBus, + pPciAddr->iDeviceFunc, pPciAddr->iRegister, cb, pu32Value); + } + else + *pu32Value = UINT32_MAX; +#else + rcStrict = rcReschedule; +#endif + } + else + *pu32Value = 0xffffffff; + } + else /* forward to directly connected device */ + { + R3PTRTYPE(PDMPCIDEV *) pPciDev = pPciRoot->PciBus.apDevices[pPciAddr->iDeviceFunc]; + if (pPciDev) + { +#ifdef IN_RING3 + rcStrict = VINF_PDM_PCI_DO_DEFAULT; + if (pPciDev->Int.s.pfnConfigRead) + rcStrict = pPciDev->Int.s.pfnConfigRead(pPciDev->Int.s.CTX_SUFF(pDevIns), pPciDev, + pPciAddr->iRegister, cb, pu32Value); + if (rcStrict == VINF_PDM_PCI_DO_DEFAULT) + rcStrict = devpciR3CommonConfigReadWorker(pPciDev, pPciAddr->iRegister, cb, pu32Value); +#else + rcStrict = rcReschedule; +#endif + } + else + *pu32Value = UINT32_MAX; + } + + Log3Func(("%02x:%02x.%d reg %x(%d) gave %x %Rrc\n", pPciAddr->iBus, pPciAddr->iDeviceFunc >> 3, pPciAddr->iDeviceFunc & 0x7, + pPciAddr->iRegister, cb, *pu32Value, VBOXSTRICTRC_VAL(rcStrict) )); + return rcStrict; +} + + +/** + * @callback_method_impl{FNIOMIOPORTNEWIN, + * Port I/O Handler for PCI data IN operations.} + * + * Emulates reads from Configuration Data Port at 0CFCh for Configuration + * Mechanism \#1. + */ +static DECLCALLBACK(VBOXSTRICTRC) +ich9pciIOPortDataRead(PPDMDEVINS pDevIns, void *pvUser, RTIOPORT offPort, uint32_t *pu32, unsigned cb) +{ + NOREF(pvUser); + Assert(offPort < 4); + if (!(offPort % cb)) + { + PDEVPCIROOT pThis = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + *pu32 = 0xffffffff; + + PCI_LOCK_RET(pDevIns, VINF_IOM_R3_IOPORT_READ); + + /* Configuration space mapping enabled? */ + VBOXSTRICTRC rcStrict; + if (!(pThis->uConfigReg & (1 << 31))) + rcStrict = VINF_SUCCESS; + else + { + /* Decode target device and configuration space register */ + PciAddress aPciAddr; + ich9pciStateToPciAddr(pThis, offPort, &aPciAddr); + + /* Perform configuration space read */ + rcStrict = ich9pciConfigRead(pThis, &aPciAddr, cb, pu32, VINF_IOM_R3_IOPORT_READ); + } + + PCI_UNLOCK(pDevIns); + + LogFlowFunc(("offPort=%u cb=%#x (config=%#10x) -> %#x (%Rrc)\n", offPort, cb, *pu32, pThis->uConfigReg, VBOXSTRICTRC_VAL(rcStrict))); + return rcStrict; + } + AssertMsgFailed(("Unaligned read from offPort=%u cb=%d\n", offPort, cb)); + return VERR_IOM_IOPORT_UNUSED; +} + + +/* Compute mapping of PCI slot and IRQ number to APIC interrupt line */ +DECLINLINE(int) ich9pciSlot2ApicIrq(uint8_t uSlot, int irq_num) +{ + return (irq_num + uSlot) & 7; +} + +#ifdef IN_RING3 + +/* return the global irq number corresponding to a given device irq + pin. We could also use the bus number to have a more precise + mapping. This is the implementation note described in the PCI spec chapter 2.2.6 */ +DECLINLINE(int) ich9pciSlotGetPirq(uint8_t uBus, uint8_t uDevFn, uint8_t uIrqNum) +{ + NOREF(uBus); + int iSlotAddend = (uDevFn >> 3) - 1; + return (uIrqNum + iSlotAddend) & 3; +} + +/* irqs corresponding to PCI irqs A-D, must match pci_irq_list in pcibios.inc */ +/** @todo r=klaus inconsistent! ich9 doesn't implement PIRQ yet, so both needs to be addressed and tested thoroughly. */ +static const uint8_t aPciIrqs[4] = { 11, 10, 9, 5 }; + +#endif /* IN_RING3 */ + +/* Add one more level up request on APIC input line */ +DECLINLINE(void) ich9pciApicLevelUp(PDEVPCIROOT pPciRoot, int irq_num) +{ + ASMAtomicIncU32(&pPciRoot->auPciApicIrqLevels[irq_num]); +} + +/* Remove one level up request on APIC input line */ +DECLINLINE(void) ich9pciApicLevelDown(PDEVPCIROOT pPciRoot, int irq_num) +{ + ASMAtomicDecU32(&pPciRoot->auPciApicIrqLevels[irq_num]); +} + +static void ich9pciApicSetIrq(PPDMDEVINS pDevIns, PDEVPCIBUS pBus, PDEVPCIBUSCC pBusCC, + uint8_t uDevFn, PDMPCIDEV *pPciDev, int irq_num1, int iLevel, uint32_t uTagSrc, int iForcedIrq) +{ + /* This is only allowed to be called with a pointer to the root bus. */ + AssertMsg(pBus->iBus == 0, ("iBus=%u\n", pBus->iBus)); + uint16_t const uBusDevFn = PCIBDF_MAKE(pBus->iBus, uDevFn); + + if (iForcedIrq == -1) + { + int apic_irq, apic_level; + PDEVPCIROOT pPciRoot = DEVPCIBUS_2_DEVPCIROOT(pBus); + int irq_num = ich9pciSlot2ApicIrq(uDevFn >> 3, irq_num1); + + if ((iLevel & PDM_IRQ_LEVEL_HIGH) == PDM_IRQ_LEVEL_HIGH) + ich9pciApicLevelUp(pPciRoot, irq_num); + else if ((iLevel & PDM_IRQ_LEVEL_HIGH) == PDM_IRQ_LEVEL_LOW) + ich9pciApicLevelDown(pPciRoot, irq_num); + + apic_irq = irq_num + 0x10; + apic_level = pPciRoot->auPciApicIrqLevels[irq_num] != 0; + Log3Func(("%s: irq_num1=%d level=%d apic_irq=%d apic_level=%d irq_num1=%d uTagSrc=%#x\n", + R3STRING(pPciDev->pszNameR3), irq_num1, iLevel, apic_irq, apic_level, irq_num, uTagSrc)); + pBusCC->CTX_SUFF(pPciHlp)->pfnIoApicSetIrq(pDevIns, uBusDevFn, apic_irq, apic_level, uTagSrc); + + if ((iLevel & PDM_IRQ_LEVEL_FLIP_FLOP) == PDM_IRQ_LEVEL_FLIP_FLOP) + { + /* + * we raised it few lines above, as PDM_IRQ_LEVEL_FLIP_FLOP has + * PDM_IRQ_LEVEL_HIGH bit set + */ + ich9pciApicLevelDown(pPciRoot, irq_num); + pPciDev->Int.s.uIrqPinState = PDM_IRQ_LEVEL_LOW; + apic_level = pPciRoot->auPciApicIrqLevels[irq_num] != 0; + Log3Func(("%s: irq_num1=%d level=%d apic_irq=%d apic_level=%d irq_num1=%d uTagSrc=%#x (flop)\n", + R3STRING(pPciDev->pszNameR3), irq_num1, iLevel, apic_irq, apic_level, irq_num, uTagSrc)); + pBusCC->CTX_SUFF(pPciHlp)->pfnIoApicSetIrq(pDevIns, uBusDevFn, apic_irq, apic_level, uTagSrc); + } + } else { + Log3Func(("(forced) %s: irq_num1=%d level=%d acpi_irq=%d uTagSrc=%#x\n", + R3STRING(pPciDev->pszNameR3), irq_num1, iLevel, iForcedIrq, uTagSrc)); + pBusCC->CTX_SUFF(pPciHlp)->pfnIoApicSetIrq(pDevIns, uBusDevFn, iForcedIrq, iLevel, uTagSrc); + } +} + +static void ich9pciSetIrqInternal(PPDMDEVINS pDevIns, PDEVPCIROOT pPciRoot, PDEVPCIBUSCC pBusCC, + uint8_t uDevFn, PPDMPCIDEV pPciDev, int iIrq, int iLevel, uint32_t uTagSrc) +{ + /* If MSI or MSI-X is enabled, PCI INTx# signals are disabled regardless of the PCI command + * register interrupt bit state. + * PCI 3.0 (section 6.8) forbids MSI and MSI-X to be enabled at the same time and makes + * that undefined behavior. We check for MSI first, then MSI-X. + */ + if (MsiIsEnabled(pPciDev)) + { + Assert(!MsixIsEnabled(pPciDev)); /* Not allowed -- see note above. */ + LogFlowFunc(("PCI Dev %p : MSI\n", pPciDev)); + MsiNotify(pDevIns, pBusCC->CTX_SUFF(pPciHlp), pPciDev, iIrq, iLevel, uTagSrc); + return; + } + + if (MsixIsEnabled(pPciDev)) + { + LogFlowFunc(("PCI Dev %p : MSI-X\n", pPciDev)); + MsixNotify(pDevIns, pBusCC->CTX_SUFF(pPciHlp), pPciDev, iIrq, iLevel, uTagSrc); + return; + } + + PDEVPCIBUS pBus = &pPciRoot->PciBus; + /* safe, only needs to go to the config space array */ + const bool fIsAcpiDevice = PDMPciDevGetDeviceId(pPciDev) == 0x7113; + + LogFlowFunc(("PCI Dev %p : IRQ\n", pPciDev)); + /* Check if the state changed. */ + if (pPciDev->Int.s.uIrqPinState != iLevel) + { + pPciDev->Int.s.uIrqPinState = (iLevel & PDM_IRQ_LEVEL_HIGH); + + /** @todo r=klaus: implement PIRQ handling (if APIC isn't active). Needed for legacy OSes which don't use the APIC stuff. */ + + /* Send interrupt to I/O APIC only now. */ + if (fIsAcpiDevice) + /* + * ACPI needs special treatment since SCI is hardwired and + * should not be affected by PCI IRQ routing tables at the + * same time SCI IRQ is shared in PCI sense hence this + * kludge (i.e. we fetch the hardwired value from ACPIs + * PCI device configuration space). + */ + /* safe, only needs to go to the config space array */ + ich9pciApicSetIrq(pDevIns, pBus, pBusCC, uDevFn, pPciDev, -1, iLevel, uTagSrc, PDMPciDevGetInterruptLine(pPciDev)); + else + ich9pciApicSetIrq(pDevIns, pBus, pBusCC, uDevFn, pPciDev, iIrq, iLevel, uTagSrc, -1); + } +} + + +/** + * @callback_method_impl{FNIOMMMIONEWWRITE, + * Emulates writes to configuration space.} + */ +static DECLCALLBACK(VBOXSTRICTRC) ich9pciMcfgMMIOWrite(PPDMDEVINS pDevIns, void *pvUser, RTGCPHYS off, void const *pv, unsigned cb) +{ + PDEVPCIROOT pPciRoot = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + Log2Func(("%RGp LB %d\n", off, cb)); + NOREF(pvUser); + + /* Decode target device and configuration space register */ + PciAddress aDest; + ich9pciPhysToPciAddr(pPciRoot, off, &aDest); + + /* Get the value. */ + uint32_t u32; + switch (cb) + { + case 1: + u32 = *(uint8_t const *)pv; + break; + case 2: + u32 = *(uint16_t const *)pv; + break; + case 4: + u32 = *(uint32_t const *)pv; + break; + default: + ASSERT_GUEST_MSG_FAILED(("cb=%u off=%RGp\n", cb, off)); /** @todo how the heck should this work? Split it, right? */ + u32 = 0; + break; + } + + /* Perform configuration space write */ + PCI_LOCK_RET(pDevIns, VINF_IOM_R3_MMIO_WRITE); + VBOXSTRICTRC rcStrict = ich9pciConfigWrite(pDevIns, pPciRoot, &aDest, u32, cb, VINF_IOM_R3_MMIO_WRITE); + PCI_UNLOCK(pDevIns); + + return rcStrict; +} + + +/** + * @callback_method_impl{FNIOMMMIONEWWRITE, + * Emulates reads from configuration space.} + */ +static DECLCALLBACK(VBOXSTRICTRC) ich9pciMcfgMMIORead(PPDMDEVINS pDevIns, void *pvUser, RTGCPHYS off, void *pv, unsigned cb) +{ + PDEVPCIROOT pPciRoot = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + LogFlowFunc(("%RGp LB %u\n", off, cb)); + NOREF(pvUser); + + /* Decode target device and configuration space register */ + PciAddress aDest; + ich9pciPhysToPciAddr(pPciRoot, off, &aDest); + + /* Perform configuration space read */ + uint32_t u32Value = 0; + PCI_LOCK_RET(pDevIns, VINF_IOM_R3_MMIO_READ); + VBOXSTRICTRC rcStrict = ich9pciConfigRead(pPciRoot, &aDest, cb, &u32Value, VINF_IOM_R3_MMIO_READ); + PCI_UNLOCK(pDevIns); + + if (RT_SUCCESS(rcStrict)) /** @todo this is wrong, though it probably works fine due to double buffering... */ + { + switch (cb) + { + case 1: + *(uint8_t *)pv = (uint8_t)u32Value; + break; + case 2: + *(uint16_t *)pv = (uint16_t)u32Value; + break; + case 4: + *(uint32_t *)pv = u32Value; + break; + default: + ASSERT_GUEST_MSG_FAILED(("cb=%u off=%RGp\n", cb, off)); /** @todo how the heck should this work? Split it, right? */ + break; + } + } + + return VBOXSTRICTRC_TODO(rcStrict); +} + +#ifdef IN_RING3 + +DECLINLINE(PPDMPCIDEV) ich9pciFindBridge(PDEVPCIBUS pBus, uint8_t uBus) +{ + /* Search for a fitting bridge. */ + for (uint32_t iBridge = 0; iBridge < pBus->cBridges; iBridge++) + { + /* + * Examine secondary and subordinate bus number. + * If the target bus is in the range we pass the request on to the bridge. + */ + PPDMPCIDEV pBridge = pBus->papBridgesR3[iBridge]; + AssertMsg(pBridge && pciDevIsPci2PciBridge(pBridge), + ("Device is not a PCI bridge but on the list of PCI bridges\n")); + /* safe, only needs to go to the config space array */ + uint32_t uSecondary = PDMPciDevGetByte(pBridge, VBOX_PCI_SECONDARY_BUS); + /* safe, only needs to go to the config space array */ + uint32_t uSubordinate = PDMPciDevGetByte(pBridge, VBOX_PCI_SUBORDINATE_BUS); + Log3Func(("bus %p, bridge %d: %d in %d..%d\n", pBus, iBridge, uBus, uSecondary, uSubordinate)); + if (uBus >= uSecondary && uBus <= uSubordinate) + return pBridge; + } + + /* Nothing found. */ + return NULL; +} + +uint32_t devpciR3GetCfg(PPDMPCIDEV pPciDev, int32_t iRegister, int cb) +{ + uint32_t u32Value = UINT32_MAX; + VBOXSTRICTRC rcStrict = VINF_PDM_PCI_DO_DEFAULT; + if (pPciDev->Int.s.pfnConfigRead) + rcStrict = pPciDev->Int.s.pfnConfigRead(pPciDev->Int.s.CTX_SUFF(pDevIns), pPciDev, iRegister, cb, &u32Value); + if (rcStrict == VINF_PDM_PCI_DO_DEFAULT) + rcStrict = devpciR3CommonConfigReadWorker(pPciDev, iRegister, cb, &u32Value); + AssertRCSuccess(VBOXSTRICTRC_VAL(rcStrict)); + return u32Value; +} + +DECLINLINE(uint32_t) devpciGetRegionReg(int iRegion) +{ + return iRegion == VBOX_PCI_ROM_SLOT + ? VBOX_PCI_ROM_ADDRESS : (VBOX_PCI_BASE_ADDRESS_0 + iRegion * 4); +} + +/** + * Worker for devpciR3SetByte(), devpciR3SetWord() and devpciR3SetDWord(), also + * used during state restore. + */ +void devpciR3SetCfg(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, int32_t iRegister, uint32_t u32Value, int cb) +{ + Assert(cb <= 4 && cb != 3); + VBOXSTRICTRC rcStrict = VINF_PDM_PCI_DO_DEFAULT; + if (pPciDev->Int.s.pfnConfigWrite) + rcStrict = pPciDev->Int.s.pfnConfigWrite(pPciDev->Int.s.CTX_SUFF(pDevIns), pPciDev, iRegister, cb, u32Value); + if (rcStrict == VINF_PDM_PCI_DO_DEFAULT) + rcStrict = devpciR3CommonConfigWriteWorker(pDevIns, PDMINS_2_DATA_CC(pDevIns, PDEVPCIBUSCC), + pPciDev, iRegister, cb, u32Value); + AssertRCSuccess(VBOXSTRICTRC_VAL(rcStrict)); +} + + +/* -=-=-=-=-=- PCI Bus Interface Methods (PDMPCIBUSREG) -=-=-=-=-=- */ + +/** + * Search for a completely unused device entry (all 8 functions are unused). + * + * @returns VBox status code. + * @param pBus The bus to register with. + * @remarks Caller enters the PDM critical section. + */ +static uint8_t devpciR3CommonFindUnusedDeviceNo(PDEVPCIBUS pBus) +{ + for (uint8_t uPciDevNo = pBus->iDevSearch >> VBOX_PCI_DEVFN_DEV_SHIFT; uPciDevNo < VBOX_PCI_MAX_DEVICES; uPciDevNo++) + if ( !pBus->apDevices[VBOX_PCI_DEVFN_MAKE(uPciDevNo, 0)] + && !pBus->apDevices[VBOX_PCI_DEVFN_MAKE(uPciDevNo, 1)] + && !pBus->apDevices[VBOX_PCI_DEVFN_MAKE(uPciDevNo, 2)] + && !pBus->apDevices[VBOX_PCI_DEVFN_MAKE(uPciDevNo, 3)] + && !pBus->apDevices[VBOX_PCI_DEVFN_MAKE(uPciDevNo, 4)] + && !pBus->apDevices[VBOX_PCI_DEVFN_MAKE(uPciDevNo, 5)] + && !pBus->apDevices[VBOX_PCI_DEVFN_MAKE(uPciDevNo, 6)] + && !pBus->apDevices[VBOX_PCI_DEVFN_MAKE(uPciDevNo, 7)]) + return uPciDevNo; + return UINT8_MAX; +} + + + +/** + * Registers the device with the specified PCI bus. + * + * This is shared between the pci bus and pci bridge code. + * + * @returns VBox status code. + * @param pDevIns The PCI bus device instance. + * @param pBus The bus to register with. + * @param pPciDev The PCI device structure. + * @param fFlags Reserved for future use, PDMPCIDEVREG_F_MBZ. + * @param uPciDevNo PDMPCIDEVREG_DEV_NO_FIRST_UNUSED, or a specific + * device number (0-31). + * @param uPciFunNo PDMPCIDEVREG_FUN_NO_FIRST_UNUSED, or a specific + * function number (0-7). + * @param pszName Device name (static but not unique). + * + * @remarks Caller enters the PDM critical section. + */ +static int devpciR3CommonRegisterDeviceOnBus(PPDMDEVINS pDevIns, PDEVPCIBUS pBus, PPDMPCIDEV pPciDev, uint32_t fFlags, + uint8_t uPciDevNo, uint8_t uPciFunNo, const char *pszName) +{ + RT_NOREF(pDevIns); + + /* + * Validate input. + */ + AssertPtrReturn(pszName, VERR_INVALID_POINTER); + AssertPtrReturn(pPciDev, VERR_INVALID_POINTER); + AssertReturn(!(fFlags & ~PDMPCIDEVREG_F_VALID_MASK), VERR_INVALID_FLAGS); + AssertReturn(uPciDevNo < VBOX_PCI_MAX_DEVICES || uPciDevNo == PDMPCIDEVREG_DEV_NO_FIRST_UNUSED, VERR_INVALID_PARAMETER); + AssertReturn(uPciFunNo < VBOX_PCI_MAX_FUNCTIONS || uPciFunNo == PDMPCIDEVREG_FUN_NO_FIRST_UNUSED, VERR_INVALID_PARAMETER); + + /* + * Assign device & function numbers. + */ + + /* Work the optional assignment flag. */ + if (fFlags & PDMPCIDEVREG_F_NOT_MANDATORY_NO) + { + AssertLogRelMsgReturn(uPciDevNo < VBOX_PCI_MAX_DEVICES && uPciFunNo < VBOX_PCI_MAX_FUNCTIONS, + ("PDMPCIDEVREG_F_NOT_MANDATORY_NO not implemented for #Dev=%#x / #Fun=%#x\n", uPciDevNo, uPciFunNo), + VERR_NOT_IMPLEMENTED); + if (pBus->apDevices[VBOX_PCI_DEVFN_MAKE(uPciDevNo, uPciFunNo)]) + { + uPciDevNo = PDMPCIDEVREG_DEV_NO_FIRST_UNUSED; + uPciFunNo = PDMPCIDEVREG_FUN_NO_FIRST_UNUSED; + } + } + + if (uPciDevNo == PDMPCIDEVREG_DEV_NO_FIRST_UNUSED) + { + /* Just find the next unused device number and we're good. */ + uPciDevNo = devpciR3CommonFindUnusedDeviceNo(pBus); + AssertLogRelMsgReturn(uPciDevNo < VBOX_PCI_MAX_DEVICES, ("Couldn't find a free spot!\n"), VERR_PDM_TOO_PCI_MANY_DEVICES); + if (uPciFunNo == PDMPCIDEVREG_FUN_NO_FIRST_UNUSED) + uPciFunNo = 0; + } + else + { + /* + * Direct assignment of device number can be more complicated. + */ + PPDMPCIDEV pClash; + if (uPciFunNo != PDMPCIDEVREG_FUN_NO_FIRST_UNUSED) + { + /* In the case of a specified function, we only relocate an existing + device if it belongs to a different device instance. Reasoning is + that the device should figure out it's own function assignments. + Note! We could make this more flexible by relocating functions assigned + via PDMPCIDEVREG_FUN_NO_FIRST_UNUSED, but it can wait till it's needed. */ + pClash = pBus->apDevices[VBOX_PCI_DEVFN_MAKE(uPciDevNo, uPciFunNo)]; + if (!pClash) + { /* likely */ } + else if (pClash->Int.s.pDevInsR3 == pPciDev->Int.s.pDevInsR3) + AssertLogRelMsgFailedReturn(("PCI Configuration conflict at %u.%u: %s vs %s (same pDevIns)!\n", + uPciDevNo, uPciFunNo, pClash->pszNameR3, pszName), + VERR_PDM_TOO_PCI_MANY_DEVICES); + else if (!pClash->Int.s.fReassignableDevNo) + AssertLogRelMsgFailedReturn(("PCI Configuration conflict at %u.%u: %s vs %s (different pDevIns)!\n", + uPciDevNo, uPciFunNo, pClash->pszNameR3, pszName), + VERR_PDM_TOO_PCI_MANY_DEVICES); + } + else + { + /* First unused function slot. Again, we only relocate the whole + thing if all the device instance differs, because we otherwise + reason that a device should manage its own functions correctly. */ + unsigned cSameDevInses = 0; + for (uPciFunNo = 0, pClash = NULL; uPciFunNo < VBOX_PCI_MAX_FUNCTIONS; uPciFunNo++) + { + pClash = pBus->apDevices[VBOX_PCI_DEVFN_MAKE(uPciDevNo, uPciFunNo)]; + if (!pClash) + break; + cSameDevInses += pClash->Int.s.pDevInsR3 == pPciDev->Int.s.pDevInsR3; + } + if (!pClash) + Assert(uPciFunNo < VBOX_PCI_MAX_FUNCTIONS); + else + AssertLogRelMsgReturn(cSameDevInses == 0, + ("PCI Configuration conflict at %u.* appending %s (%u of %u pDevIns matches)!\n", + uPciDevNo, pszName, cSameDevInses, VBOX_PCI_MAX_FUNCTIONS), + VERR_PDM_TOO_PCI_MANY_DEVICES); + } + if (pClash) + { + /* + * Try relocate the existing device. + */ + /* Check that all functions can be moved. */ + for (uint8_t uMoveFun = 0; uMoveFun < VBOX_PCI_MAX_FUNCTIONS; uMoveFun++) + { + PPDMPCIDEV pMovePciDev = pBus->apDevices[VBOX_PCI_DEVFN_MAKE(uPciDevNo, uMoveFun)]; + AssertLogRelMsgReturn(!pMovePciDev || pMovePciDev->Int.s.fReassignableDevNo, + ("PCI Configuration conflict at %u.%u: %s vs %s\n", + uPciDevNo, uMoveFun, pMovePciDev->pszNameR3, pszName), + VERR_PDM_TOO_PCI_MANY_DEVICES); + } + + /* Find a free device number to move it to. */ + uint8_t uMoveToDevNo = devpciR3CommonFindUnusedDeviceNo(pBus); + Assert(uMoveToDevNo != uPciFunNo); + AssertLogRelMsgReturn(uMoveToDevNo < VBOX_PCI_MAX_DEVICES, + ("No space to relocate device at %u so '%s' can be placed there instead!\n", uPciFunNo, pszName), + VERR_PDM_TOO_PCI_MANY_DEVICES); + + /* Execute the move. */ + for (uint8_t uMoveFun = 0; uMoveFun < VBOX_PCI_MAX_FUNCTIONS; uMoveFun++) + { + PPDMPCIDEV pMovePciDev = pBus->apDevices[VBOX_PCI_DEVFN_MAKE(uPciDevNo, uMoveFun)]; + if (pMovePciDev) + { + Log(("PCI: Relocating '%s' from %u.%u to %u.%u.\n", pMovePciDev->pszNameR3, uPciDevNo, uMoveFun, uMoveToDevNo, uMoveFun)); + pBus->apDevices[VBOX_PCI_DEVFN_MAKE(uPciDevNo, uMoveFun)] = NULL; + pBus->apDevices[VBOX_PCI_DEVFN_MAKE(uMoveToDevNo, uMoveFun)] = pMovePciDev; + pMovePciDev->uDevFn = VBOX_PCI_DEVFN_MAKE(uMoveToDevNo, uMoveFun); + } + } + } + } + + /* + * Now, initialize the rest of the PCI device structure. + */ + Assert(!pBus->apDevices[VBOX_PCI_DEVFN_MAKE(uPciDevNo, uPciFunNo)]); + pBus->apDevices[VBOX_PCI_DEVFN_MAKE(uPciDevNo, uPciFunNo)] = pPciDev; + + pPciDev->uDevFn = VBOX_PCI_DEVFN_MAKE(uPciDevNo, uPciFunNo); + pPciDev->Int.s.pBusR3 = pBus; + Assert(pBus == PDMINS_2_DATA(pDevIns, PDEVPCIBUS)); + pPciDev->Int.s.pfnConfigRead = NULL; + pPciDev->Int.s.pfnConfigWrite = NULL; + pPciDev->Int.s.hMmioMsix = NIL_IOMMMIOHANDLE; + if (pBus->fTypePiix3 && pPciDev->cbConfig > 256) + pPciDev->cbConfig = 256; + + /* Remember and mark bridges. */ + if (fFlags & PDMPCIDEVREG_F_PCI_BRIDGE) + { + AssertLogRelMsgReturn(pBus->cBridges < RT_ELEMENTS(pBus->apDevices), + ("Number of bridges exceeds the number of possible devices on the bus\n"), + VERR_INTERNAL_ERROR_3); + pBus->papBridgesR3[pBus->cBridges++] = pPciDev; + pciDevSetPci2PciBridge(pPciDev); + } + + Log(("PCI: Registered device %d function %d (%#x) '%s'.\n", + uPciDevNo, uPciFunNo, UINT32_C(0x80000000) | (pPciDev->uDevFn << 8), pszName)); + + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{PDMPCIBUSREGR3,pfnRegisterR3} + */ +DECLCALLBACK(int) devpciR3CommonRegisterDevice(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t fFlags, + uint8_t uPciDevNo, uint8_t uPciFunNo, const char *pszName) +{ + PDEVPCIBUS pBus = PDMINS_2_DATA(pDevIns, PDEVPCIBUS); + AssertCompileMemberOffset(DEVPCIROOT, PciBus, 0); + return devpciR3CommonRegisterDeviceOnBus(pDevIns, pBus, pPciDev, fFlags, uPciDevNo, uPciFunNo, pszName); +} + + +/** + * @interface_method_impl{PDMPCIBUSREGR3,pfnRegisterR3} + */ +DECLCALLBACK(int) devpcibridgeR3CommonRegisterDevice(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t fFlags, + uint8_t uPciDevNo, uint8_t uPciFunNo, const char *pszName) +{ + PDEVPCIBUS pBus = PDMINS_2_DATA(pDevIns, PDEVPCIBUS); + return devpciR3CommonRegisterDeviceOnBus(pDevIns, pBus, pPciDev, fFlags, uPciDevNo, uPciFunNo, pszName); +} + + +static DECLCALLBACK(int) ich9pciRegisterMsi(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, PPDMMSIREG pMsiReg) +{ + //PDEVPCIBUS pBus = PDMINS_2_DATA(pDevIns, PDEVPCIBUS); + PDEVPCIBUSCC pBusCC = PDMINS_2_DATA_CC(pDevIns, PDEVPCIBUSCC); + + int rc = MsiR3Init(pPciDev, pMsiReg); + if (RT_SUCCESS(rc)) + rc = MsixR3Init(pBusCC->CTX_SUFF(pPciHlp), pPciDev, pMsiReg); + + return rc; +} + + +/** + * @interface_method_impl{PDMPCIBUSREGR3,pfnIORegionRegisterR3} + */ +DECLCALLBACK(int) devpciR3CommonIORegionRegister(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t iRegion, + RTGCPHYS cbRegion, PCIADDRESSSPACE enmType, uint32_t fFlags, + uint64_t hHandle, PFNPCIIOREGIONMAP pfnMapUnmap) +{ + LogFunc(("%s: region #%u size %RGp type %x fFlags=%#x hHandle=%#RX64\n", + pPciDev->pszNameR3, iRegion, cbRegion, enmType, fFlags, hHandle)); + RT_NOREF(pDevIns); + + /* + * Validate. + */ + AssertMsgReturn( enmType == (PCI_ADDRESS_SPACE_MEM | PCI_ADDRESS_SPACE_BAR32) + || enmType == (PCI_ADDRESS_SPACE_MEM_PREFETCH | PCI_ADDRESS_SPACE_BAR32) + || enmType == (PCI_ADDRESS_SPACE_MEM | PCI_ADDRESS_SPACE_BAR64) + || enmType == (PCI_ADDRESS_SPACE_MEM_PREFETCH | PCI_ADDRESS_SPACE_BAR64) + || enmType == PCI_ADDRESS_SPACE_IO + , + ("Invalid enmType=%#x? Or was this a bitmask after all...\n", enmType), + VERR_INVALID_PARAMETER); + AssertMsgReturn((unsigned)iRegion < VBOX_PCI_NUM_REGIONS, + ("Invalid iRegion=%d VBOX_PCI_NUM_REGIONS=%d\n", iRegion, VBOX_PCI_NUM_REGIONS), + VERR_INVALID_PARAMETER); + int iLastSet = ASMBitLastSetU64(cbRegion); + AssertMsgReturn( iLastSet != 0 + && RT_BIT_64(iLastSet - 1) == cbRegion, + ("Invalid cbRegion=%RGp iLastSet=%#x (not a power of 2 or 0)\n", cbRegion, iLastSet), + VERR_INVALID_PARAMETER); + switch (fFlags & PDMPCIDEV_IORGN_F_HANDLE_MASK) + { + case PDMPCIDEV_IORGN_F_IOPORT_HANDLE: + case PDMPCIDEV_IORGN_F_MMIO_HANDLE: + case PDMPCIDEV_IORGN_F_MMIO2_HANDLE: + AssertReturn(hHandle != UINT64_MAX, VERR_INVALID_HANDLE); + break; + default: + AssertReturn(hHandle == UINT64_MAX, VERR_INVALID_HANDLE); + } + + /* Make sure that we haven't marked this region as continuation of 64-bit region. */ + AssertReturn(pPciDev->Int.s.aIORegions[iRegion].type != 0xff, VERR_NOT_AVAILABLE); + + /* + * Register the I/O region. + */ + PPCIIOREGION pRegion = &pPciDev->Int.s.aIORegions[iRegion]; + pRegion->addr = INVALID_PCI_ADDRESS; + pRegion->size = cbRegion; + pRegion->fFlags = fFlags; + pRegion->hHandle = hHandle; + pRegion->type = enmType; + pRegion->pfnMap = pfnMapUnmap; + + if ((enmType & PCI_ADDRESS_SPACE_BAR64) != 0) + { + /* VBOX_PCI_BASE_ADDRESS_5 and VBOX_PCI_ROM_ADDRESS are excluded. */ + AssertMsgReturn(iRegion < VBOX_PCI_NUM_REGIONS - 2, + ("Region %d cannot be 64-bit\n", iRegion), + VERR_INVALID_PARAMETER); + /* Mark next region as continuation of this one. */ + pPciDev->Int.s.aIORegions[iRegion + 1].type = 0xff; + } + + /* Set type in the PCI config space. */ + AssertCompile(PCI_ADDRESS_SPACE_MEM == 0); + AssertCompile(PCI_ADDRESS_SPACE_IO == 1); + AssertCompile(PCI_ADDRESS_SPACE_BAR64 == RT_BIT_32(2)); + AssertCompile(PCI_ADDRESS_SPACE_MEM_PREFETCH == RT_BIT_32(3)); + uint32_t u32Value = (uint32_t)enmType & (PCI_ADDRESS_SPACE_IO | PCI_ADDRESS_SPACE_BAR64 | PCI_ADDRESS_SPACE_MEM_PREFETCH); + /* safe, only needs to go to the config space array */ + PDMPciDevSetDWord(pPciDev, devpciGetRegionReg(iRegion), u32Value); + + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{PDMPCIBUSREGR3,pfnInterceptConfigAccesses} + */ +DECLCALLBACK(void) devpciR3CommonInterceptConfigAccesses(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, + PFNPCICONFIGREAD pfnRead, PFNPCICONFIGWRITE pfnWrite) +{ + NOREF(pDevIns); + + pPciDev->Int.s.pfnConfigRead = pfnRead; + pPciDev->Int.s.pfnConfigWrite = pfnWrite; +} + + +static int ich9pciR3CommonSaveExec(PCPDMDEVHLPR3 pHlp, PDEVPCIBUS pBus, PSSMHANDLE pSSM) +{ + /* + * Iterate thru all the devices. + */ + for (uint32_t uDevFn = 0; uDevFn < RT_ELEMENTS(pBus->apDevices); uDevFn++) + { + PPDMPCIDEV pDev = pBus->apDevices[uDevFn]; + if (pDev) + { + /* Device position */ + pHlp->pfnSSMPutU32(pSSM, uDevFn); + + /* PCI config registers */ + pHlp->pfnSSMPutU32(pSSM, sizeof(pDev->abConfig)); + pHlp->pfnSSMPutMem(pSSM, pDev->abConfig, sizeof(pDev->abConfig)); + + /* Device flags */ + pHlp->pfnSSMPutU32(pSSM, pDev->Int.s.fFlags); + + /* IRQ pin state */ + pHlp->pfnSSMPutS32(pSSM, pDev->Int.s.uIrqPinState); + + /* MSI info */ + pHlp->pfnSSMPutU8(pSSM, pDev->Int.s.u8MsiCapOffset); + pHlp->pfnSSMPutU8(pSSM, pDev->Int.s.u8MsiCapSize); + + /* MSI-X info */ + pHlp->pfnSSMPutU8(pSSM, pDev->Int.s.u8MsixCapOffset); + pHlp->pfnSSMPutU8(pSSM, pDev->Int.s.u8MsixCapSize); + + /* Save MSI-X page state */ + if (pDev->Int.s.u8MsixCapOffset != 0) + { + pHlp->pfnSSMPutU32(pSSM, pDev->Int.s.cbMsixRegion); + pHlp->pfnSSMPutMem(pSSM, pDev->abMsixState, pDev->Int.s.cbMsixRegion); + } + else + pHlp->pfnSSMPutU32(pSSM, 0); + + /* Save the type an size of all the regions. */ + for (uint32_t iRegion = 0; iRegion < VBOX_PCI_NUM_REGIONS; iRegion++) + { + pHlp->pfnSSMPutU8(pSSM, pDev->Int.s.aIORegions[iRegion].type); + pHlp->pfnSSMPutU64(pSSM, pDev->Int.s.aIORegions[iRegion].size); + } + } + } + return pHlp->pfnSSMPutU32(pSSM, UINT32_MAX); /* terminator */ +} + +static DECLCALLBACK(int) ich9pciR3SaveExec(PPDMDEVINS pDevIns, PSSMHANDLE pSSM) +{ + PDEVPCIROOT pThis = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + PCPDMDEVHLPR3 pHlp = pDevIns->pHlpR3; + + /* + * Bus state data. + */ + pHlp->pfnSSMPutU32(pSSM, pThis->uConfigReg); + + /* + * Save IRQ states. + */ + for (unsigned i = 0; i < RT_ELEMENTS(pThis->auPciApicIrqLevels); i++) + pHlp->pfnSSMPutU32(pSSM, pThis->auPciApicIrqLevels[i]); + + pHlp->pfnSSMPutU32(pSSM, UINT32_MAX); /* separator */ + + return ich9pciR3CommonSaveExec(pHlp, &pThis->PciBus, pSSM); +} + + +static DECLCALLBACK(int) ich9pcibridgeR3SaveExec(PPDMDEVINS pDevIns, PSSMHANDLE pSSM) +{ + PDEVPCIBUS pThis = PDMINS_2_DATA(pDevIns, PDEVPCIBUS); + PCPDMDEVHLPR3 pHlp = pDevIns->pHlpR3; + + return ich9pciR3CommonSaveExec(pHlp, pThis, pSSM); +} + + +/** + * @callback_method_impl{FNPCIBRIDGECONFIGWRITE} + */ +static DECLCALLBACK(VBOXSTRICTRC) ich9pcibridgeConfigWrite(PPDMDEVINSR3 pDevIns, uint8_t iBus, uint8_t iDevice, + uint32_t u32Address, unsigned cb, uint32_t u32Value) +{ + PDEVPCIBUS pBus = PDMINS_2_DATA(pDevIns, PDEVPCIBUS); + VBOXSTRICTRC rcStrict = VINF_SUCCESS; + LogFlowFunc(("pDevIns=%p iBus=%d iDevice=%d u32Address=%#x cb=%d u32Value=%#x\n", pDevIns, iBus, iDevice, u32Address, cb, u32Value)); + + /* If the current bus is not the target bus search for the bus which contains the device. */ + /* safe, only needs to go to the config space array */ + if (iBus != PDMPciDevGetByte(pDevIns->apPciDevs[0], VBOX_PCI_SECONDARY_BUS)) + { + PPDMPCIDEV pBridgeDevice = ich9pciFindBridge(pBus, iBus); + if (pBridgeDevice) + { + AssertPtr(pBridgeDevice->Int.s.pfnBridgeConfigWrite); + pBridgeDevice->Int.s.pfnBridgeConfigWrite(pBridgeDevice->Int.s.CTX_SUFF(pDevIns), iBus, iDevice, + u32Address, cb, u32Value); + } + } + else + { + /* This is the target bus, pass the write to the device. */ + PPDMPCIDEV pPciDev = pBus->apDevices[iDevice]; + if (pPciDev) + { + LogFunc(("%s: addr=%02x val=%08x len=%d\n", pPciDev->pszNameR3, u32Address, u32Value, cb)); + rcStrict = VINF_PDM_PCI_DO_DEFAULT; + if (pPciDev->Int.s.pfnConfigWrite) + rcStrict = pPciDev->Int.s.pfnConfigWrite(pPciDev->Int.s.CTX_SUFF(pDevIns), pPciDev, u32Address, cb, u32Value); + if (rcStrict == VINF_PDM_PCI_DO_DEFAULT) + rcStrict = devpciR3CommonConfigWriteWorker(pDevIns, PDMINS_2_DATA_CC(pDevIns, PDEVPCIBUSCC), + pPciDev, u32Address, cb, u32Value); + } + } + return rcStrict; +} + +/** + * @callback_method_impl{FNPCIBRIDGECONFIGREAD} + */ +static DECLCALLBACK(VBOXSTRICTRC) ich9pcibridgeConfigRead(PPDMDEVINSR3 pDevIns, uint8_t iBus, uint8_t iDevice, + uint32_t u32Address, unsigned cb, uint32_t *pu32Value) +{ + PDEVPCIBUS pBus = PDMINS_2_DATA(pDevIns, PDEVPCIBUS); + VBOXSTRICTRC rcStrict = VINF_SUCCESS; + LogFlowFunc(("pDevIns=%p iBus=%d iDevice=%d u32Address=%#x cb=%d\n", pDevIns, iBus, iDevice, u32Address, cb)); + + /* If the current bus is not the target bus search for the bus which contains the device. */ + /* safe, only needs to go to the config space array */ + if (iBus != PDMPciDevGetByte(pDevIns->apPciDevs[0], VBOX_PCI_SECONDARY_BUS)) + { + PPDMPCIDEV pBridgeDevice = ich9pciFindBridge(pBus, iBus); + if (pBridgeDevice) + { + AssertPtr(pBridgeDevice->Int.s.pfnBridgeConfigRead); + rcStrict = pBridgeDevice->Int.s.pfnBridgeConfigRead(pBridgeDevice->Int.s.CTX_SUFF(pDevIns), iBus, iDevice, + u32Address, cb, pu32Value); + } + else + *pu32Value = UINT32_MAX; + } + else + { + /* This is the target bus, pass the read to the device. */ + PPDMPCIDEV pPciDev = pBus->apDevices[iDevice]; + if (pPciDev) + { + rcStrict = VINF_PDM_PCI_DO_DEFAULT; + if (pPciDev->Int.s.pfnConfigRead) + rcStrict = pPciDev->Int.s.pfnConfigRead(pPciDev->Int.s.CTX_SUFF(pDevIns), pPciDev, u32Address, cb, pu32Value); + if (rcStrict == VINF_PDM_PCI_DO_DEFAULT) + rcStrict = devpciR3CommonConfigReadWorker(pPciDev, u32Address, cb, pu32Value); + LogFunc(("%s: u32Address=%02x *pu32Value=%#010x cb=%d\n", pPciDev->pszNameR3, u32Address, *pu32Value, cb)); + } + else + *pu32Value = UINT32_MAX; + } + + return rcStrict; +} + + + +/* -=-=-=-=-=- Saved State -=-=-=-=-=- */ + + +/** + * Common routine for restoring the config registers of a PCI device. + * + * @param pDevIns The device instance of the PC bus. + * @param pDev The PCI device. + * @param pbSrcConfig The configuration register values to be loaded. + */ +void devpciR3CommonRestoreConfig(PPDMDEVINS pDevIns, PPDMPCIDEV pDev, uint8_t const *pbSrcConfig) +{ + /* + * This table defines the fields for normal devices and bridge devices, and + * the order in which they need to be restored. + */ + static const struct PciField + { + uint8_t off; + uint8_t cb; + uint8_t fWritable; + uint8_t fBridge; + const char *pszName; + } s_aFields[] = + { + /* off,cb,fW,fB, pszName */ + { 0x00, 2, 0, 3, "VENDOR_ID" }, + { 0x02, 2, 0, 3, "DEVICE_ID" }, + { 0x06, 2, 1, 3, "STATUS" }, + { 0x08, 1, 0, 3, "REVISION_ID" }, + { 0x09, 1, 0, 3, "CLASS_PROG" }, + { 0x0a, 1, 0, 3, "CLASS_SUB" }, + { 0x0b, 1, 0, 3, "CLASS_BASE" }, + { 0x0c, 1, 1, 3, "CACHE_LINE_SIZE" }, + { 0x0d, 1, 1, 3, "LATENCY_TIMER" }, + { 0x0e, 1, 0, 3, "HEADER_TYPE" }, + { 0x0f, 1, 1, 3, "BIST" }, + { 0x10, 4, 1, 3, "BASE_ADDRESS_0" }, + { 0x14, 4, 1, 3, "BASE_ADDRESS_1" }, + { 0x18, 4, 1, 1, "BASE_ADDRESS_2" }, + { 0x18, 1, 1, 2, "PRIMARY_BUS" }, + { 0x19, 1, 1, 2, "SECONDARY_BUS" }, + { 0x1a, 1, 1, 2, "SUBORDINATE_BUS" }, + { 0x1b, 1, 1, 2, "SEC_LATENCY_TIMER" }, + { 0x1c, 4, 1, 1, "BASE_ADDRESS_3" }, + { 0x1c, 1, 1, 2, "IO_BASE" }, + { 0x1d, 1, 1, 2, "IO_LIMIT" }, + { 0x1e, 2, 1, 2, "SEC_STATUS" }, + { 0x20, 4, 1, 1, "BASE_ADDRESS_4" }, + { 0x20, 2, 1, 2, "MEMORY_BASE" }, + { 0x22, 2, 1, 2, "MEMORY_LIMIT" }, + { 0x24, 4, 1, 1, "BASE_ADDRESS_5" }, + { 0x24, 2, 1, 2, "PREF_MEMORY_BASE" }, + { 0x26, 2, 1, 2, "PREF_MEMORY_LIMIT" }, + { 0x28, 4, 0, 1, "CARDBUS_CIS" }, + { 0x28, 4, 1, 2, "PREF_BASE_UPPER32" }, + { 0x2c, 2, 0, 1, "SUBSYSTEM_VENDOR_ID" }, + { 0x2c, 4, 1, 2, "PREF_LIMIT_UPPER32" }, + { 0x2e, 2, 0, 1, "SUBSYSTEM_ID" }, + { 0x30, 4, 1, 1, "ROM_ADDRESS" }, + { 0x30, 2, 1, 2, "IO_BASE_UPPER16" }, + { 0x32, 2, 1, 2, "IO_LIMIT_UPPER16" }, + { 0x34, 4, 0, 3, "CAPABILITY_LIST" }, + { 0x38, 4, 1, 1, "RESERVED_38" }, + { 0x38, 4, 1, 2, "ROM_ADDRESS_BR" }, + { 0x3c, 1, 1, 3, "INTERRUPT_LINE" }, + { 0x3d, 1, 0, 3, "INTERRUPT_PIN" }, + { 0x3e, 1, 0, 1, "MIN_GNT" }, + { 0x3e, 2, 1, 2, "BRIDGE_CONTROL" }, + { 0x3f, 1, 0, 1, "MAX_LAT" }, + /* The COMMAND register must come last as it requires the *ADDRESS* + registers to be restored before we pretent to change it from 0 to + whatever value the guest assigned it. */ + { 0x04, 2, 1, 3, "COMMAND" }, + }; + +#ifdef RT_STRICT + /* Check that we've got full register coverage. */ + uint32_t bmDevice[0x40 / 32]; + uint32_t bmBridge[0x40 / 32]; + RT_ZERO(bmDevice); + RT_ZERO(bmBridge); + for (uint32_t i = 0; i < RT_ELEMENTS(s_aFields); i++) + { + uint8_t off = s_aFields[i].off; + uint8_t cb = s_aFields[i].cb; + uint8_t f = s_aFields[i].fBridge; + while (cb-- > 0) + { + if (f & 1) AssertMsg(!ASMBitTest(bmDevice, off), ("%#x\n", off)); + if (f & 2) AssertMsg(!ASMBitTest(bmBridge, off), ("%#x\n", off)); + if (f & 1) ASMBitSet(bmDevice, off); + if (f & 2) ASMBitSet(bmBridge, off); + off++; + } + } + for (uint32_t off = 0; off < 0x40; off++) + { + AssertMsg(ASMBitTest(bmDevice, off), ("%#x\n", off)); + AssertMsg(ASMBitTest(bmBridge, off), ("%#x\n", off)); + } +#endif + + /* + * Loop thru the fields covering the 64 bytes of standard registers. + */ + uint8_t const fBridge = pciDevIsPci2PciBridge(pDev) ? 2 : 1; + Assert(!pciDevIsPassthrough(pDev)); + uint8_t *pbDstConfig = &pDev->abConfig[0]; + + for (uint32_t i = 0; i < RT_ELEMENTS(s_aFields); i++) + if (s_aFields[i].fBridge & fBridge) + { + uint8_t const off = s_aFields[i].off; + uint8_t const cb = s_aFields[i].cb; + uint32_t u32Src; + uint32_t u32Dst; + switch (cb) + { + case 1: + u32Src = pbSrcConfig[off]; + u32Dst = pbDstConfig[off]; + break; + case 2: + u32Src = *(uint16_t const *)&pbSrcConfig[off]; + u32Dst = *(uint16_t const *)&pbDstConfig[off]; + break; + case 4: + u32Src = *(uint32_t const *)&pbSrcConfig[off]; + u32Dst = *(uint32_t const *)&pbDstConfig[off]; + break; + default: + AssertFailed(); + continue; + } + + if ( u32Src != u32Dst + || off == VBOX_PCI_COMMAND) + { + if (u32Src != u32Dst) + { + if (!s_aFields[i].fWritable) + LogRel(("PCI: %8s/%u: %2u-bit field %s: %x -> %x - !READ ONLY!\n", + pDev->pszNameR3, pDev->Int.s.CTX_SUFF(pDevIns)->iInstance, cb*8, s_aFields[i].pszName, u32Dst, u32Src)); + else + LogRel(("PCI: %8s/%u: %2u-bit field %s: %x -> %x\n", + pDev->pszNameR3, pDev->Int.s.CTX_SUFF(pDevIns)->iInstance, cb*8, s_aFields[i].pszName, u32Dst, u32Src)); + } + if (off == VBOX_PCI_COMMAND) + /* safe, only needs to go to the config space array */ + PDMPciDevSetCommand(pDev, 0); /* For remapping, see pciR3CommonLoadExec/ich9pciR3CommonLoadExec. */ + devpciR3SetCfg(pDevIns, pDev, off, u32Src, cb); + } + } + + /* + * The device dependent registers. + * + * We will not use ConfigWrite here as we have no clue about the size + * of the registers, so the device is responsible for correctly + * restoring functionality governed by these registers. + */ + for (uint32_t off = 0x40; off < sizeof(pDev->abConfig); off++) + if (pbDstConfig[off] != pbSrcConfig[off]) + { + LogRel(("PCI: %8s/%u: register %02x: %02x -> %02x\n", + pDev->pszNameR3, pDev->Int.s.CTX_SUFF(pDevIns)->iInstance, off, pbDstConfig[off], pbSrcConfig[off])); /** @todo make this Log() later. */ + pbDstConfig[off] = pbSrcConfig[off]; + } +} + + +/** + * @callback_method_impl{FNPCIIOREGIONOLDSETTER} + */ +static DECLCALLBACK(int) devpciR3CommonRestoreOldSetRegion(PPDMPCIDEV pPciDev, uint32_t iRegion, + RTGCPHYS cbRegion, PCIADDRESSSPACE enmType) +{ + AssertLogRelReturn(iRegion < RT_ELEMENTS(pPciDev->Int.s.aIORegions), VERR_INVALID_PARAMETER); + pPciDev->Int.s.aIORegions[iRegion].type = enmType; + pPciDev->Int.s.aIORegions[iRegion].size = cbRegion; + return VINF_SUCCESS; +} + + +/** + * @callback_method_impl{FNPCIIOREGIONSWAP} + */ +static DECLCALLBACK(int) devpciR3CommonRestoreSwapRegions(PPDMPCIDEV pPciDev, uint32_t iRegion, uint32_t iOtherRegion) +{ + AssertReturn(iRegion < iOtherRegion, VERR_INVALID_PARAMETER); + AssertLogRelReturn(iOtherRegion < RT_ELEMENTS(pPciDev->Int.s.aIORegions), VERR_INVALID_PARAMETER); + AssertReturn(pPciDev->Int.s.bPadding0 == (0xe0 | (uint8_t)iRegion), VERR_INVALID_PARAMETER); + + PCIIOREGION Tmp = pPciDev->Int.s.aIORegions[iRegion]; + pPciDev->Int.s.aIORegions[iRegion] = pPciDev->Int.s.aIORegions[iOtherRegion]; + pPciDev->Int.s.aIORegions[iOtherRegion] = Tmp; + + return VINF_SUCCESS; +} + + +/** + * Checks for and deals with changes in resource sizes and types. + * + * @returns VBox status code. + * @param pHlp The device instance helper callback table. + * @param pSSM The Saved state handle. + * @param pPciDev The PCI device in question. + * @param paIoRegions I/O regions with the size and type fields from + * the saved state. + * @param fNewState Set if this is a new state with I/O region sizes + * and types, clear if old one. + */ +int devpciR3CommonRestoreRegions(PCPDMDEVHLPR3 pHlp, PSSMHANDLE pSSM, PPDMPCIDEV pPciDev, PPCIIOREGION paIoRegions, bool fNewState) +{ + int rc; + if (fNewState) + { + for (uint32_t iRegion = 0; iRegion < VBOX_PCI_NUM_REGIONS; iRegion++) + { + if ( pPciDev->Int.s.aIORegions[iRegion].type != paIoRegions[iRegion].type + || pPciDev->Int.s.aIORegions[iRegion].size != paIoRegions[iRegion].size) + { + AssertLogRelMsgFailed(("PCI: %8s/%u: region #%u size/type load change: %#RGp/%#x -> %#RGp/%#x\n", + pPciDev->pszNameR3, pPciDev->Int.s.CTX_SUFF(pDevIns)->iInstance, iRegion, + pPciDev->Int.s.aIORegions[iRegion].size, pPciDev->Int.s.aIORegions[iRegion].type, + paIoRegions[iRegion].size, paIoRegions[iRegion].type)); + if (pPciDev->pfnRegionLoadChangeHookR3) + { + pPciDev->Int.s.bPadding0 = 0xe0 | (uint8_t)iRegion; + rc = pPciDev->pfnRegionLoadChangeHookR3(pPciDev->Int.s.pDevInsR3, pPciDev, iRegion, paIoRegions[iRegion].size, + (PCIADDRESSSPACE)paIoRegions[iRegion].type, NULL /*pfnOldSetter*/, + devpciR3CommonRestoreSwapRegions); + pPciDev->Int.s.bPadding0 = 0; + if (RT_FAILURE(rc)) + return pHlp->pfnSSMSetLoadError(pSSM, rc, RT_SRC_POS, + N_("Device %s/%u failed to respond to region #%u size/type changing from %#RGp/%#x to %#RGp/%#x: %Rrc"), + pPciDev->pszNameR3, pPciDev->Int.s.CTX_SUFF(pDevIns)->iInstance, iRegion, + pPciDev->Int.s.aIORegions[iRegion].size, pPciDev->Int.s.aIORegions[iRegion].type, + paIoRegions[iRegion].size, paIoRegions[iRegion].type, rc); + } + pPciDev->Int.s.aIORegions[iRegion].type = paIoRegions[iRegion].type; + pPciDev->Int.s.aIORegions[iRegion].size = paIoRegions[iRegion].size; + } + } + } + /* Old saved state without sizes and types. Do a special hook call to give + devices with changes a chance to adjust resources back to old values. */ + else if (pPciDev->pfnRegionLoadChangeHookR3) + { + rc = pPciDev->pfnRegionLoadChangeHookR3(pPciDev->Int.s.pDevInsR3, pPciDev, UINT32_MAX, RTGCPHYS_MAX, (PCIADDRESSSPACE)-1, + devpciR3CommonRestoreOldSetRegion, NULL); + if (RT_FAILURE(rc)) + return pHlp->pfnSSMSetLoadError(pSSM, rc, RT_SRC_POS, N_("Device %s/%u failed to resize its resources: %Rrc"), + pPciDev->pszNameR3, pPciDev->Int.s.CTX_SUFF(pDevIns)->iInstance, rc); + } + return VINF_SUCCESS; +} + + +/** + * Common worker for ich9pciR3LoadExec and ich9pcibridgeR3LoadExec. + * + * @returns VBox status code. + * @param pDevIns The device instance of the bus. + * @param pBus The bus which data is being loaded. + * @param pSSM The saved state handle. + * @param uVersion The data version. + * @param uPass The pass. + */ +static int ich9pciR3CommonLoadExec(PPDMDEVINS pDevIns, PDEVPCIBUS pBus, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + PCPDMDEVHLPR3 pHlp = pDevIns->pHlpR3; + uint32_t u32; + int rc; + + Assert(uPass == SSM_PASS_FINAL); NOREF(uPass); + if ( uVersion < VBOX_ICH9PCI_SAVED_STATE_VERSION_MSI + || uVersion > VBOX_ICH9PCI_SAVED_STATE_VERSION) + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + + /* + * Iterate thru all the devices and write 0 to the COMMAND register so + * that all the memory is unmapped before we start restoring the saved + * mapping locations. + * + * The register value is restored afterwards so we can do proper + * LogRels in devpciR3CommonRestoreConfig. + */ + for (uint32_t uDevFn = 0; uDevFn < RT_ELEMENTS(pBus->apDevices); uDevFn++) + { + PPDMPCIDEV pDev = pBus->apDevices[uDevFn]; + if (pDev) + { + /* safe, only needs to go to the config space array */ + uint16_t u16 = PDMPciDevGetCommand(pDev); + devpciR3SetCfg(pDevIns, pDev, VBOX_PCI_COMMAND, 0 /*u32Value*/, 2 /*cb*/); + /* safe, only needs to go to the config space array */ + PDMPciDevSetCommand(pDev, u16); + /* safe, only needs to go to the config space array */ + Assert(PDMPciDevGetCommand(pDev) == u16); + } + } + + /* + * Iterate all the devices. + */ + for (uint32_t uDevFn = 0;; uDevFn++) + { + /* index / terminator */ + rc = pHlp->pfnSSMGetU32(pSSM, &u32); + if (RT_FAILURE(rc)) + break; + if (u32 == (uint32_t)~0) + break; + AssertLogRelMsgBreak(u32 < RT_ELEMENTS(pBus->apDevices) && u32 >= uDevFn, ("u32=%#x uDevFn=%#x\n", u32, uDevFn)); + + /* skip forward to the device checking that no new devices are present. */ + PPDMPCIDEV pDev; + for (; uDevFn < u32; uDevFn++) + { + pDev = pBus->apDevices[uDevFn]; + if (pDev) + { + /* safe, only needs to go to the config space array */ + LogRel(("PCI: New device in slot %#x, %s (vendor=%#06x device=%#06x)\n", uDevFn, pDev->pszNameR3, + PDMPciDevGetVendorId(pDev), PDMPciDevGetDeviceId(pDev))); + if (pHlp->pfnSSMHandleGetAfter(pSSM) != SSMAFTER_DEBUG_IT) + { + /* safe, only needs to go to the config space array */ + rc = pHlp->pfnSSMSetCfgError(pSSM, RT_SRC_POS, N_("New device in slot %#x, %s (vendor=%#06x device=%#06x)"), + uDevFn, pDev->pszNameR3, PDMPciDevGetVendorId(pDev), PDMPciDevGetDeviceId(pDev)); + break; + } + } + } + if (RT_FAILURE(rc)) + break; + pDev = pBus->apDevices[uDevFn]; + + /* get the data */ + union + { + PDMPCIDEV DevTmp; + uint8_t abPadding[RT_UOFFSETOF(PDMPCIDEV, abMsixState) + _32K + _16K]; /* the MSI-X state shouldn't be much more than 32KB. */ + } u; + RT_ZERO(u); + u.DevTmp.Int.s.fFlags = 0; + u.DevTmp.Int.s.u8MsiCapOffset = 0; + u.DevTmp.Int.s.u8MsiCapSize = 0; + u.DevTmp.Int.s.u8MsixCapOffset = 0; + u.DevTmp.Int.s.u8MsixCapSize = 0; + u.DevTmp.Int.s.uIrqPinState = ~0; /* Invalid value in case we have an older saved state to force a state change in pciSetIrq. */ + uint32_t cbConfig = 256; + if (uVersion >= VBOX_ICH9PCI_SAVED_STATE_VERSION_4KB_CFG_SPACE) + { + rc = pHlp->pfnSSMGetU32(pSSM, &cbConfig); + AssertRCReturn(rc, rc); + if (cbConfig != 256 && cbConfig != _4K) + return pHlp->pfnSSMSetLoadError(pSSM, VERR_SSM_DATA_UNIT_FORMAT_CHANGED, RT_SRC_POS, + "cbConfig=%#RX32, expected 0x100 or 0x1000", cbConfig); + } + pHlp->pfnSSMGetMem(pSSM, u.DevTmp.abConfig, cbConfig); + + pHlp->pfnSSMGetU32(pSSM, &u.DevTmp.Int.s.fFlags); + pHlp->pfnSSMGetS32(pSSM, &u.DevTmp.Int.s.uIrqPinState); + pHlp->pfnSSMGetU8(pSSM, &u.DevTmp.Int.s.u8MsiCapOffset); + pHlp->pfnSSMGetU8(pSSM, &u.DevTmp.Int.s.u8MsiCapSize); + pHlp->pfnSSMGetU8(pSSM, &u.DevTmp.Int.s.u8MsixCapOffset); + rc = pHlp->pfnSSMGetU8(pSSM, &u.DevTmp.Int.s.u8MsixCapSize); + AssertRCReturn(rc, rc); + + /* Load MSI-X page state */ + uint32_t cbMsixState = u.DevTmp.Int.s.u8MsixCapOffset != 0 ? _4K : 0; + if (uVersion >= VBOX_ICH9PCI_SAVED_STATE_VERSION_4KB_CFG_SPACE) + { + rc = pHlp->pfnSSMGetU32(pSSM, &cbMsixState); + AssertRCReturn(rc, rc); + } + if (cbMsixState) + { + if ( cbMsixState > (uint32_t)(pDev ? pDev->cbMsixState : _32K + _16K) + || cbMsixState > sizeof(u) - RT_UOFFSETOF(PDMPCIDEV, abMsixState)) + return pHlp->pfnSSMSetLoadError(pSSM, VERR_SSM_DATA_UNIT_FORMAT_CHANGED, RT_SRC_POS, + "cbMsixState=%#RX32, expected at most RT_MIN(%#x, %#zx)", + cbMsixState, (pDev ? pDev->cbMsixState : _32K + _16K), + sizeof(u) - RT_UOFFSETOF(PDMPCIDEV, abMsixState)); + rc = pHlp->pfnSSMGetMem(pSSM, u.DevTmp.abMsixState, cbMsixState); + AssertRCReturn(rc, rc); + } + + /* Load the region types and sizes. */ + if (uVersion >= VBOX_ICH9PCI_SAVED_STATE_VERSION_REGION_SIZES) + { + for (uint32_t iRegion = 0; iRegion < VBOX_PCI_NUM_REGIONS; iRegion++) + { + pHlp->pfnSSMGetU8(pSSM, &u.DevTmp.Int.s.aIORegions[iRegion].type); + rc = pHlp->pfnSSMGetU64(pSSM, &u.DevTmp.Int.s.aIORegions[iRegion].size); + AssertLogRelRCReturn(rc, rc); + } + } + + /* + * Check that it's still around. + */ + pDev = pBus->apDevices[uDevFn]; + if (!pDev) + { + /* safe, only needs to go to the config space array */ + LogRel(("PCI: Device in slot %#x has been removed! vendor=%#06x device=%#06x\n", uDevFn, + PDMPciDevGetVendorId(&u.DevTmp), PDMPciDevGetDeviceId(&u.DevTmp))); + if (pHlp->pfnSSMHandleGetAfter(pSSM) != SSMAFTER_DEBUG_IT) + { + /* safe, only needs to go to the config space array */ + rc = pHlp->pfnSSMSetCfgError(pSSM, RT_SRC_POS, N_("Device in slot %#x has been removed! vendor=%#06x device=%#06x"), + uDevFn, PDMPciDevGetVendorId(&u.DevTmp), PDMPciDevGetDeviceId(&u.DevTmp)); + break; + } + continue; + } + + /* match the vendor id assuming that this will never be changed. */ + /* safe, only needs to go to the config space array */ + if (PDMPciDevGetVendorId(&u.DevTmp) != PDMPciDevGetVendorId(pDev)) + { + /* safe, only needs to go to the config space array */ + rc = pHlp->pfnSSMSetCfgError(pSSM, RT_SRC_POS, N_("Device in slot %#x (%s) vendor id mismatch! saved=%.4Rhxs current=%.4Rhxs"), + uDevFn, pDev->pszNameR3, PDMPciDevGetVendorId(&u.DevTmp), PDMPciDevGetVendorId(pDev)); + break; + } + + /* commit the loaded device config. */ + rc = devpciR3CommonRestoreRegions(pHlp, pSSM, pDev, u.DevTmp.Int.s.aIORegions, + uVersion >= VBOX_ICH9PCI_SAVED_STATE_VERSION_REGION_SIZES); + if (RT_FAILURE(rc)) + break; + Assert(!pciDevIsPassthrough(pDev)); + devpciR3CommonRestoreConfig(pDevIns, pDev, &u.DevTmp.abConfig[0]); + + pDev->Int.s.uIrqPinState = u.DevTmp.Int.s.uIrqPinState; + pDev->Int.s.u8MsiCapOffset = u.DevTmp.Int.s.u8MsiCapOffset; + pDev->Int.s.u8MsiCapSize = u.DevTmp.Int.s.u8MsiCapSize; + pDev->Int.s.u8MsixCapOffset = u.DevTmp.Int.s.u8MsixCapOffset; + pDev->Int.s.u8MsixCapSize = u.DevTmp.Int.s.u8MsixCapSize; + if (u.DevTmp.Int.s.u8MsixCapSize != 0) /** @todo r=bird: Why isn't this checkin u8MsixCapOffset??? */ + { + Assert(pDev->Int.s.cbMsixRegion != 0); + Assert(pDev->cbMsixState != 0); + memcpy(pDev->abMsixState, u.DevTmp.abMsixState, RT_MIN(pDev->Int.s.cbMsixRegion, _32K + _16K)); + } + } + + return rc; +} + +static DECLCALLBACK(int) ich9pciR3LoadExec(PPDMDEVINS pDevIns, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + PDEVPCIROOT pThis = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + PCPDMDEVHLPR3 pHlp = pDevIns->pHlpR3; + PDEVPCIBUS pBus = &pThis->PciBus; + uint32_t u32; + int rc; + + /* We ignore this version as there's no saved state with it anyway */ + if (uVersion <= VBOX_ICH9PCI_SAVED_STATE_VERSION_NOMSI) + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + if (uVersion > VBOX_ICH9PCI_SAVED_STATE_VERSION) + return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; + + /* + * Bus state data. + */ + pHlp->pfnSSMGetU32(pSSM, &pThis->uConfigReg); + + /* + * Load IRQ states. + */ + for (unsigned i = 0; i < RT_ELEMENTS(pThis->auPciApicIrqLevels); i++) + pHlp->pfnSSMGetU32V(pSSM, &pThis->auPciApicIrqLevels[i]); + + /* separator */ + rc = pHlp->pfnSSMGetU32(pSSM, &u32); + if (RT_FAILURE(rc)) + return rc; + if (u32 != (uint32_t)~0) + AssertMsgFailedReturn(("u32=%#x\n", u32), rc); + + return ich9pciR3CommonLoadExec(pDevIns, pBus, pSSM, uVersion, uPass); +} + +static DECLCALLBACK(int) ich9pcibridgeR3LoadExec(PPDMDEVINS pDevIns, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) +{ + PDEVPCIBUS pThis = PDMINS_2_DATA(pDevIns, PDEVPCIBUS); + return ich9pciR3CommonLoadExec(pDevIns, pThis, pSSM, uVersion, uPass); +} + + + +/* -=-=-=-=-=- Fake PCI BIOS Init -=-=-=-=-=- */ + + +void devpciR3BiosInitSetRegionAddress(PPDMDEVINS pDevIns, PDEVPCIBUS pBus, PPDMPCIDEV pPciDev, int iRegion, uint64_t addr) +{ + NOREF(pBus); + uint32_t uReg = devpciGetRegionReg(iRegion); + + /* Read memory type first. */ + uint8_t uResourceType = devpciR3GetByte(pPciDev, uReg); + bool f64Bit = (uResourceType & ((uint8_t)(PCI_ADDRESS_SPACE_BAR64 | PCI_ADDRESS_SPACE_IO))) + == PCI_ADDRESS_SPACE_BAR64; + + Log(("Set region address: %02x:%02x.%d region %d address=%RX64%s\n", + pBus->iBus, pPciDev->uDevFn >> 3, pPciDev->uDevFn & 7, iRegion, addr, f64Bit ? " (64-bit)" : "")); + + /* Write address of the device. */ + devpciR3SetDWord(pDevIns, pPciDev, uReg, (uint32_t)addr); + if (f64Bit) + devpciR3SetDWord(pDevIns, pPciDev, uReg + 4, (uint32_t)(addr >> 32)); +} + + +static void ich9pciBiosInitBridge(PPDMDEVINS pDevIns, PDEVPCIROOT pPciRoot, PDEVPCIBUS pBus) +{ + PPDMPCIDEV pBridge = pDevIns->apPciDevs[0]; + Log(("BIOS init bridge: %02x:%02x.%d\n", pBus->iBus, pBridge->uDevFn >> 3, pBridge->uDevFn & 7)); + + /* + * The I/O range for the bridge must be aligned to a 4KB boundary. + * This does not change anything really as the access to the device is not going + * through the bridge but we want to be compliant to the spec. + */ + if ((pPciRoot->uPciBiosIo % _4K) != 0) + { + pPciRoot->uPciBiosIo = RT_ALIGN_32(pPciRoot->uPciBiosIo, _4K); + LogFunc(("Aligned I/O start address. New address %#x\n", pPciRoot->uPciBiosIo)); + } + devpciR3SetByte(pDevIns, pBridge, VBOX_PCI_IO_BASE, (pPciRoot->uPciBiosIo >> 8) & 0xf0); + + /* The MMIO range for the bridge must be aligned to a 1MB boundary. */ + if ((pPciRoot->uPciBiosMmio % _1M) != 0) + { + pPciRoot->uPciBiosMmio = RT_ALIGN_32(pPciRoot->uPciBiosMmio, _1M); + LogFunc(("Aligned MMIO start address. New address %#x\n", pPciRoot->uPciBiosMmio)); + } + devpciR3SetWord(pDevIns, pBridge, VBOX_PCI_MEMORY_BASE, (pPciRoot->uPciBiosMmio >> 16) & UINT32_C(0xffff0)); + + /* Save values to compare later to. */ + uint32_t u32IoAddressBase = pPciRoot->uPciBiosIo; + uint32_t u32MMIOAddressBase = pPciRoot->uPciBiosMmio; + + /* Init all devices behind the bridge (recursing to further buses). */ + ich9pciBiosInitAllDevicesOnBus(pDevIns, pPciRoot, pBus); + + /* + * Set I/O limit register. If there is no device with I/O space behind the + * bridge we set a lower value than in the base register. + */ + if (u32IoAddressBase != pPciRoot->uPciBiosIo) + { + /* Need again alignment to a 4KB boundary. */ + pPciRoot->uPciBiosIo = RT_ALIGN_32(pPciRoot->uPciBiosIo, _4K); + devpciR3SetByte(pDevIns, pBridge, VBOX_PCI_IO_LIMIT, ((pPciRoot->uPciBiosIo - 1) >> 8) & 0xf0); + } + else + { + devpciR3SetByte(pDevIns, pBridge, VBOX_PCI_IO_BASE, 0xf0); + devpciR3SetByte(pDevIns, pBridge, VBOX_PCI_IO_LIMIT, 0x00); + } + + /* Same with the MMIO limit register but with 1MB boundary here. */ + if (u32MMIOAddressBase != pPciRoot->uPciBiosMmio) + { + pPciRoot->uPciBiosMmio = RT_ALIGN_32(pPciRoot->uPciBiosMmio, _1M); + devpciR3SetWord(pDevIns, pBridge, VBOX_PCI_MEMORY_LIMIT, ((pPciRoot->uPciBiosMmio - 1) >> 16) & UINT32_C(0xfff0)); + } + else + { + devpciR3SetWord(pDevIns, pBridge, VBOX_PCI_MEMORY_BASE, 0xfff0); + devpciR3SetWord(pDevIns, pBridge, VBOX_PCI_MEMORY_LIMIT, 0x0000); + } + + /* + * Set the prefetch base and limit registers. We currently have no device with a prefetchable region + * which may be behind a bridge. That's why it is unconditionally disabled here atm by writing a higher value into + * the base register than in the limit register. + */ + devpciR3SetWord(pDevIns, pBridge, VBOX_PCI_PREF_MEMORY_BASE, 0xfff0); + devpciR3SetWord(pDevIns, pBridge, VBOX_PCI_PREF_MEMORY_LIMIT, 0x0000); + devpciR3SetDWord(pDevIns, pBridge, VBOX_PCI_PREF_BASE_UPPER32, 0x00000000); + devpciR3SetDWord(pDevIns, pBridge, VBOX_PCI_PREF_LIMIT_UPPER32, 0x00000000); +} + +static int ich9pciBiosInitDeviceGetRegions(PPDMPCIDEV pPciDev) +{ + uint8_t uHeaderType = devpciR3GetByte(pPciDev, VBOX_PCI_HEADER_TYPE) & 0x7f; + if (uHeaderType == 0x00) + /* Ignore ROM region here, which is included in VBOX_PCI_NUM_REGIONS. */ + return VBOX_PCI_NUM_REGIONS - 1; + else if (uHeaderType == 0x01) + /* PCI bridges have 2 BARs. */ + return 2; + else + { + AssertMsgFailed(("invalid header type %#x\n", uHeaderType)); + return 0; + } +} + +static void ich9pciBiosInitDeviceBARs(PPDMDEVINS pDevIns, PDEVPCIROOT pPciRoot, PDEVPCIBUS pBus, PPDMPCIDEV pPciDev) +{ + int cRegions = ich9pciBiosInitDeviceGetRegions(pPciDev); + bool fSuppressMem = false; + bool fActiveMemRegion = false; + bool fActiveIORegion = false; + for (int iRegion = 0; iRegion < cRegions; iRegion++) + { + uint32_t u32Address = devpciGetRegionReg(iRegion); + + /* Calculate size - we write all 1s into the BAR, and then evaluate which bits + are cleared. */ + uint8_t u8ResourceType = devpciR3GetByte(pPciDev, u32Address); + + bool fPrefetch = (u8ResourceType & ((uint8_t)(PCI_ADDRESS_SPACE_MEM_PREFETCH | PCI_ADDRESS_SPACE_IO))) + == PCI_ADDRESS_SPACE_MEM_PREFETCH; + bool f64Bit = (u8ResourceType & ((uint8_t)(PCI_ADDRESS_SPACE_BAR64 | PCI_ADDRESS_SPACE_IO))) + == PCI_ADDRESS_SPACE_BAR64; + bool fIsPio = ((u8ResourceType & PCI_ADDRESS_SPACE_IO) == PCI_ADDRESS_SPACE_IO); + uint64_t cbRegSize64 = 0; + + /* Hack: initialize prefetchable BARs for devices on the root bus + * early, but for all other prefetchable BARs do it after the + * non-prefetchable BARs are initialized on all buses. */ + if (fPrefetch && pBus->iBus != 0) + { + fSuppressMem = true; + if (f64Bit) + iRegion++; /* skip next region */ + continue; + } + + if (f64Bit) + { + devpciR3SetDWord(pDevIns, pPciDev, u32Address, UINT32_C(0xffffffff)); + devpciR3SetDWord(pDevIns, pPciDev, u32Address+4, UINT32_C(0xffffffff)); + cbRegSize64 = RT_MAKE_U64(devpciR3GetDWord(pPciDev, u32Address), + devpciR3GetDWord(pPciDev, u32Address+4)); + cbRegSize64 &= ~UINT64_C(0x0f); + cbRegSize64 = (~cbRegSize64) + 1; + + /* No 64-bit PIO regions possible. */ +#ifndef DEBUG_bird /* EFI triggers this for DevAHCI. */ + AssertMsg((u8ResourceType & PCI_ADDRESS_SPACE_IO) == 0, ("type=%#x rgn=%d\n", u8ResourceType, iRegion)); +#endif + } + else + { + uint32_t cbRegSize32; + devpciR3SetDWord(pDevIns, pPciDev, u32Address, UINT32_C(0xffffffff)); + cbRegSize32 = devpciR3GetDWord(pPciDev, u32Address); + + /* Clear resource information depending on resource type. */ + if (fIsPio) /* PIO */ + cbRegSize32 &= ~UINT32_C(0x01); + else /* MMIO */ + cbRegSize32 &= ~UINT32_C(0x0f); + + /* + * Invert all bits and add 1 to get size of the region. + * (From PCI implementation note) + */ + if (fIsPio && (cbRegSize32 & UINT32_C(0xffff0000)) == 0) + cbRegSize32 = (~(cbRegSize32 | UINT32_C(0xffff0000))) + 1; + else + cbRegSize32 = (~cbRegSize32) + 1; + + cbRegSize64 = cbRegSize32; + } + Log2Func(("Size of region %u for device %d on bus %d is %lld\n", iRegion, pPciDev->uDevFn, pBus->iBus, cbRegSize64)); + + if (cbRegSize64) + { + /* Try 32-bit base first. */ + uint32_t* paddr = fIsPio ? &pPciRoot->uPciBiosIo : &pPciRoot->uPciBiosMmio; + uint64_t uNew = *paddr; + /* Align starting address to region size. */ + uNew = (uNew + cbRegSize64 - 1) & ~(cbRegSize64 - 1); + if (fIsPio) + uNew &= UINT32_C(0xffff); + /* Unconditionally exclude I/O-APIC/HPET/ROM. Pessimistic, but better than causing a mess. */ + if ( !uNew + || (uNew <= UINT32_C(0xffffffff) && uNew + cbRegSize64 - 1 >= UINT32_C(0xfec00000)) + || uNew >= _4G) + { + /* Only prefetchable regions can be placed above 4GB, as the + * address decoder for non-prefetchable addresses in bridges + * is limited to 32 bits. */ + if (f64Bit && fPrefetch) + { + /* Map a 64-bit region above 4GB. */ + Assert(!fIsPio); + uNew = pPciRoot->uPciBiosMmio64; + /* Align starting address to region size. */ + uNew = (uNew + cbRegSize64 - 1) & ~(cbRegSize64 - 1); + LogFunc(("Start address of 64-bit MMIO region %u/%u is %#llx\n", iRegion, iRegion + 1, uNew)); + devpciR3BiosInitSetRegionAddress(pDevIns, pBus, pPciDev, iRegion, uNew); + fActiveMemRegion = true; + pPciRoot->uPciBiosMmio64 = uNew + cbRegSize64; + Log2Func(("New 64-bit address is %#llx\n", pPciRoot->uPciBiosMmio64)); + } + else + { + uint16_t uVendor = devpciR3GetWord(pPciDev, VBOX_PCI_VENDOR_ID); + uint16_t uDevice = devpciR3GetWord(pPciDev, VBOX_PCI_DEVICE_ID); + LogRel(("PCI: no space left for BAR%u of device %u/%u/%u (vendor=%#06x device=%#06x)\n", + iRegion, pBus->iBus, pPciDev->uDevFn >> 3, pPciDev->uDevFn & 7, uVendor, uDevice)); /** @todo make this a VM start failure later. */ + /* Undo the mapping mess caused by the size probing. */ + devpciR3SetDWord(pDevIns, pPciDev, u32Address, UINT32_C(0)); + } + } + else + { + LogFunc(("Start address of %s region %u is %#x\n", (fIsPio ? "I/O" : "MMIO"), iRegion, uNew)); + devpciR3BiosInitSetRegionAddress(pDevIns, pBus, pPciDev, iRegion, uNew); + if (fIsPio) + fActiveIORegion = true; + else + fActiveMemRegion = true; + *paddr = uNew + cbRegSize64; + Log2Func(("New 32-bit address is %#x\n", *paddr)); + } + + if (f64Bit) + iRegion++; /* skip next region */ + } + } + + /* Update the command word appropriately. */ + uint16_t uCmd = devpciR3GetWord(pPciDev, VBOX_PCI_COMMAND); + if (fActiveMemRegion && !fSuppressMem) + uCmd |= VBOX_PCI_COMMAND_MEMORY; /* Enable MMIO access. */ + if (fActiveIORegion) + uCmd |= VBOX_PCI_COMMAND_IO; /* Enable I/O space access. */ + devpciR3SetWord(pDevIns, pPciDev, VBOX_PCI_COMMAND, uCmd); +} + +static bool ich9pciBiosInitDevicePrefetchableBARs(PPDMDEVINS pDevIns, PDEVPCIROOT pPciRoot, PDEVPCIBUS pBus, PPDMPCIDEV pPciDev, bool fUse64Bit, bool fDryrun) +{ + int cRegions = ich9pciBiosInitDeviceGetRegions(pPciDev); + bool fActiveMemRegion = false; + for (int iRegion = 0; iRegion < cRegions; iRegion++) + { + uint32_t u32Address = devpciGetRegionReg(iRegion); + uint8_t u8ResourceType = devpciR3GetByte(pPciDev, u32Address); + bool fPrefetch = (u8ResourceType & ((uint8_t)(PCI_ADDRESS_SPACE_MEM_PREFETCH | PCI_ADDRESS_SPACE_IO))) + == PCI_ADDRESS_SPACE_MEM_PREFETCH; + bool f64Bit = (u8ResourceType & ((uint8_t)(PCI_ADDRESS_SPACE_BAR64 | PCI_ADDRESS_SPACE_IO))) + == PCI_ADDRESS_SPACE_BAR64; + uint64_t cbRegSize64 = 0; + + /* Everything besides prefetchable regions has been set up already. */ + if (!fPrefetch) + continue; + + if (f64Bit) + { + devpciR3SetDWord(pDevIns, pPciDev, u32Address, UINT32_C(0xffffffff)); + devpciR3SetDWord(pDevIns, pPciDev, u32Address+4, UINT32_C(0xffffffff)); + cbRegSize64 = RT_MAKE_U64(devpciR3GetDWord(pPciDev, u32Address), + devpciR3GetDWord(pPciDev, u32Address+4)); + cbRegSize64 &= ~UINT64_C(0x0f); + cbRegSize64 = (~cbRegSize64) + 1; + } + else + { + uint32_t cbRegSize32; + devpciR3SetDWord(pDevIns, pPciDev, u32Address, UINT32_C(0xffffffff)); + cbRegSize32 = devpciR3GetDWord(pPciDev, u32Address); + cbRegSize32 &= ~UINT32_C(0x0f); + cbRegSize32 = (~cbRegSize32) + 1; + + cbRegSize64 = cbRegSize32; + } + Log2Func(("Size of region %u for device %d on bus %d is %lld\n", iRegion, pPciDev->uDevFn, pBus->iBus, cbRegSize64)); + + if (cbRegSize64) + { + uint64_t uNew; + if (!fUse64Bit) + { + uNew = pPciRoot->uPciBiosMmio; + /* Align starting address to region size. */ + uNew = (uNew + cbRegSize64 - 1) & ~(cbRegSize64 - 1); + /* Unconditionally exclude I/O-APIC/HPET/ROM. Pessimistic, but better than causing a mess. Okay for BIOS. */ + if ( !uNew + || (uNew <= UINT32_C(0xffffffff) && uNew + cbRegSize64 - 1 >= UINT32_C(0xfec00000)) + || uNew >= _4G) + { + Log2Func(("region #%u: Rejecting address range: %#x LB %#RX64\n", iRegion, uNew, cbRegSize64)); + Assert(fDryrun); + return true; + } + if (!fDryrun) + { + LogFunc(("Start address of MMIO region %u is %#x\n", iRegion, uNew)); + devpciR3BiosInitSetRegionAddress(pDevIns, pBus, pPciDev, iRegion, uNew); + fActiveMemRegion = true; + } + pPciRoot->uPciBiosMmio = uNew + cbRegSize64; + } + else + { + /* Can't handle 32-bit BARs when forcing 64-bit allocs. */ + if (!f64Bit) + { + Assert(fDryrun); + return true; + } + uNew = pPciRoot->uPciBiosMmio64; + /* Align starting address to region size. */ + uNew = (uNew + cbRegSize64 - 1) & ~(cbRegSize64 - 1); + pPciRoot->uPciBiosMmio64 = uNew + cbRegSize64; + if (!fDryrun) + { + LogFunc(("Start address of 64-bit MMIO region %u/%u is %#llx\n", iRegion, iRegion + 1, uNew)); + devpciR3BiosInitSetRegionAddress(pDevIns, pBus, pPciDev, iRegion, uNew); + fActiveMemRegion = true; + } + } + + if (f64Bit) + iRegion++; /* skip next region */ + } + } + + if (!fDryrun) + { + /* Update the command word appropriately. */ + uint16_t uCmd = devpciR3GetWord(pPciDev, VBOX_PCI_COMMAND); + if (fActiveMemRegion) + uCmd |= VBOX_PCI_COMMAND_MEMORY; /* Enable MMIO access. */ + devpciR3SetWord(pDevIns, pPciDev, VBOX_PCI_COMMAND, uCmd); + } + else + Assert(!fActiveMemRegion); + + return false; +} + +static bool ich9pciBiosInitBridgePrefetchable(PPDMDEVINS pDevIns, PDEVPCIROOT pPciRoot, PDEVPCIBUS pBus, bool fUse64Bit, bool fDryrun) +{ + PPDMPCIDEV pBridge = pDevIns->apPciDevs[0]; + Log(("BIOS init bridge (prefetch): %02x:%02x.%d use64bit=%d dryrun=%d\n", pBus->iBus, pBridge->uDevFn >> 3, pBridge->uDevFn & 7, fUse64Bit, fDryrun)); + + pPciRoot->uPciBiosMmio = RT_ALIGN_32(pPciRoot->uPciBiosMmio, _1M); + pPciRoot->uPciBiosMmio64 = RT_ALIGN_64(pPciRoot->uPciBiosMmio64, _1M); + + /* Save values to compare later to. */ + uint32_t u32MMIOAddressBase = pPciRoot->uPciBiosMmio; + uint64_t u64MMIOAddressBase = pPciRoot->uPciBiosMmio64; + + /* Init all devices behind the bridge (recursing to further buses). */ + bool fRes = ich9pciBiosInitAllDevicesPrefetchableOnBus(pDevIns, pPciRoot, pBus, fUse64Bit, fDryrun); + if (fDryrun) + return fRes; + Assert(!fRes); + + /* Set prefetchable MMIO limit register with 1MB boundary. */ + uint64_t uBase, uLimit; + if (fUse64Bit) + { + if (u64MMIOAddressBase == pPciRoot->uPciBiosMmio64) + return false; + uBase = u64MMIOAddressBase; + uLimit = RT_ALIGN_64(pPciRoot->uPciBiosMmio64, _1M) - 1; + } + else + { + if (u32MMIOAddressBase == pPciRoot->uPciBiosMmio) + return false; + uBase = u32MMIOAddressBase; + uLimit = RT_ALIGN_32(pPciRoot->uPciBiosMmio, _1M) - 1; + } + devpciR3SetDWord(pDevIns, pBridge, VBOX_PCI_PREF_BASE_UPPER32, uBase >> 32); + devpciR3SetWord(pDevIns, pBridge, VBOX_PCI_PREF_MEMORY_BASE, (uint32_t)(uBase >> 16) & UINT32_C(0xfff0)); + devpciR3SetDWord(pDevIns, pBridge, VBOX_PCI_PREF_LIMIT_UPPER32, uLimit >> 32); + devpciR3SetWord(pDevIns, pBridge, VBOX_PCI_PREF_MEMORY_LIMIT, (uint32_t)(uLimit >> 16) & UINT32_C(0xfff0)); + + return false; +} + +static bool ich9pciBiosInitAllDevicesPrefetchableOnBus(PPDMDEVINS pDevIns, PDEVPCIROOT pPciRoot, PDEVPCIBUS pBus, + bool fUse64Bit, bool fDryrun) +{ + /* First pass: assign resources to all devices. */ + for (uint32_t uDevFn = 0; uDevFn < RT_ELEMENTS(pBus->apDevices); uDevFn++) + { + PPDMPCIDEV pPciDev = pBus->apDevices[uDevFn]; + + /* check if device is present */ + if (!pPciDev) + continue; + + Log(("BIOS init device (prefetch): %02x:%02x.%d\n", pBus->iBus, uDevFn >> 3, uDevFn & 7)); + + /* prefetchable memory mappings */ + bool fRes = ich9pciBiosInitDevicePrefetchableBARs(pDevIns, pPciRoot, pBus, pPciDev, fUse64Bit, fDryrun); + if (fRes) + { + Assert(fDryrun); + return fRes; + } + } + + /* Second pass: handle bridges recursively. */ + for (uint32_t iBridge = 0; iBridge < pBus->cBridges; iBridge++) + { + PPDMPCIDEV pBridge = pBus->papBridgesR3[iBridge]; + AssertMsg(pBridge && pciDevIsPci2PciBridge(pBridge), + ("Device is not a PCI bridge but on the list of PCI bridges\n")); + PDEVPCIBUS pChildBus = PDMINS_2_DATA(pBridge->Int.s.CTX_SUFF(pDevIns), PDEVPCIBUS); + + bool fRes = ich9pciBiosInitBridgePrefetchable(pDevIns, pPciRoot, pChildBus, fUse64Bit, fDryrun); + if (fRes) + { + Assert(fDryrun); + return fRes; + } + } + return false; +} + +static void ich9pciBiosInitAllDevicesOnBus(PPDMDEVINS pDevIns, PDEVPCIROOT pPciRoot, PDEVPCIBUS pBus) +{ + PDEVPCIBUSCC pBusCC = PDMINS_2_DATA_CC(pDevIns, PDEVPCIBUSCC); + + /* First pass: assign resources to all devices and map the interrupt. */ + for (uint32_t uDevFn = 0; uDevFn < RT_ELEMENTS(pBus->apDevices); uDevFn++) + { + PPDMPCIDEV pPciDev = pBus->apDevices[uDevFn]; + + /* check if device is present */ + if (!pPciDev) + continue; + + Log(("BIOS init device: %02x:%02x.%d\n", pBus->iBus, uDevFn >> 3, uDevFn & 7)); + + /* default memory mappings */ + ich9pciBiosInitDeviceBARs(pDevIns, pPciRoot, pBus, pPciDev); + uint16_t uDevClass = devpciR3GetWord(pPciDev, VBOX_PCI_CLASS_DEVICE); + switch (uDevClass) + { + case 0x0101: + /* IDE controller */ + devpciR3SetWord(pDevIns, pPciDev, 0x40, 0x8000); /* enable IDE0 */ + devpciR3SetWord(pDevIns, pPciDev, 0x42, 0x8000); /* enable IDE1 */ + break; + case 0x0300: + { + /* VGA controller */ + + /* NB: Default Bochs VGA LFB address is 0xE0000000. Old guest + * software may break if the framebuffer isn't mapped there. + */ + + /* + * Legacy VGA I/O ports are implicitly decoded by a VGA class device. But + * only the framebuffer (i.e., a memory region) is explicitly registered via + * ich9pciSetRegionAddress, so don't forget to enable I/O decoding. + */ + uint16_t uCmd = devpciR3GetWord(pPciDev, VBOX_PCI_COMMAND); + devpciR3SetWord(pDevIns, pPciDev, VBOX_PCI_COMMAND, uCmd | VBOX_PCI_COMMAND_IO); + break; + } +#ifdef VBOX_WITH_IOMMU_AMD + case 0x0806: + { + /* IOMMU. */ + uint16_t const uVendorId = devpciR3GetWord(pPciDev, VBOX_PCI_VENDOR_ID); + if (uVendorId == IOMMU_PCI_VENDOR_ID) + { + /* AMD. */ + devpciR3SetDWord(pDevIns, pPciDev, IOMMU_PCI_OFF_BASE_ADDR_REG_LO, + IOMMU_MMIO_BASE_ADDR | RT_BIT(0)); /* enable base address (bit 0). */ + } + break; + } +#endif + default: + break; + } + + /* map the interrupt */ + uint8_t iPin = devpciR3GetByte(pPciDev, VBOX_PCI_INTERRUPT_PIN); + if (iPin != 0) + { + iPin--; + + /* We need to go up to the host bus to see which irq pin this + device will use there. See logic in ich9pcibridgeSetIrq(). */ + uint32_t idxPdmParentBus; + PPDMDEVINS pDevInsParent = pDevIns; + while ((idxPdmParentBus = pDevInsParent->apPciDevs[0]->Int.s.idxPdmBus) != 0) + { + /* Get the pin the device would assert on the bridge. */ + iPin = ((pDevInsParent->apPciDevs[0]->uDevFn >> 3) + iPin) & 3; + + pDevInsParent = pBusCC->pPciHlpR3->pfnGetBusByNo(pDevIns, idxPdmParentBus); + AssertLogRelBreak(pDevInsParent); + } + + int iIrq = aPciIrqs[ich9pciSlotGetPirq(pBus->iBus, uDevFn, iPin)]; + Log(("Using pin %d and IRQ %d for device %02x:%02x.%d\n", + iPin, iIrq, pBus->iBus, uDevFn>>3, uDevFn&7)); + devpciR3SetByte(pDevIns, pPciDev, VBOX_PCI_INTERRUPT_LINE, iIrq); + } + } + + /* Second pass: handle bridges recursively. */ + for (uint32_t iBridge = 0; iBridge < pBus->cBridges; iBridge++) + { + PPDMPCIDEV pBridge = pBus->papBridgesR3[iBridge]; + AssertMsg(pBridge && pciDevIsPci2PciBridge(pBridge), + ("Device is not a PCI bridge but on the list of PCI bridges\n")); + PDEVPCIBUS pChildBus = PDMINS_2_DATA(pBridge->Int.s.CTX_SUFF(pDevIns), PDEVPCIBUS); + + ich9pciBiosInitBridge(pDevIns, pPciRoot, pChildBus); + } + + /* Third pass (only for bus 0): set up prefetchable BARs recursively. */ + if (pBus->iBus == 0) + { + for (uint32_t iBridge = 0; iBridge < pBus->cBridges; iBridge++) + { + PPDMPCIDEV pBridge = pBus->papBridgesR3[iBridge]; + AssertMsg(pBridge && pciDevIsPci2PciBridge(pBridge), + ("Device is not a PCI bridge but on the list of PCI bridges\n")); + PDEVPCIBUS pChildBus = PDMINS_2_DATA(pBridge->Int.s.CTX_SUFF(pDevIns), PDEVPCIBUS); + + Log(("BIOS init prefetchable memory behind bridge: %02x:%02x.%d\n", pChildBus->iBus, pBridge->uDevFn >> 3, pBridge->uDevFn & 7)); + /* Save values for the prefetchable dryruns. */ + uint32_t u32MMIOAddressBase = pPciRoot->uPciBiosMmio; + uint64_t u64MMIOAddressBase = pPciRoot->uPciBiosMmio64; + + bool fProbe = ich9pciBiosInitBridgePrefetchable(pDevIns, pPciRoot, pChildBus, false /* fUse64Bit */, true /* fDryrun */); + pPciRoot->uPciBiosMmio = u32MMIOAddressBase; + pPciRoot->uPciBiosMmio64 = u64MMIOAddressBase; + if (fProbe) + { + fProbe = ich9pciBiosInitBridgePrefetchable(pDevIns, pPciRoot, pChildBus, true /* fUse64Bit */, true /* fDryrun */); + pPciRoot->uPciBiosMmio = u32MMIOAddressBase; + pPciRoot->uPciBiosMmio64 = u64MMIOAddressBase; + if (fProbe) + LogRel(("PCI: unresolvable prefetchable memory behind bridge %02x:%02x.%d\n", pChildBus->iBus, pBridge->uDevFn >> 3, pBridge->uDevFn & 7)); + else + ich9pciBiosInitBridgePrefetchable(pDevIns, pPciRoot, pChildBus, true /* fUse64Bit */, false /* fDryrun */); + } + else + ich9pciBiosInitBridgePrefetchable(pDevIns, pPciRoot, pChildBus, false /* fUse64Bit */, false /* fDryrun */); + } + } +} + +/** + * Initializes bridges registers used for routing. + * + * We ASSUME PDM bus assignments are the same as the PCI bus assignments and + * will complain if we find any conflicts. This because it is just soo much + * simpler to have the two numbers match one another by default. + * + * @returns Max subordinate bus number. + * @param pDevIns The device instance of the bus. + * @param pPciRoot Global device instance data used to generate unique bus numbers. + * @param pBus The PCI bus to initialize. + * @param pbmUsed Pointer to a 32-bit bitmap tracking which device + * (ranges) has been used. + * @param uBusPrimary The primary bus number the bus is connected to. + */ +static uint8_t ich9pciBiosInitBridgeTopology(PPDMDEVINS pDevIns, PDEVPCIROOT pPciRoot, PDEVPCIBUS pBus, + uint32_t *pbmUsed, uint8_t uBusPrimary) +{ + PPDMPCIDEV pBridgeDev = pDevIns->apPciDevs[0]; + + /* Check if the PDM bus assignment makes sense. */ + AssertLogRelMsg(!(*pbmUsed & RT_BIT_32(pBus->iBus)), + ("PCIBIOS: Bad PCI bridge config! Conflict for bus %#x. Make sure to instantiate bridges for a sub-trees in sequence!\n", + pBus->iBus)); + *pbmUsed |= RT_BIT_32(pBus->iBus); + + /* Set only if we are not on the root bus, it has no primary bus attached. */ + if (pBus->iBus != 0) + { + devpciR3SetByte(pDevIns, pBridgeDev, VBOX_PCI_PRIMARY_BUS, uBusPrimary); + devpciR3SetByte(pDevIns, pBridgeDev, VBOX_PCI_SECONDARY_BUS, pBus->iBus); + /* Since the subordinate bus value can only be finalized once we + * finished recursing through everything behind the bridge, the only + * solution is temporarily configuring the subordinate to the maximum + * possible value. This makes sure that the config space accesses work + * (for our own sloppy emulation it apparently doesn't matter, but + * this is vital for real PCI bridges/devices in passthrough mode). */ + devpciR3SetByte(pDevIns, pBridgeDev, VBOX_PCI_SUBORDINATE_BUS, 0xff); + } + + uint8_t uMaxSubNum = pBus->iBus; + for (uint32_t iBridge = 0; iBridge < pBus->cBridges; iBridge++) + { + PPDMPCIDEV pBridge = pBus->papBridgesR3[iBridge]; + AssertMsg(pBridge && pciDevIsPci2PciBridge(pBridge), + ("Device is not a PCI bridge but on the list of PCI bridges\n")); + PDEVPCIBUS pChildBus = PDMINS_2_DATA(pBridge->Int.s.CTX_SUFF(pDevIns), PDEVPCIBUS); + uint8_t uMaxChildSubBus = ich9pciBiosInitBridgeTopology(pDevIns, pPciRoot, pChildBus, pbmUsed, pBus->iBus); + uMaxSubNum = RT_MAX(uMaxSubNum, uMaxChildSubBus); + } + + if (pBus->iBus != 0) + devpciR3SetByte(pDevIns, pBridgeDev, VBOX_PCI_SUBORDINATE_BUS, uMaxSubNum); + for (uint32_t i = pBus->iBus; i <= uMaxSubNum; i++) + *pbmUsed |= RT_BIT_32(i); + + /* Make sure that transactions are able to get through the bridge. Not + * strictly speaking necessary this early (before any device is set up), + * but on the other hand it can't hurt either. */ + if (pBus->iBus != 0) + devpciR3SetWord(pDevIns, pBridgeDev, VBOX_PCI_COMMAND, + VBOX_PCI_COMMAND_IO + | VBOX_PCI_COMMAND_MEMORY + | VBOX_PCI_COMMAND_MASTER); + + /* safe, only needs to go to the config space array */ + Log2Func(("for bus %p: primary=%d secondary=%d subordinate=%d\n", pBus, + PDMPciDevGetByte(pBridgeDev, VBOX_PCI_PRIMARY_BUS), + PDMPciDevGetByte(pBridgeDev, VBOX_PCI_SECONDARY_BUS), + PDMPciDevGetByte(pBridgeDev, VBOX_PCI_SUBORDINATE_BUS) )); + + return uMaxSubNum; +} + + +/** + * Worker for Fake PCI BIOS config + * + * @returns VBox status code. + * + * @param pDevIns ICH9 device instance. + */ +static int ich9pciFakePCIBIOS(PPDMDEVINS pDevIns) +{ + PDEVPCIROOT pPciRoot = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + uint32_t const cbBelow4GB = PDMDevHlpMMPhysGetRamSizeBelow4GB(pDevIns); + uint64_t const cbAbove4GB = PDMDevHlpMMPhysGetRamSizeAbove4GB(pDevIns); + + LogRel(("PCI: setting up topology, resources and interrupts\n")); + + /** @todo r=klaus this needs to do the same elcr magic as DevPCI.cpp, as the BIOS can't be trusted to do the right thing. Of course it's more difficult than with the old code, as there are bridges to be handled. The interrupt routing needs to be taken into account. Also I highly suspect that the chipset has 8 interrupt lines which we might be able to use for handling things on the root bus better (by treating them as devices on the mainboard). */ + + /* + * Set the start addresses. + */ + pPciRoot->uPciBiosBus = 0; + pPciRoot->uPciBiosIo = 0xd000; + pPciRoot->uPciBiosMmio = cbBelow4GB; + pPciRoot->uPciBiosMmio64 = cbAbove4GB + _4G; + + /* NB: Assume that if PCI controller MMIO range is enabled, it is below the beginning of the memory hole. */ + if (pPciRoot->u64PciConfigMMioAddress) + { + AssertRelease(pPciRoot->u64PciConfigMMioAddress >= cbBelow4GB); + pPciRoot->uPciBiosMmio = pPciRoot->u64PciConfigMMioAddress + pPciRoot->u64PciConfigMMioLength; + } + Log(("cbBelow4GB: %#RX32, uPciBiosMmio: %#RX64, cbAbove4GB: %#RX64, uPciBiosMmio64=%#RX64\n", + cbBelow4GB, pPciRoot->uPciBiosMmio, cbAbove4GB, pPciRoot->uPciBiosMmio64)); + + /* + * Assign bridge topology, for further routing to work. + */ + PDEVPCIBUS pBus = &pPciRoot->PciBus; + AssertLogRel(pBus->iBus == 0); + uint32_t bmUsed = 0; + ich9pciBiosInitBridgeTopology(pDevIns, pPciRoot, pBus, &bmUsed, 0); + + /* + * Init all devices on bus 0 (recursing to further buses). + */ + ich9pciBiosInitAllDevicesOnBus(pDevIns, pPciRoot, pBus); + + return VINF_SUCCESS; +} + + +/* -=-=-=-=-=- PCI Config Space -=-=-=-=-=- */ + + +/** + * Reads config space for a device, ignoring interceptors. + */ +DECLHIDDEN(VBOXSTRICTRC) devpciR3CommonConfigReadWorker(PPDMPCIDEV pPciDev, uint32_t uAddress, unsigned cb, uint32_t *pu32Value) +{ + uint32_t uValue; + if (uAddress + cb <= RT_MIN(pPciDev->cbConfig, sizeof(pPciDev->abConfig))) + { + switch (cb) + { + case 1: + /* safe, only needs to go to the config space array */ + uValue = PDMPciDevGetByte(pPciDev, uAddress); + break; + case 2: + /* safe, only needs to go to the config space array */ + uValue = PDMPciDevGetWord(pPciDev, uAddress); + break; + case 4: + /* safe, only needs to go to the config space array */ + uValue = PDMPciDevGetDWord(pPciDev, uAddress); + break; + default: + AssertFailed(); + uValue = 0; + break; + } + +#ifdef LOG_ENABLED + if ( pciDevIsMsiCapable(pPciDev) + && uAddress - (uint32_t)pPciDev->Int.s.u8MsiCapOffset < (uint32_t)pPciDev->Int.s.u8MsiCapSize ) + Log2Func(("MSI CAP: %#x LB %u -> %#x\n", uAddress - (uint32_t)pPciDev->Int.s.u8MsiCapOffset, cb, uValue)); + else if ( pciDevIsMsixCapable(pPciDev) + && uAddress - (uint32_t)pPciDev->Int.s.u8MsixCapOffset < (uint32_t)pPciDev->Int.s.u8MsixCapSize) + Log2Func(("MSI-X CAP: %#x LB %u -> %#x\n", uAddress - (uint32_t)pPciDev->Int.s.u8MsiCapOffset, cb, uValue)); +#endif + } + else + { + AssertMsgFailed(("Read after end of PCI config space: %#x LB %u\n", uAddress, cb)); + uValue = 0; + } + + *pu32Value = uValue; + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{PDMPCIBUSREGR3,pfnConfigRead} + */ +DECLCALLBACK(VBOXSTRICTRC) devpciR3CommonConfigRead(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, + uint32_t uAddress, unsigned cb, uint32_t *pu32Value) +{ + RT_NOREF(pDevIns); + return devpciR3CommonConfigReadWorker(pPciDev, uAddress, cb, pu32Value); +} + + +/** + * Worker for devpciR3ResetDevice and devpciR3UpdateMappings that unmaps a region. + * + * @returns VBox status code. + * @param pDev The PCI device. + * @param iRegion The region to unmap. + */ +static int devpciR3UnmapRegion(PPDMPCIDEV pDev, int iRegion) +{ + PPCIIOREGION pRegion = &pDev->Int.s.aIORegions[iRegion]; + AssertReturn(pRegion->size != 0, VINF_SUCCESS); + + int rc = VINF_SUCCESS; + if (pRegion->addr != INVALID_PCI_ADDRESS) + { + /* + * Do callout first (optional), then do the unmapping via handle if we've been handed one. + */ + if (pRegion->pfnMap) + { + rc = pRegion->pfnMap(pDev->Int.s.pDevInsR3, pDev, iRegion, + NIL_RTGCPHYS, pRegion->size, (PCIADDRESSSPACE)(pRegion->type)); + AssertRC(rc); + } + + switch (pRegion->fFlags & PDMPCIDEV_IORGN_F_HANDLE_MASK) + { + case PDMPCIDEV_IORGN_F_IOPORT_HANDLE: + rc = PDMDevHlpIoPortUnmap(pDev->Int.s.pDevInsR3, (IOMIOPORTHANDLE)pRegion->hHandle); + AssertRC(rc); + break; + + case PDMPCIDEV_IORGN_F_MMIO_HANDLE: + rc = PDMDevHlpMmioUnmap(pDev->Int.s.pDevInsR3, (IOMMMIOHANDLE)pRegion->hHandle); + AssertRC(rc); + break; + + case PDMPCIDEV_IORGN_F_MMIO2_HANDLE: + rc = PDMDevHlpMmio2Unmap(pDev->Int.s.pDevInsR3, (PGMMMIO2HANDLE)pRegion->hHandle); + AssertRC(rc); + break; + + case PDMPCIDEV_IORGN_F_NO_HANDLE: + Assert(pRegion->fFlags & PDMPCIDEV_IORGN_F_NEW_STYLE); + Assert(pRegion->hHandle == UINT64_MAX); + break; + + default: + AssertLogRelFailed(); + } + pRegion->addr = INVALID_PCI_ADDRESS; + } + return rc; +} + + +/** + * Worker for devpciR3CommonDefaultConfigWrite that updates BAR and ROM mappings. + * + * @returns VINF_SUCCESS of DBGFSTOP result. + * @param pPciDev The PCI device to update the mappings for. + * @param fP2PBridge Whether this is a PCI to PCI bridge or not. + */ +static VBOXSTRICTRC devpciR3UpdateMappings(PPDMPCIDEV pPciDev, bool fP2PBridge) +{ + /* safe, only needs to go to the config space array */ + uint16_t const u16Cmd = PDMPciDevGetWord(pPciDev, VBOX_PCI_COMMAND); + Log4(("devpciR3UpdateMappings: dev %u/%u (%s): u16Cmd=%#x\n", + pPciDev->uDevFn >> VBOX_PCI_DEVFN_DEV_SHIFT, pPciDev->uDevFn & VBOX_PCI_DEVFN_FUN_MASK, pPciDev->pszNameR3, u16Cmd)); + for (unsigned iRegion = 0; iRegion < VBOX_PCI_NUM_REGIONS; iRegion++) + { + /* Skip over BAR2..BAR5 for bridges, as they have a different meaning there. */ + if (fP2PBridge && iRegion >= 2 && iRegion <= 5) + continue; + PCIIOREGION *pRegion = &pPciDev->Int.s.aIORegions[iRegion]; + uint64_t const cbRegion = pRegion->size; + if (cbRegion != 0) + { + uint32_t const offCfgReg = devpciGetRegionReg(iRegion); + bool const f64Bit = (pRegion->type & ((uint8_t)(PCI_ADDRESS_SPACE_BAR64 | PCI_ADDRESS_SPACE_IO))) + == PCI_ADDRESS_SPACE_BAR64; + uint64_t uNew = INVALID_PCI_ADDRESS; + + /* + * Port I/O region. Check if mapped and within 1..65535 range. + */ + if (pRegion->type & PCI_ADDRESS_SPACE_IO) + { + if (u16Cmd & VBOX_PCI_COMMAND_IO) + { + /* safe, only needs to go to the config space array */ + uint32_t uIoBase = PDMPciDevGetDWord(pPciDev, offCfgReg); + uIoBase &= ~(uint32_t)(cbRegion - 1); + + uint64_t uLast = cbRegion - 1 + uIoBase; + if ( uLast < _64K + && uIoBase < uLast + && uIoBase > 0) + uNew = uIoBase; + else + Log4(("devpciR3UpdateMappings: dev %u/%u (%s): region #%u: Disregarding invalid I/O port range: %#RX32..%#RX64\n", + pPciDev->uDevFn >> VBOX_PCI_DEVFN_DEV_SHIFT, pPciDev->uDevFn & VBOX_PCI_DEVFN_FUN_MASK, + pPciDev->pszNameR3, iRegion, uIoBase, uLast)); + } + } + /* + * MMIO or ROM. Check ROM enable bit and range. + * + * Note! We exclude the I/O-APIC/HPET/ROM area at the end of the first 4GB to + * prevent the (fake) PCI BIOS and others from making a mess. Pure paranoia. + * Additionally addresses with the top 32 bits all set are excluded, to + * catch silly OSes which probe 64-bit BARs without disabling the + * corresponding transactions. + * + * Update: The pure paranoia above broke NT 3.51, so it was changed to only + * exclude the 64KB BIOS mapping at the top. NT 3.51 excludes the + * top 256KB, btw. + */ + /** @todo Query upper boundrary from CPUM and PGMPhysRom instead of making + * incorrect assumptions. */ + else if (u16Cmd & VBOX_PCI_COMMAND_MEMORY) + { + /* safe, only needs to go to the config space array */ + uint64_t uMemBase = PDMPciDevGetDWord(pPciDev, offCfgReg); + if (f64Bit) + { + Assert(iRegion < VBOX_PCI_ROM_SLOT); + /* safe, only needs to go to the config space array */ + uMemBase |= (uint64_t)PDMPciDevGetDWord(pPciDev, offCfgReg + 4) << 32; + } + if ( iRegion != PCI_ROM_SLOT + || (uMemBase & RT_BIT_32(0))) /* ROM enable bit. */ + { + uMemBase &= ~(cbRegion - 1); + + uint64_t uLast = uMemBase + cbRegion - 1; + if ( uMemBase < uLast + && uMemBase > 0) + { + if ( ( uMemBase > UINT32_C(0xffffffff) + || uLast < UINT32_C(0xffff0000) ) /* UINT32_C(0xfec00000) - breaks NT3.51! */ + && uMemBase < UINT64_C(0xffffffff00000000) ) + uNew = uMemBase; + else + Log(("devpciR3UpdateMappings: dev %u/%u (%s): region #%u: Rejecting address range: %#RX64..%#RX64!\n", + pPciDev->uDevFn >> VBOX_PCI_DEVFN_DEV_SHIFT, pPciDev->uDevFn & VBOX_PCI_DEVFN_FUN_MASK, + pPciDev->pszNameR3, iRegion, uMemBase, uLast)); + } + else + Log2(("devpciR3UpdateMappings: dev %u/%u (%s): region #%u: Disregarding invalid address range: %#RX64..%#RX64\n", + pPciDev->uDevFn >> VBOX_PCI_DEVFN_DEV_SHIFT, pPciDev->uDevFn & VBOX_PCI_DEVFN_FUN_MASK, + pPciDev->pszNameR3, iRegion, uMemBase, uLast)); + } + } + + /* + * Do real unmapping and/or mapping if the address change. + */ + Log4(("devpciR3UpdateMappings: dev %u/%u (%s): iRegion=%u addr=%#RX64 uNew=%#RX64\n", + pPciDev->uDevFn >> VBOX_PCI_DEVFN_DEV_SHIFT, pPciDev->uDevFn & VBOX_PCI_DEVFN_FUN_MASK, pPciDev->pszNameR3, + iRegion, pRegion->addr, uNew)); + if (uNew != pRegion->addr) + { + LogRel2(("PCI: config dev %u/%u (%s) BAR%i: %#RX64 -> %#RX64 (LB %RX64 (%RU64))\n", + pPciDev->uDevFn >> VBOX_PCI_DEVFN_DEV_SHIFT, pPciDev->uDevFn & VBOX_PCI_DEVFN_FUN_MASK, + pPciDev->pszNameR3, iRegion, pRegion->addr, uNew, cbRegion, cbRegion)); + + int rc = devpciR3UnmapRegion(pPciDev, iRegion); + AssertLogRelRC(rc); + pRegion->addr = uNew; + if (uNew != INVALID_PCI_ADDRESS) + { + /* The callout is optional (typically not used): */ + if (!pRegion->pfnMap) + rc = VINF_SUCCESS; + else + { + rc = pRegion->pfnMap(pPciDev->Int.s.pDevInsR3, pPciDev, iRegion, + uNew, cbRegion, (PCIADDRESSSPACE)(pRegion->type)); + AssertLogRelRC(rc); + } + + /* We do the mapping for most devices: */ + if (pRegion->hHandle != UINT64_MAX && rc != VINF_PCI_MAPPING_DONE) + { + switch (pRegion->fFlags & PDMPCIDEV_IORGN_F_HANDLE_MASK) + { + case PDMPCIDEV_IORGN_F_IOPORT_HANDLE: + rc = PDMDevHlpIoPortMap(pPciDev->Int.s.pDevInsR3, (IOMIOPORTHANDLE)pRegion->hHandle, (RTIOPORT)uNew); + AssertLogRelRC(rc); + break; + + case PDMPCIDEV_IORGN_F_MMIO_HANDLE: + rc = PDMDevHlpMmioMap(pPciDev->Int.s.pDevInsR3, (IOMMMIOHANDLE)pRegion->hHandle, uNew); + AssertLogRelRC(rc); + break; + + case PDMPCIDEV_IORGN_F_MMIO2_HANDLE: + rc = PDMDevHlpMmio2Map(pPciDev->Int.s.pDevInsR3, (PGMMMIO2HANDLE)pRegion->hHandle, uNew); + AssertRC(rc); + break; + + default: + AssertLogRelFailed(); + } + } + } + } + + if (f64Bit) + iRegion++; + } + /* else: size == 0: unused region */ + } + + return VINF_SUCCESS; +} + + +/** + * Worker for devpciR3CommonDefaultConfigWrite that write a byte to a BAR. + * + * @param pPciDev The PCI device. + * @param iRegion The region. + * @param off The BAR offset. + * @param bVal The byte to write. + */ +DECLINLINE(void) devpciR3WriteBarByte(PPDMPCIDEV pPciDev, uint32_t iRegion, uint32_t off, uint8_t bVal) +{ + PCIIOREGION *pRegion = &pPciDev->Int.s.aIORegions[iRegion]; + Log3Func(("region=%d off=%d val=%#x size=%#llx\n", iRegion, off, bVal, pRegion->size)); + Assert(off <= 3); + + /* Check if we're writing to upper part of 64-bit BAR. */ + if (pRegion->type == 0xff) + { + AssertLogRelReturnVoid(iRegion > 0 && iRegion < VBOX_PCI_ROM_SLOT); + pRegion--; + iRegion--; + off += 4; + Assert(pRegion->type & PCI_ADDRESS_SPACE_BAR64); + } + + /* Ignore zero sized regions (they don't exist). */ + if (pRegion->size != 0) + { + uint32_t uAddr = devpciGetRegionReg(iRegion) + off; + Assert((pRegion->size & (pRegion->size - 1)) == 0); /* Region size must be power of two. */ + uint8_t bMask = ( (pRegion->size - 1) >> (off * 8) ) & 0xff; + if (off == 0) + bMask |= (pRegion->type & PCI_ADDRESS_SPACE_IO) + ? (1 << 2) - 1 /* 2 lowest bits for IO region */ : + (1 << 4) - 1 /* 4 lowest bits for memory region, also ROM enable bit for ROM region */; + + /* safe, only needs to go to the config space array */ + uint8_t bOld = PDMPciDevGetByte(pPciDev, uAddr) & bMask; + bVal = (bOld & bMask) | (bVal & ~bMask); + + Log3Func(("%x changed to %x\n", bOld, bVal)); + + /* safe, only needs to go to the config space array */ + PDMPciDevSetByte(pPciDev, uAddr, bVal); + } +} + + +/** + * Checks if the given configuration byte is writable. + * + * @returns true if writable, false if not + * @param uAddress The config space byte byte. + * @param bHeaderType The device header byte. + */ +DECLINLINE(bool) devpciR3IsConfigByteWritable(uint32_t uAddress, uint8_t bHeaderType) +{ + switch (bHeaderType) + { + case 0x00: /* normal device */ + case 0x80: /* multi-function device */ + switch (uAddress) + { + /* Read-only registers. */ + case VBOX_PCI_VENDOR_ID: + case VBOX_PCI_VENDOR_ID+1: + case VBOX_PCI_DEVICE_ID: + case VBOX_PCI_DEVICE_ID+1: + case VBOX_PCI_REVISION_ID: + case VBOX_PCI_CLASS_PROG: + case VBOX_PCI_CLASS_SUB: + case VBOX_PCI_CLASS_BASE: + case VBOX_PCI_HEADER_TYPE: + case VBOX_PCI_SUBSYSTEM_VENDOR_ID: + case VBOX_PCI_SUBSYSTEM_VENDOR_ID+1: + case VBOX_PCI_SUBSYSTEM_ID: + case VBOX_PCI_SUBSYSTEM_ID+1: + case VBOX_PCI_ROM_ADDRESS: + case VBOX_PCI_ROM_ADDRESS+1: + case VBOX_PCI_ROM_ADDRESS+2: + case VBOX_PCI_ROM_ADDRESS+3: + case VBOX_PCI_CAPABILITY_LIST: + case VBOX_PCI_INTERRUPT_PIN: + return false; + /* Other registers can be written. */ + default: + return true; + } + break; + case 0x01: /* PCI-PCI bridge */ + switch (uAddress) + { + /* Read-only registers. */ + case VBOX_PCI_VENDOR_ID: + case VBOX_PCI_VENDOR_ID+1: + case VBOX_PCI_DEVICE_ID: + case VBOX_PCI_DEVICE_ID+1: + case VBOX_PCI_REVISION_ID: + case VBOX_PCI_CLASS_PROG: + case VBOX_PCI_CLASS_SUB: + case VBOX_PCI_CLASS_BASE: + case VBOX_PCI_HEADER_TYPE: + case VBOX_PCI_ROM_ADDRESS_BR: + case VBOX_PCI_ROM_ADDRESS_BR+1: + case VBOX_PCI_ROM_ADDRESS_BR+2: + case VBOX_PCI_ROM_ADDRESS_BR+3: + case VBOX_PCI_INTERRUPT_PIN: + return false; + /* Other registers can be written. */ + default: + return true; + } + break; + default: + AssertMsgFailed(("Unknown header type %#x\n", bHeaderType)); + return false; + } +} + + +/** + * Writes config space for a device, ignoring interceptors. + * + * See paragraph 7.5 of PCI Express specification (p. 349) for + * definition of registers and their writability policy. + */ +DECLHIDDEN(VBOXSTRICTRC) devpciR3CommonConfigWriteWorker(PPDMDEVINS pDevIns, PDEVPCIBUSCC pBusCC, + PPDMPCIDEV pPciDev, uint32_t uAddress, unsigned cb, uint32_t u32Value) +{ + Assert(cb <= 4 && cb != 3); + VBOXSTRICTRC rcStrict = VINF_SUCCESS; + + if (uAddress + cb <= RT_MIN(pPciDev->cbConfig, sizeof(pPciDev->abConfig))) + { + /* + * MSI and MSI-X capabilites needs to be handled separately. + */ + if ( pciDevIsMsiCapable(pPciDev) + && uAddress - (uint32_t)pPciDev->Int.s.u8MsiCapOffset < (uint32_t)pPciDev->Int.s.u8MsiCapSize) + MsiR3PciConfigWrite(pDevIns, pBusCC->CTX_SUFF(pPciHlp), pPciDev, uAddress, u32Value, cb); + else if ( pciDevIsMsixCapable(pPciDev) + && uAddress - (uint32_t)pPciDev->Int.s.u8MsixCapOffset < (uint32_t)pPciDev->Int.s.u8MsixCapSize) + MsixR3PciConfigWrite(pDevIns, pBusCC->CTX_SUFF(pPciHlp), pPciDev, uAddress, u32Value, cb); + else + { + /* + * Handle the writes byte-by-byte to catch all possible cases. + * + * Note! Real hardware may not necessarily handle non-dword writes like + * we do here and even produce erratic behavior. We don't (yet) + * try emulate that. + */ + uint8_t const bHeaderType = devpciR3GetByte(pPciDev, VBOX_PCI_HEADER_TYPE); + bool const fP2PBridge = bHeaderType == 0x01; /* PCI-PCI bridge */ + bool fUpdateMappings = false; + while (cb-- > 0) + { + bool fWritable = devpciR3IsConfigByteWritable(uAddress, bHeaderType); + uint8_t bVal = (uint8_t)u32Value; + bool fRom = false; + switch (uAddress) + { + case VBOX_PCI_COMMAND: /* Command register, bits 0-7. */ + if (fWritable) + { + /* safe, only needs to go to the config space array */ + PDMPciDevSetByte(pPciDev, uAddress, bVal); + fUpdateMappings = true; + } + break; + + case VBOX_PCI_COMMAND+1: /* Command register, bits 8-15. */ + if (fWritable) + { + /* don't change reserved bits (11-15) */ + bVal &= ~UINT8_C(0xf8); + /* safe, only needs to go to the config space array */ + PDMPciDevSetByte(pPciDev, uAddress, bVal); + fUpdateMappings = true; + } + break; + + case VBOX_PCI_STATUS: /* Status register, bits 0-7. */ + /* don't change read-only bits => actually all lower bits are read-only */ + bVal &= ~UINT8_C(0xff); + /* status register, low part: clear bits by writing a '1' to the corresponding bit */ + pPciDev->abConfig[uAddress] &= ~bVal; + break; + + case VBOX_PCI_STATUS+1: /* Status register, bits 8-15. */ + /* don't change read-only bits */ + bVal &= ~UINT8_C(0x06); + /* status register, high part: clear bits by writing a '1' to the corresponding bit */ + pPciDev->abConfig[uAddress] &= ~bVal; + break; + + case VBOX_PCI_ROM_ADDRESS: case VBOX_PCI_ROM_ADDRESS +1: case VBOX_PCI_ROM_ADDRESS +2: case VBOX_PCI_ROM_ADDRESS +3: + fRom = true; + RT_FALL_THRU(); + case VBOX_PCI_BASE_ADDRESS_0: case VBOX_PCI_BASE_ADDRESS_0+1: case VBOX_PCI_BASE_ADDRESS_0+2: case VBOX_PCI_BASE_ADDRESS_0+3: + case VBOX_PCI_BASE_ADDRESS_1: case VBOX_PCI_BASE_ADDRESS_1+1: case VBOX_PCI_BASE_ADDRESS_1+2: case VBOX_PCI_BASE_ADDRESS_1+3: + case VBOX_PCI_BASE_ADDRESS_2: case VBOX_PCI_BASE_ADDRESS_2+1: case VBOX_PCI_BASE_ADDRESS_2+2: case VBOX_PCI_BASE_ADDRESS_2+3: + case VBOX_PCI_BASE_ADDRESS_3: case VBOX_PCI_BASE_ADDRESS_3+1: case VBOX_PCI_BASE_ADDRESS_3+2: case VBOX_PCI_BASE_ADDRESS_3+3: + case VBOX_PCI_BASE_ADDRESS_4: case VBOX_PCI_BASE_ADDRESS_4+1: case VBOX_PCI_BASE_ADDRESS_4+2: case VBOX_PCI_BASE_ADDRESS_4+3: + case VBOX_PCI_BASE_ADDRESS_5: case VBOX_PCI_BASE_ADDRESS_5+1: case VBOX_PCI_BASE_ADDRESS_5+2: case VBOX_PCI_BASE_ADDRESS_5+3: + /* We check that, as same PCI register numbers as BARs may mean different registers for bridges */ + if (!fP2PBridge) + { + uint32_t iRegion = fRom ? VBOX_PCI_ROM_SLOT : (uAddress - VBOX_PCI_BASE_ADDRESS_0) >> 2; + devpciR3WriteBarByte(pPciDev, iRegion, uAddress & 0x3, bVal); + fUpdateMappings = true; + break; + } + if (uAddress < VBOX_PCI_BASE_ADDRESS_2 || uAddress > VBOX_PCI_BASE_ADDRESS_5+3) + { + /* PCI bridges have only BAR0, BAR1 and ROM */ + uint32_t iRegion = fRom ? VBOX_PCI_ROM_SLOT : (uAddress - VBOX_PCI_BASE_ADDRESS_0) >> 2; + devpciR3WriteBarByte(pPciDev, iRegion, uAddress & 0x3, bVal); + fUpdateMappings = true; + break; + } + if ( uAddress == VBOX_PCI_IO_BASE + || uAddress == VBOX_PCI_IO_LIMIT + || uAddress == VBOX_PCI_MEMORY_BASE + || uAddress == VBOX_PCI_MEMORY_LIMIT + || uAddress == VBOX_PCI_PREF_MEMORY_BASE + || uAddress == VBOX_PCI_PREF_MEMORY_LIMIT) + { + /* All bridge address decoders have the low 4 bits + * as readonly, and all but the prefetchable ones + * have the low 4 bits as 0 (the prefetchable have + * it as 1 to show the 64-bit decoder support. */ + bVal &= 0xf0; + if ( uAddress == VBOX_PCI_PREF_MEMORY_BASE + || uAddress == VBOX_PCI_PREF_MEMORY_LIMIT) + bVal |= 0x01; + } + /* (bridge config space which isn't a BAR) */ + RT_FALL_THRU(); + default: + if (fWritable) + /* safe, only needs to go to the config space array */ + PDMPciDevSetByte(pPciDev, uAddress, bVal); + break; + } + uAddress++; + u32Value >>= 8; + } + + /* + * Update the region mappings if anything changed related to them (command, BARs, ROM). + */ + if (fUpdateMappings) + rcStrict = devpciR3UpdateMappings(pPciDev, fP2PBridge); + } + } + else + AssertMsgFailed(("Write after end of PCI config space: %#x LB %u\n", uAddress, cb)); + + return rcStrict; +} + + +/** + * @interface_method_impl{PDMPCIBUSREGR3,pfnConfigWrite} + */ +DECLCALLBACK(VBOXSTRICTRC) devpciR3CommonConfigWrite(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, + uint32_t uAddress, unsigned cb, uint32_t u32Value) +{ + PDEVPCIBUSCC pBusCC = PDMINS_2_DATA_CC(pDevIns, PDEVPCIBUSCC); + return devpciR3CommonConfigWriteWorker(pDevIns, pBusCC, pPciDev, uAddress, cb, u32Value); +} + + +/* -=-=-=-=-=- Debug Info Handlers -=-=-=-=-=- */ + +/** + * Indents an info line. + * @param pHlp The info helper. + * @param iIndentLvl The desired indentation level. + */ +static void devpciR3InfoIndent(PCDBGFINFOHLP pHlp, unsigned iIndentLvl) +{ + for (unsigned i = 0; i < iIndentLvl; i++) + pHlp->pfnPrintf(pHlp, " "); +} + +static const char *devpciR3InInfoPciBusClassName(uint8_t iBaseClass) +{ + static const char *s_szBaseClass[] = + { + /* 00h */ "unknown", + /* 01h */ "mass storage controller", + /* 02h */ "network controller", + /* 03h */ "display controller", + /* 04h */ "multimedia controller", + /* 05h */ "memory controller", + /* 06h */ "bridge device", + /* 07h */ "simple communication controllers", + /* 08h */ "base system peripherals", + /* 09h */ "input devices", + /* 0Ah */ "docking stations", + /* 0Bh */ "processors", + /* 0Ch */ "serial bus controllers", + /* 0Dh */ "wireless controller", + /* 0Eh */ "intelligent I/O controllers", + /* 0Fh */ "satellite communication controllers", + /* 10h */ "encryption/decryption controllers", + /* 11h */ "data acquisition and signal processing controllers" + }; + if (iBaseClass < RT_ELEMENTS(s_szBaseClass)) + return s_szBaseClass[iBaseClass]; + if (iBaseClass < 0xFF) + return "reserved"; + return "device does not fit in any defined classes"; +} + + +/** + * Recursive worker for devpciR3InfoPci. + * + * @param pBus The bus to show info for. + * @param pHlp The info helpers. + * @param iIndentLvl The indentation level. + * @param fRegisters Whether to show device registers or not. + */ +static void devpciR3InfoPciBus(PDEVPCIBUS pBus, PCDBGFINFOHLP pHlp, unsigned iIndentLvl, bool fRegisters) +{ + /* This has to use the callbacks for accuracy reasons. Otherwise it can get + * confusing in the passthrough case or when the callbacks for some device + * are doing something non-trivial (like implementing an indirect + * passthrough approach), because then the abConfig array is an imprecise + * cache needed for efficiency (so that certain reads can be done from + * R0/RC), but far from authoritative or what the guest would see. */ + + for (uint32_t uDevFn = 0; uDevFn < RT_ELEMENTS(pBus->apDevices); uDevFn++) + { + PPDMPCIDEV pPciDev = pBus->apDevices[uDevFn]; + if (pPciDev != NULL) + { + devpciR3InfoIndent(pHlp, iIndentLvl); + + /* + * For passthrough devices MSI/MSI-X mostly reflects the way interrupts delivered to the guest, + * as host driver handles real devices interrupts. + */ + pHlp->pfnPrintf(pHlp, "%02x:%02x.%d %s%s: %04x-%04x %s%s%s", + pBus->iBus, (uDevFn >> 3) & 0xff, uDevFn & 0x7, + pPciDev->pszNameR3, + pciDevIsPassthrough(pPciDev) ? " (PASSTHROUGH)" : "", + devpciR3GetWord(pPciDev, VBOX_PCI_VENDOR_ID), devpciR3GetWord(pPciDev, VBOX_PCI_DEVICE_ID), + pBus->fTypeIch9 ? "ICH9" : pBus->fTypePiix3 ? "PIIX3" : "?type?", + pciDevIsMsiCapable(pPciDev) ? " MSI" : "", + pciDevIsMsixCapable(pPciDev) ? " MSI-X" : "" + ); + if (devpciR3GetByte(pPciDev, VBOX_PCI_INTERRUPT_PIN) != 0) + { + pHlp->pfnPrintf(pHlp, " IRQ%d", devpciR3GetByte(pPciDev, VBOX_PCI_INTERRUPT_LINE)); + pHlp->pfnPrintf(pHlp, " (INTA#->IRQ%d)", 0x10 + ich9pciSlot2ApicIrq(uDevFn >> 3, 0)); + } + pHlp->pfnPrintf(pHlp, "\n"); + devpciR3InfoIndent(pHlp, iIndentLvl + 2); + uint8_t uClassBase = devpciR3GetByte(pPciDev, VBOX_PCI_CLASS_BASE); + uint8_t uClassSub = devpciR3GetByte(pPciDev, VBOX_PCI_CLASS_SUB); + pHlp->pfnPrintf(pHlp, "Class base/sub: %02x%02x (%s)\n", + uClassBase, uClassSub, devpciR3InInfoPciBusClassName(uClassBase)); + + if (pciDevIsMsiCapable(pPciDev) || pciDevIsMsixCapable(pPciDev)) + { + devpciR3InfoIndent(pHlp, iIndentLvl + 2); + + if (pciDevIsMsiCapable(pPciDev)) + pHlp->pfnPrintf(pHlp, "MSI: %s ", MsiIsEnabled(pPciDev) ? "on" : "off"); + + if (pciDevIsMsixCapable(pPciDev)) + pHlp->pfnPrintf(pHlp, "MSI-X: %s ", MsixIsEnabled(pPciDev) ? "on" : "off"); + + pHlp->pfnPrintf(pHlp, "\n"); + } + + for (unsigned iRegion = 0; iRegion < VBOX_PCI_NUM_REGIONS; iRegion++) + { + PCIIOREGION const *pRegion = &pPciDev->Int.s.aIORegions[iRegion]; + uint64_t const cbRegion = pRegion->size; + + if (cbRegion == 0) + continue; + + uint32_t uAddr = devpciR3GetDWord(pPciDev, devpciGetRegionReg(iRegion)); + const char * pszDesc; + char szDescBuf[128]; + + bool f64Bit = (pRegion->type & ((uint8_t)(PCI_ADDRESS_SPACE_BAR64 | PCI_ADDRESS_SPACE_IO))) + == PCI_ADDRESS_SPACE_BAR64; + if (pRegion->type & PCI_ADDRESS_SPACE_IO) + { + pszDesc = "IO"; + uAddr &= ~0x3; + } + else + { + RTStrPrintf(szDescBuf, sizeof(szDescBuf), "MMIO%s%s", + f64Bit ? "64" : "32", + pRegion->type & PCI_ADDRESS_SPACE_MEM_PREFETCH ? " PREFETCH" : ""); + pszDesc = szDescBuf; + uAddr &= ~0xf; + } + + devpciR3InfoIndent(pHlp, iIndentLvl + 2); + pHlp->pfnPrintf(pHlp, "%s region #%u: ", pszDesc, iRegion); + if (f64Bit) + { + uint32_t u32High = devpciR3GetDWord(pPciDev, devpciGetRegionReg(iRegion+1)); + uint64_t u64Addr = RT_MAKE_U64(uAddr, u32High); + pHlp->pfnPrintf(pHlp, "%RX64..%RX64\n", u64Addr, u64Addr + cbRegion - 1); + iRegion++; + } + else + pHlp->pfnPrintf(pHlp, "%x..%x\n", uAddr, uAddr + (uint32_t)cbRegion - 1); + } + + devpciR3InfoIndent(pHlp, iIndentLvl + 2); + uint16_t iCmd = devpciR3GetWord(pPciDev, VBOX_PCI_COMMAND); + uint16_t iStatus = devpciR3GetWord(pPciDev, VBOX_PCI_STATUS); + pHlp->pfnPrintf(pHlp, "Command: %04x, Status: %04x\n", iCmd, iStatus); + devpciR3InfoIndent(pHlp, iIndentLvl + 2); + pHlp->pfnPrintf(pHlp, "Bus master: %s\n", iCmd & VBOX_PCI_COMMAND_MASTER ? "Yes" : "No"); + if (iCmd != PDMPciDevGetCommand(pPciDev)) + { + devpciR3InfoIndent(pHlp, iIndentLvl + 2); + pHlp->pfnPrintf(pHlp, "CACHE INCONSISTENCY: Command: %04x\n", PDMPciDevGetCommand(pPciDev)); + } + + if (fRegisters) + { + devpciR3InfoIndent(pHlp, iIndentLvl + 2); + pHlp->pfnPrintf(pHlp, "PCI registers:\n"); + for (unsigned iReg = 0; iReg < 0x100; ) + { + unsigned iPerLine = 0x10; + Assert(0x100 % iPerLine == 0); + devpciR3InfoIndent(pHlp, iIndentLvl + 3); + + while (iPerLine-- > 0) + pHlp->pfnPrintf(pHlp, "%02x ", devpciR3GetByte(pPciDev, iReg++)); + pHlp->pfnPrintf(pHlp, "\n"); + } + } + } + } + + if (pBus->cBridges > 0) + { + devpciR3InfoIndent(pHlp, iIndentLvl); + pHlp->pfnPrintf(pHlp, "Registered %d bridges, subordinate buses info follows\n", pBus->cBridges); + for (uint32_t iBridge = 0; iBridge < pBus->cBridges; iBridge++) + { + PPDMDEVINS pDevInsSub = pBus->papBridgesR3[iBridge]->Int.s.CTX_SUFF(pDevIns); + PPDMPCIDEV pPciDevSub = pDevInsSub->apPciDevs[0]; + PDEVPCIBUS pBusSub = PDMINS_2_DATA(pDevInsSub, PDEVPCIBUS); + uint8_t uPrimary = devpciR3GetByte(pPciDevSub, VBOX_PCI_PRIMARY_BUS); + uint8_t uSecondary = devpciR3GetByte(pPciDevSub, VBOX_PCI_SECONDARY_BUS); + uint8_t uSubordinate = devpciR3GetByte(pPciDevSub, VBOX_PCI_SUBORDINATE_BUS); + devpciR3InfoIndent(pHlp, iIndentLvl); + pHlp->pfnPrintf(pHlp, "%02x:%02x.%d: bridge topology: primary=%d secondary=%d subordinate=%d\n", + uPrimary, pPciDevSub->uDevFn >> 3, pPciDevSub->uDevFn & 7, + uPrimary, uSecondary, uSubordinate); + if ( uPrimary != PDMPciDevGetByte(pPciDevSub, VBOX_PCI_PRIMARY_BUS) + || uSecondary != PDMPciDevGetByte(pPciDevSub, VBOX_PCI_SECONDARY_BUS) + || uSubordinate != PDMPciDevGetByte(pPciDevSub, VBOX_PCI_SUBORDINATE_BUS)) + { + devpciR3InfoIndent(pHlp, iIndentLvl); + pHlp->pfnPrintf(pHlp, "CACHE INCONSISTENCY: primary=%d secondary=%d subordinate=%d\n", + PDMPciDevGetByte(pPciDevSub, VBOX_PCI_PRIMARY_BUS), + PDMPciDevGetByte(pPciDevSub, VBOX_PCI_SECONDARY_BUS), + PDMPciDevGetByte(pPciDevSub, VBOX_PCI_SUBORDINATE_BUS)); + } + devpciR3InfoIndent(pHlp, iIndentLvl); + pHlp->pfnPrintf(pHlp, "behind bridge: "); + uint8_t uIoBase = devpciR3GetByte(pPciDevSub, VBOX_PCI_IO_BASE); + uint8_t uIoLimit = devpciR3GetByte(pPciDevSub, VBOX_PCI_IO_LIMIT); + pHlp->pfnPrintf(pHlp, "I/O %#06x..%#06x", + (uIoBase & 0xf0) << 8, + (uIoLimit & 0xf0) << 8 | 0xfff); + if (uIoBase > uIoLimit) + pHlp->pfnPrintf(pHlp, " (IGNORED)"); + pHlp->pfnPrintf(pHlp, "\n"); + devpciR3InfoIndent(pHlp, iIndentLvl); + pHlp->pfnPrintf(pHlp, "behind bridge: "); + uint32_t uMemoryBase = devpciR3GetWord(pPciDevSub, VBOX_PCI_MEMORY_BASE); + uint32_t uMemoryLimit = devpciR3GetWord(pPciDevSub, VBOX_PCI_MEMORY_LIMIT); + pHlp->pfnPrintf(pHlp, "memory %#010x..%#010x", + (uMemoryBase & 0xfff0) << 16, + (uMemoryLimit & 0xfff0) << 16 | 0xfffff); + if (uMemoryBase > uMemoryLimit) + pHlp->pfnPrintf(pHlp, " (IGNORED)"); + pHlp->pfnPrintf(pHlp, "\n"); + devpciR3InfoIndent(pHlp, iIndentLvl); + pHlp->pfnPrintf(pHlp, "behind bridge: "); + uint32_t uPrefMemoryRegBase = devpciR3GetWord(pPciDevSub, VBOX_PCI_PREF_MEMORY_BASE); + uint32_t uPrefMemoryRegLimit = devpciR3GetWord(pPciDevSub, VBOX_PCI_PREF_MEMORY_LIMIT); + uint64_t uPrefMemoryBase = (uPrefMemoryRegBase & 0xfff0) << 16; + uint64_t uPrefMemoryLimit = (uPrefMemoryRegLimit & 0xfff0) << 16 | 0xfffff; + if ( (uPrefMemoryRegBase & 0xf) == 1 + && (uPrefMemoryRegLimit & 0xf) == 1) + { + uPrefMemoryBase |= (uint64_t)devpciR3GetDWord(pPciDevSub, VBOX_PCI_PREF_BASE_UPPER32) << 32; + uPrefMemoryLimit |= (uint64_t)devpciR3GetDWord(pPciDevSub, VBOX_PCI_PREF_LIMIT_UPPER32) << 32; + pHlp->pfnPrintf(pHlp, "64-bit "); + } + else + pHlp->pfnPrintf(pHlp, "32-bit "); + pHlp->pfnPrintf(pHlp, "prefetch memory %#018llx..%#018llx", uPrefMemoryBase, uPrefMemoryLimit); + if (uPrefMemoryBase > uPrefMemoryLimit) + pHlp->pfnPrintf(pHlp, " (IGNORED)"); + pHlp->pfnPrintf(pHlp, "\n"); + devpciR3InfoPciBus(pBusSub, pHlp, iIndentLvl + 1, fRegisters); + } + } +} + + +/** + * @callback_method_impl{FNDBGFHANDLERDEV, 'pci'} + */ +DECLCALLBACK(void) devpciR3InfoPci(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + PDEVPCIBUS pBus = DEVINS_2_DEVPCIBUS(pDevIns); + + if (pszArgs == NULL || !*pszArgs || !strcmp(pszArgs, "basic")) + devpciR3InfoPciBus(pBus, pHlp, 0 /*iIndentLvl*/, false /*fRegisters*/); + else if (!strcmp(pszArgs, "verbose")) + devpciR3InfoPciBus(pBus, pHlp, 0 /*iIndentLvl*/, true /*fRegisters*/); + else + pHlp->pfnPrintf(pHlp, "Invalid argument. Recognized arguments are 'basic', 'verbose'.\n"); +} + + +/** + * @callback_method_impl{FNDBGFHANDLERDEV, 'pciirq'} + */ +DECLCALLBACK(void) devpciR3InfoPciIrq(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs) +{ + PDEVPCIROOT pPciRoot = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + NOREF(pszArgs); + + pHlp->pfnPrintf(pHlp, "PCI I/O APIC IRQ levels:\n"); + for (int i = 0; i < DEVPCI_APIC_IRQ_PINS; ++i) + pHlp->pfnPrintf(pHlp, " IRQ%02d: %u\n", 0x10 + i, pPciRoot->auPciApicIrqLevels[i]); +} + + +/** + * @interface_method_impl{PDMDEVREG,pfnConstruct} + */ +static DECLCALLBACK(int) ich9pciR3Construct(PPDMDEVINS pDevIns, int iInstance, PCFGMNODE pCfg) +{ + RT_NOREF1(iInstance); + Assert(iInstance == 0); + PDMDEV_CHECK_VERSIONS_RETURN(pDevIns); + + PDEVPCIBUSCC pBusCC = PDMINS_2_DATA_CC(pDevIns, PDEVPCIBUSCC); + PDEVPCIROOT pPciRoot = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + PCPDMDEVHLPR3 pHlp = pDevIns->pHlpR3; + PDEVPCIBUS pBus = &pPciRoot->PciBus; + Assert(ASMMemIsZero(pPciRoot, sizeof(*pPciRoot))); /* code used to memset it for some funny reason. just temp insurance. */ + + /* + * Validate and read configuration. + */ + PDMDEV_VALIDATE_CONFIG_RETURN(pDevIns, "IOAPIC|McfgBase|McfgLength", ""); + + /* query whether we got an IOAPIC */ + int rc = pHlp->pfnCFGMQueryBoolDef(pCfg, "IOAPIC", &pPciRoot->fUseIoApic, false /** @todo default to true? */); + AssertRCReturn(rc, PDMDEV_SET_ERROR(pDevIns, rc, N_("Configuration error: Failed to query boolean value \"IOAPIC\""))); + + if (!pPciRoot->fUseIoApic) + return PDMDEV_SET_ERROR(pDevIns, rc, N_("Must use IO-APIC with ICH9 chipset")); + + rc = pHlp->pfnCFGMQueryU64Def(pCfg, "McfgBase", &pPciRoot->u64PciConfigMMioAddress, 0); + AssertRCReturn(rc, PDMDEV_SET_ERROR(pDevIns, rc, N_("Configuration error: Failed to read \"McfgBase\""))); + + rc = pHlp->pfnCFGMQueryU64Def(pCfg, "McfgLength", &pPciRoot->u64PciConfigMMioLength, 0); + AssertRCReturn(rc, PDMDEV_SET_ERROR(pDevIns, rc, N_("Configuration error: Failed to read \"McfgLength\""))); + + Log(("PCI: fUseIoApic=%RTbool McfgBase=%#RX64 McfgLength=%#RX64 fR0Enabled=%RTbool fRCEnabled=%RTbool\n", pPciRoot->fUseIoApic, + pPciRoot->u64PciConfigMMioAddress, pPciRoot->u64PciConfigMMioLength, pDevIns->fR0Enabled, pDevIns->fRCEnabled)); + + /* + * Init data. + */ + /* And fill values */ + pBusCC->pDevInsR3 = pDevIns; + pPciRoot->hIoPortAddress = NIL_IOMIOPORTHANDLE; + pPciRoot->hIoPortData = NIL_IOMIOPORTHANDLE; + pPciRoot->hIoPortMagic = NIL_IOMIOPORTHANDLE; + pPciRoot->hMmioMcfg = NIL_IOMMMIOHANDLE; + pPciRoot->PciBus.fTypePiix3 = false; + pPciRoot->PciBus.fTypeIch9 = true; + pPciRoot->PciBus.fPureBridge = false; + pPciRoot->PciBus.papBridgesR3 = (PPDMPCIDEV *)PDMDevHlpMMHeapAllocZ(pDevIns, sizeof(PPDMPCIDEV) * RT_ELEMENTS(pPciRoot->PciBus.apDevices)); + AssertLogRelReturn(pPciRoot->PciBus.papBridgesR3, VERR_NO_MEMORY); + + /* + * Disable default device locking. + */ + rc = PDMDevHlpSetDeviceCritSect(pDevIns, PDMDevHlpCritSectGetNop(pDevIns)); + AssertRCReturn(rc, rc); + + /* + * Register bus + */ + PDMPCIBUSREGCC PciBusReg; + PciBusReg.u32Version = PDM_PCIBUSREGCC_VERSION; + PciBusReg.pfnRegisterR3 = devpciR3CommonRegisterDevice; + PciBusReg.pfnRegisterMsiR3 = ich9pciRegisterMsi; + PciBusReg.pfnIORegionRegisterR3 = devpciR3CommonIORegionRegister; + PciBusReg.pfnInterceptConfigAccesses = devpciR3CommonInterceptConfigAccesses; + PciBusReg.pfnConfigRead = devpciR3CommonConfigRead; + PciBusReg.pfnConfigWrite = devpciR3CommonConfigWrite; + PciBusReg.pfnSetIrqR3 = ich9pciSetIrq; + PciBusReg.u32EndVersion = PDM_PCIBUSREGCC_VERSION; + rc = PDMDevHlpPCIBusRegister(pDevIns, &PciBusReg, &pBusCC->pPciHlpR3, &pBus->iBus); + if (RT_FAILURE(rc)) + return PDMDEV_SET_ERROR(pDevIns, rc, N_("Failed to register ourselves as a PCI Bus")); + Assert(pBus->iBus == 0); + if (pBusCC->pPciHlpR3->u32Version != PDM_PCIHLPR3_VERSION) + return PDMDevHlpVMSetError(pDevIns, VERR_VERSION_MISMATCH, RT_SRC_POS, + N_("PCI helper version mismatch; got %#x expected %#x"), + pBusCC->pPciHlpR3->u32Version, PDM_PCIHLPR3_VERSION); + + /* + * Fill in PCI configs and add them to the bus. + */ + /** @todo Disabled for now because this causes error messages with Linux guests. + * The guest loads the x38_edac device which tries to map a memory region + * using an address given at place 0x48 - 0x4f in the PCI config space. + * This fails. because we don't register such a region. + */ +#if 0 + /* Host bridge device */ + PDMPciDevSetVendorId( &pBus->PciDev, 0x8086); /* Intel */ + PDMPciDevSetDeviceId( &pBus->PciDev, 0x29e0); /* Desktop */ + PDMPciDevSetRevisionId(&pBus->PciDev, 0x01); /* rev. 01 */ + PDMPciDevSetClassBase( &pBus->PciDev, 0x06); /* bridge */ + PDMPciDevSetClassSub( &pBus->PciDev, 0x00); /* Host/PCI bridge */ + PDMPciDevSetClassProg( &pBus->PciDev, 0x00); /* Host/PCI bridge */ + PDMPciDevSetHeaderType(&pBus->PciDev, 0x00); /* bridge */ + PDMPciDevSetWord(&pBus->PciDev, VBOX_PCI_SEC_STATUS, 0x0280); /* secondary status */ + + pBus->PciDev.pDevIns = pDevIns; + /* We register Host<->PCI controller on the bus */ + ich9pciRegisterInternal(pBus, 0, &pBus->PciDev, "dram"); +#endif + + /* + * Register I/O ports. + */ + static const IOMIOPORTDESC s_aAddrDesc[] = { { "PCI address", "PCI address", NULL, NULL }, { NULL, NULL, NULL, NULL } }; + rc = PDMDevHlpIoPortCreateAndMap(pDevIns, 0x0cf8, 1, ich9pciIOPortAddressWrite, ich9pciIOPortAddressRead, + "ICH9 (PCI)", s_aAddrDesc, &pPciRoot->hIoPortAddress); + AssertLogRelRCReturn(rc, rc); + + static const IOMIOPORTDESC s_aDataDesc[] = { { "PCI data", "PCI data", NULL, NULL }, { NULL, NULL, NULL, NULL } }; + rc = PDMDevHlpIoPortCreateAndMap(pDevIns, 0x0cfc, 4, ich9pciIOPortDataWrite, ich9pciIOPortDataRead, + "ICH9 (PCI)", s_aDataDesc, &pPciRoot->hIoPortData); + AssertLogRelRCReturn(rc, rc); + + static const IOMIOPORTDESC s_aMagicDesc[] = { { "PCI magic", NULL, NULL, NULL }, { NULL, NULL, NULL, NULL } }; + rc = PDMDevHlpIoPortCreateAndMap(pDevIns, 0x0410, 1, ich9pciR3IOPortMagicPCIWrite, ich9pciR3IOPortMagicPCIRead, + "ICH9 (Fake PCI BIOS trigger)", s_aMagicDesc, &pPciRoot->hIoPortMagic); + AssertLogRelRCReturn(rc, rc); + + /* + * MMIO handlers. + */ + if (pPciRoot->u64PciConfigMMioAddress != 0) + { + rc = PDMDevHlpMmioCreateAndMap(pDevIns, pPciRoot->u64PciConfigMMioAddress, pPciRoot->u64PciConfigMMioLength, + ich9pciMcfgMMIOWrite, ich9pciMcfgMMIORead, + IOMMMIO_FLAGS_READ_PASSTHRU | IOMMMIO_FLAGS_WRITE_PASSTHRU, + "MCFG ranges", &pPciRoot->hMmioMcfg); + AssertMsgRCReturn(rc, ("rc=%Rrc %#RX64/%#RX64\n", rc, pPciRoot->u64PciConfigMMioAddress, pPciRoot->u64PciConfigMMioLength), rc); + } + + /* + * Saved state and info handlers. + */ + rc = PDMDevHlpSSMRegisterEx(pDevIns, VBOX_ICH9PCI_SAVED_STATE_VERSION, + sizeof(*pBus) + 16*128, "pgm", + NULL, NULL, NULL, + NULL, ich9pciR3SaveExec, NULL, + NULL, ich9pciR3LoadExec, NULL); + AssertRCReturn(rc, rc); + + /** @todo other chipset devices shall be registered too */ + + PDMDevHlpDBGFInfoRegister(pDevIns, "pci", + "Display PCI bus status. Recognizes 'basic' or 'verbose' as arguments, defaults to 'basic'.", + devpciR3InfoPci); + PDMDevHlpDBGFInfoRegister(pDevIns, "pciirq", "Display PCI IRQ state. (no arguments)", devpciR3InfoPciIrq); + + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{PDMDEVREG,pfnDestruct} + */ +static DECLCALLBACK(int) ich9pciR3Destruct(PPDMDEVINS pDevIns) +{ + PDEVPCIROOT pPciRoot = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + if (pPciRoot->PciBus.papBridgesR3) + { + PDMDevHlpMMHeapFree(pDevIns, pPciRoot->PciBus.papBridgesR3); + pPciRoot->PciBus.papBridgesR3 = NULL; + } + return VINF_SUCCESS; +} + + +/** + * @param pDevIns The PCI bus device instance. + * @param pDev The PCI device to reset. + */ +void devpciR3ResetDevice(PPDMDEVINS pDevIns, PPDMPCIDEV pDev) +{ + /* Clear regions */ + for (int iRegion = 0; iRegion < VBOX_PCI_NUM_REGIONS; iRegion++) + { + PCIIOREGION *pRegion = &pDev->Int.s.aIORegions[iRegion]; + if (pRegion->size == 0) + continue; + bool const f64Bit = (pRegion->type & ((uint8_t)(PCI_ADDRESS_SPACE_BAR64 | PCI_ADDRESS_SPACE_IO))) + == PCI_ADDRESS_SPACE_BAR64; + + devpciR3UnmapRegion(pDev, iRegion); + + if (f64Bit) + iRegion++; + } + + if (pciDevIsPassthrough(pDev)) + { + // no reset handler - we can do what we need in PDM reset handler + /// @todo is it correct? + } + else + { + devpciR3SetWord(pDevIns, pDev, VBOX_PCI_COMMAND, + devpciR3GetWord(pDev, VBOX_PCI_COMMAND) + & ~(VBOX_PCI_COMMAND_IO | VBOX_PCI_COMMAND_MEMORY | + VBOX_PCI_COMMAND_MASTER | VBOX_PCI_COMMAND_SPECIAL | + VBOX_PCI_COMMAND_PARITY | VBOX_PCI_COMMAND_SERR | + VBOX_PCI_COMMAND_FAST_BACK | VBOX_PCI_COMMAND_INTX_DISABLE)); + + /* Bridge device reset handlers processed later */ + if (!pciDevIsPci2PciBridge(pDev)) + { + devpciR3SetByte(pDevIns, pDev, VBOX_PCI_CACHE_LINE_SIZE, 0x0); + devpciR3SetByte(pDevIns, pDev, VBOX_PCI_INTERRUPT_LINE, 0x0); + } + + /* Reset MSI message control. */ + if (pciDevIsMsiCapable(pDev)) + devpciR3SetWord(pDevIns, pDev, pDev->Int.s.u8MsiCapOffset + VBOX_MSI_CAP_MESSAGE_CONTROL, + devpciR3GetWord(pDev, pDev->Int.s.u8MsiCapOffset + VBOX_MSI_CAP_MESSAGE_CONTROL) & 0xff8e); + + /* Reset MSI-X message control. */ + if (pciDevIsMsixCapable(pDev)) + devpciR3SetWord(pDevIns, pDev, pDev->Int.s.u8MsixCapOffset + VBOX_MSIX_CAP_MESSAGE_CONTROL, + devpciR3GetWord(pDev, pDev->Int.s.u8MsixCapOffset + VBOX_MSIX_CAP_MESSAGE_CONTROL) & 0x3fff); + } +} + +/** + * Returns the PCI express encoding for the given PCI Express Device/Port type string. + * + * @returns PCI express encoding. + * @param pszExpressPortType The string identifier for the port/device type. + */ +static uint8_t ich9pcibridgeR3GetExpressPortTypeFromString(const char *pszExpressPortType) +{ + if (!RTStrCmp(pszExpressPortType, "EndPtDev")) + return VBOX_PCI_EXP_TYPE_ENDPOINT; + if (!RTStrCmp(pszExpressPortType, "LegEndPtDev")) + return VBOX_PCI_EXP_TYPE_LEG_END; + if (!RTStrCmp(pszExpressPortType, "RootCmplxRootPort")) + return VBOX_PCI_EXP_TYPE_ROOT_PORT; + if (!RTStrCmp(pszExpressPortType, "ExpressSwUpstream")) + return VBOX_PCI_EXP_TYPE_UPSTREAM; + if (!RTStrCmp(pszExpressPortType, "ExpressSwDownstream")) + return VBOX_PCI_EXP_TYPE_DOWNSTREAM; + if (!RTStrCmp(pszExpressPortType, "Express2PciBridge")) + return VBOX_PCI_EXP_TYPE_PCI_BRIDGE; + if (!RTStrCmp(pszExpressPortType, "Pci2ExpressBridge")) + return VBOX_PCI_EXP_TYPE_PCIE_BRIDGE; + if (!RTStrCmp(pszExpressPortType, "RootCmplxIntEp")) + return VBOX_PCI_EXP_TYPE_ROOT_INT_EP; + if (!RTStrCmp(pszExpressPortType, "RootCmplxEc")) + return VBOX_PCI_EXP_TYPE_ROOT_EC; + + AssertLogRelMsgFailedReturn(("Unknown express port type specified"), VBOX_PCI_EXP_TYPE_ROOT_INT_EP); +} + +/** + * Recursive worker for ich9pciReset. + * + * @param pDevIns ICH9 bridge (root or PCI-to-PCI) instance. + */ +static void ich9pciResetBridge(PPDMDEVINS pDevIns) +{ + PDEVPCIBUS pBus = PDMINS_2_DATA(pDevIns, PDEVPCIBUS); + + /* PCI-specific reset for each device. */ + for (uint32_t uDevFn = 0; uDevFn < RT_ELEMENTS(pBus->apDevices); uDevFn++) + { + if (pBus->apDevices[uDevFn]) + devpciR3ResetDevice(pDevIns, pBus->apDevices[uDevFn]); + } + + for (uint32_t iBridge = 0; iBridge < pBus->cBridges; iBridge++) + { + if (pBus->papBridgesR3[iBridge]) + ich9pciResetBridge(pBus->papBridgesR3[iBridge]->Int.s.CTX_SUFF(pDevIns)); + } + + /* Reset topology config for non-root bridge. Last thing to do, otherwise + * the secondary and subordinate are instantly unreachable. */ + if (pBus->iBus != 0) + { + PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0]; + + devpciR3SetByte(pDevIns, pPciDev, VBOX_PCI_PRIMARY_BUS, 0); + devpciR3SetByte(pDevIns, pPciDev, VBOX_PCI_SECONDARY_BUS, 0); + devpciR3SetByte(pDevIns, pPciDev, VBOX_PCI_SUBORDINATE_BUS, 0); + /* Not resetting the address decoders of the bridge to 0, since the + * PCI-to-PCI Bridge spec says that there is no default value. */ + } +} + + +/** + * @interface_method_impl{PDMDEVREG,pfnReset} + */ +static DECLCALLBACK(void) ich9pciReset(PPDMDEVINS pDevIns) +{ + /* Reset everything under the root bridge. */ + ich9pciResetBridge(pDevIns); +} + + +/** + * @interface_method_impl{PDMIBASE,pfnQueryInterface} + */ +static DECLCALLBACK(void *) ich9pcibridgeQueryInterface(PPDMIBASE pInterface, const char *pszIID) +{ + PPDMDEVINS pDevIns = RT_FROM_MEMBER(pInterface, PDMDEVINS, IBase); + PDMIBASE_RETURN_INTERFACE(pszIID, PDMIBASE, &pDevIns->IBase); + + /* HACK ALERT! Special access to the PDMPCIDEV structure of an ich9pcibridge + instance (see PDMIICH9BRIDGEPDMPCIDEV_IID for details). */ + PDMIBASE_RETURN_INTERFACE(pszIID, PDMIICH9BRIDGEPDMPCIDEV, pDevIns->apPciDevs[0]); + return NULL; +} + + +/** + * @interface_method_impl{PDMDEVREG,pfnDestruct} + */ +static DECLCALLBACK(int) ich9pcibridgeR3Destruct(PPDMDEVINS pDevIns) +{ + PDEVPCIBUS pBus = PDMINS_2_DATA(pDevIns, PDEVPCIBUS); + if (pBus->papBridgesR3) + { + PDMDevHlpMMHeapFree(pDevIns, pBus->papBridgesR3); + pBus->papBridgesR3 = NULL; + } + return VINF_SUCCESS; +} + + +/** + * @interface_method_impl{PDMDEVREG,pfnConstruct} + */ +static DECLCALLBACK(int) ich9pcibridgeR3Construct(PPDMDEVINS pDevIns, int iInstance, PCFGMNODE pCfg) +{ + PDMDEV_CHECK_VERSIONS_RETURN(pDevIns); + PCPDMDEVHLPR3 pHlp = pDevIns->pHlpR3; + + /* + * Validate and read configuration. + */ + PDMDEV_VALIDATE_CONFIG_RETURN(pDevIns, "ExpressEnabled|ExpressPortType", ""); + + /* check if we're supposed to implement a PCIe bridge. */ + bool fExpress; + int rc = pHlp->pfnCFGMQueryBoolDef(pCfg, "ExpressEnabled", &fExpress, false); + AssertRCReturn(rc, PDMDEV_SET_ERROR(pDevIns, rc, N_("Configuration error: Failed to query boolean value \"ExpressEnabled\""))); + + char szExpressPortType[80]; + rc = pHlp->pfnCFGMQueryStringDef(pCfg, "ExpressPortType", szExpressPortType, sizeof(szExpressPortType), "RootCmplxIntEp"); + AssertRCReturn(rc, PDMDEV_SET_ERROR(pDevIns, rc, N_("Configuration error: failed to read \"ExpressPortType\" as string"))); + + uint8_t const uExpressPortType = ich9pcibridgeR3GetExpressPortTypeFromString(szExpressPortType); + Log(("PCI/bridge#%u: fR0Enabled=%RTbool fRCEnabled=%RTbool fExpress=%RTbool uExpressPortType=%u (%s)\n", + iInstance, pDevIns->fR0Enabled, pDevIns->fRCEnabled, fExpress, uExpressPortType, szExpressPortType)); + + /* + * Init data and register the PCI bus. + */ + pDevIns->IBase.pfnQueryInterface = ich9pcibridgeQueryInterface; + + PDEVPCIBUSCC pBusCC = PDMINS_2_DATA_CC(pDevIns, PDEVPCIBUSCC); + PDEVPCIBUS pBus = PDMINS_2_DATA(pDevIns, PDEVPCIBUS); + + pBus->fTypePiix3 = false; + pBus->fTypeIch9 = true; + pBus->fPureBridge = true; + pBusCC->pDevInsR3 = pDevIns; + pBus->papBridgesR3 = (PPDMPCIDEV *)PDMDevHlpMMHeapAllocZ(pDevIns, sizeof(PPDMPCIDEV) * RT_ELEMENTS(pBus->apDevices)); + AssertLogRelReturn(pBus->papBridgesR3, VERR_NO_MEMORY); + + PDMPCIBUSREGCC PciBusReg; + PciBusReg.u32Version = PDM_PCIBUSREGCC_VERSION; + PciBusReg.pfnRegisterR3 = devpcibridgeR3CommonRegisterDevice; + PciBusReg.pfnRegisterMsiR3 = ich9pciRegisterMsi; + PciBusReg.pfnIORegionRegisterR3 = devpciR3CommonIORegionRegister; + PciBusReg.pfnInterceptConfigAccesses = devpciR3CommonInterceptConfigAccesses; + PciBusReg.pfnConfigWrite = devpciR3CommonConfigWrite; + PciBusReg.pfnConfigRead = devpciR3CommonConfigRead; + PciBusReg.pfnSetIrqR3 = ich9pcibridgeSetIrq; + PciBusReg.u32EndVersion = PDM_PCIBUSREGCC_VERSION; + rc = PDMDevHlpPCIBusRegister(pDevIns, &PciBusReg, &pBusCC->pPciHlpR3, &pBus->iBus); + if (RT_FAILURE(rc)) + return PDMDEV_SET_ERROR(pDevIns, rc, N_("Failed to register ourselves as a PCI Bus")); + Assert(pBus->iBus == (uint32_t)iInstance + 1); /* Can be removed when adding support for multiple bridge implementations. */ + if (pBusCC->pPciHlpR3->u32Version != PDM_PCIHLPR3_VERSION) + return PDMDevHlpVMSetError(pDevIns, VERR_VERSION_MISMATCH, RT_SRC_POS, + N_("PCI helper version mismatch; got %#x expected %#x"), + pBusCC->pPciHlpR3->u32Version, PDM_PCIHLPR3_VERSION); + + LogRel(("PCI: Registered bridge instance #%u as PDM bus no %u.\n", iInstance, pBus->iBus)); + + + /* Disable default device locking. */ + rc = PDMDevHlpSetDeviceCritSect(pDevIns, PDMDevHlpCritSectGetNop(pDevIns)); + AssertRCReturn(rc, rc); + + /* + * Fill in PCI configs and add them to the bus. + */ + PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0]; + PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev); + + PDMPciDevSetVendorId( pPciDev, 0x8086); /* Intel */ + if (fExpress) + { + PDMPciDevSetDeviceId(pPciDev, 0x29e1); /* 82X38/X48 Express Host-Primary PCI Express Bridge. */ + PDMPciDevSetRevisionId(pPciDev, 0x01); + } + else + { + PDMPciDevSetDeviceId(pPciDev, 0x2448); /* 82801 Mobile PCI bridge. */ + PDMPciDevSetRevisionId(pPciDev, 0xf2); + } + PDMPciDevSetClassSub( pPciDev, 0x04); /* pci2pci */ + PDMPciDevSetClassBase( pPciDev, 0x06); /* PCI_bridge */ + if (fExpress) + PDMPciDevSetClassProg(pPciDev, 0x00); /* Normal decoding. */ + else + PDMPciDevSetClassProg(pPciDev, 0x01); /* Supports subtractive decoding. */ + PDMPciDevSetHeaderType(pPciDev, 0x01); /* Single function device which adheres to the PCI-to-PCI bridge spec. */ + if (fExpress) + { + PDMPciDevSetCommand(pPciDev, VBOX_PCI_COMMAND_SERR); + PDMPciDevSetStatus(pPciDev, VBOX_PCI_STATUS_CAP_LIST); /* Has capabilities. */ + PDMPciDevSetByte(pPciDev, VBOX_PCI_CACHE_LINE_SIZE, 8); /* 32 bytes */ + /* PCI Express */ + PDMPciDevSetByte(pPciDev, 0xa0 + 0, VBOX_PCI_CAP_ID_EXP); /* PCI_Express */ + PDMPciDevSetByte(pPciDev, 0xa0 + 1, 0); /* next */ + PDMPciDevSetWord(pPciDev, 0xa0 + 2, + /* version */ 0x2 + | (uExpressPortType << 4)); + PDMPciDevSetDWord(pPciDev, 0xa0 + 4, VBOX_PCI_EXP_DEVCAP_RBE); /* Device capabilities. */ + PDMPciDevSetWord(pPciDev, 0xa0 + 8, 0x0000); /* Device control. */ + PDMPciDevSetWord(pPciDev, 0xa0 + 10, 0x0000); /* Device status. */ + PDMPciDevSetDWord(pPciDev, 0xa0 + 12, + /* Max Link Speed */ 2 + | /* Maximum Link Width */ (16 << 4) + | /* Active State Power Management (ASPM) Sopport */ (0 << 10) + | VBOX_PCI_EXP_LNKCAP_LBNC + | /* Port Number */ ((2 + iInstance) << 24)); /* Link capabilities. */ + PDMPciDevSetWord(pPciDev, 0xa0 + 16, VBOX_PCI_EXP_LNKCTL_CLOCK); /* Link control. */ + PDMPciDevSetWord(pPciDev, 0xa0 + 18, + /* Current Link Speed */ 2 + | /* Negotiated Link Width */ (16 << 4) + | VBOX_PCI_EXP_LNKSTA_SL_CLK); /* Link status. */ + PDMPciDevSetDWord(pPciDev, 0xa0 + 20, + /* Slot Power Limit Value */ (75 << 7) + | /* Physical Slot Number */ (0 << 19)); /* Slot capabilities. */ + PDMPciDevSetWord(pPciDev, 0xa0 + 24, 0x0000); /* Slot control. */ + PDMPciDevSetWord(pPciDev, 0xa0 + 26, 0x0000); /* Slot status. */ + PDMPciDevSetWord(pPciDev, 0xa0 + 28, 0x0000); /* Root control. */ + PDMPciDevSetWord(pPciDev, 0xa0 + 30, 0x0000); /* Root capabilities. */ + PDMPciDevSetDWord(pPciDev, 0xa0 + 32, 0x00000000); /* Root status. */ + PDMPciDevSetDWord(pPciDev, 0xa0 + 36, 0x00000000); /* Device capabilities 2. */ + PDMPciDevSetWord(pPciDev, 0xa0 + 40, 0x0000); /* Device control 2. */ + PDMPciDevSetWord(pPciDev, 0xa0 + 42, 0x0000); /* Device status 2. */ + PDMPciDevSetDWord(pPciDev, 0xa0 + 44, + /* Supported Link Speeds Vector */ (2 << 1)); /* Link capabilities 2. */ + PDMPciDevSetWord(pPciDev, 0xa0 + 48, + /* Target Link Speed */ 2); /* Link control 2. */ + PDMPciDevSetWord(pPciDev, 0xa0 + 50, 0x0000); /* Link status 2. */ + PDMPciDevSetDWord(pPciDev, 0xa0 + 52, 0x00000000); /* Slot capabilities 2. */ + PDMPciDevSetWord(pPciDev, 0xa0 + 56, 0x0000); /* Slot control 2. */ + PDMPciDevSetWord(pPciDev, 0xa0 + 58, 0x0000); /* Slot status 2. */ + PDMPciDevSetCapabilityList(pPciDev, 0xa0); + } + else + { + PDMPciDevSetCommand(pPciDev, 0x00); + PDMPciDevSetStatus(pPciDev, 0x20); /* 66MHz Capable. */ + } + PDMPciDevSetInterruptLine(pPciDev, 0x00); /* This device does not assert interrupts. */ + + /* + * This device does not generate interrupts. Interrupt delivery from + * devices attached to the bus is unaffected. + */ + PDMPciDevSetInterruptPin (pPciDev, 0x00); + + if (fExpress) + { + /** @todo r=klaus set up the PCIe config space beyond the old 256 byte + * limit, containing additional capability descriptors. */ + } + + /* + * Register this PCI bridge. The called function will take care on which bus we will get registered. + */ + rc = PDMDevHlpPCIRegisterEx(pDevIns, pPciDev, PDMPCIDEVREG_F_PCI_BRIDGE, PDMPCIDEVREG_DEV_NO_FIRST_UNUSED, + PDMPCIDEVREG_FUN_NO_FIRST_UNUSED, "ich9pcibridge"); + AssertLogRelRCReturn(rc, rc); + + pPciDev->Int.s.pfnBridgeConfigRead = ich9pcibridgeConfigRead; + pPciDev->Int.s.pfnBridgeConfigWrite = ich9pcibridgeConfigWrite; + + /* + * Register SSM handlers. We use the same saved state version as for the host bridge + * to make changes easier. + */ + rc = PDMDevHlpSSMRegisterEx(pDevIns, VBOX_ICH9PCI_SAVED_STATE_VERSION, + sizeof(*pBus) + 16*128, + "pgm" /* before */, + NULL, NULL, NULL, + NULL, ich9pcibridgeR3SaveExec, NULL, + NULL, ich9pcibridgeR3LoadExec, NULL); + AssertLogRelRCReturn(rc, rc); + + return VINF_SUCCESS; +} + +#else /* !IN_RING3 */ + +/** + * @interface_method_impl{PDMDEVREGR0,pfnConstruct} + */ +DECLCALLBACK(int) ich9pciRZConstruct(PPDMDEVINS pDevIns) +{ + PDMDEV_CHECK_VERSIONS_RETURN(pDevIns); + PDEVPCIROOT pPciRoot = PDMINS_2_DATA(pDevIns, PDEVPCIROOT); + PDEVPCIBUSCC pBusCC = PDMINS_2_DATA_CC(pDevIns, PDEVPCIBUSCC); + + /* Mirror the ring-3 device lock disabling: */ + int rc = PDMDevHlpSetDeviceCritSect(pDevIns, PDMDevHlpCritSectGetNop(pDevIns)); + AssertRCReturn(rc, rc); + + /* Set up the RZ PCI bus callbacks: */ + PDMPCIBUSREGCC PciBusReg; + PciBusReg.u32Version = PDM_PCIBUSREGCC_VERSION; + PciBusReg.iBus = pPciRoot->PciBus.iBus; + PciBusReg.pfnSetIrq = ich9pciSetIrq; + PciBusReg.u32EndVersion = PDM_PCIBUSREGCC_VERSION; + rc = PDMDevHlpPCIBusSetUpContext(pDevIns, &PciBusReg, &pBusCC->CTX_SUFF(pPciHlp)); + AssertRCReturn(rc, rc); + + /* Set up I/O port callbacks, except for the magic port: */ + rc = PDMDevHlpIoPortSetUpContext(pDevIns, pPciRoot->hIoPortAddress, ich9pciIOPortAddressWrite, ich9pciIOPortAddressRead, NULL); + AssertLogRelRCReturn(rc, rc); + + rc = PDMDevHlpIoPortSetUpContext(pDevIns, pPciRoot->hIoPortData, ich9pciIOPortDataWrite, ich9pciIOPortDataRead, NULL); + AssertLogRelRCReturn(rc, rc); + + /* Set up MMIO callbacks: */ + if (pPciRoot->hMmioMcfg != NIL_IOMMMIOHANDLE) + { + rc = PDMDevHlpMmioSetUpContext(pDevIns, pPciRoot->hMmioMcfg, ich9pciMcfgMMIOWrite, ich9pciMcfgMMIORead, NULL /*pvUser*/); + AssertLogRelRCReturn(rc, rc); + } + + return rc; +} + + +/** + * @interface_method_impl{PDMDEVREGR0,pfnConstruct} + */ +DECLCALLBACK(int) ich9pcibridgeRZConstruct(PPDMDEVINS pDevIns) +{ + PDMDEV_CHECK_VERSIONS_RETURN(pDevIns); + PDEVPCIBUS pBus = PDMINS_2_DATA(pDevIns, PDEVPCIBUS); + PDEVPCIBUSCC pBusCC = PDMINS_2_DATA_CC(pDevIns, PDEVPCIBUSCC); + + /* Mirror the ring-3 device lock disabling: */ + int rc = PDMDevHlpSetDeviceCritSect(pDevIns, PDMDevHlpCritSectGetNop(pDevIns)); + AssertRCReturn(rc, rc); + + /* Set up the RZ PCI bus callbacks: */ + PDMPCIBUSREGCC PciBusReg; + PciBusReg.u32Version = PDM_PCIBUSREGCC_VERSION; + PciBusReg.iBus = pBus->iBus; + PciBusReg.pfnSetIrq = ich9pcibridgeSetIrq; + PciBusReg.u32EndVersion = PDM_PCIBUSREGCC_VERSION; + rc = PDMDevHlpPCIBusSetUpContext(pDevIns, &PciBusReg, &pBusCC->CTX_SUFF(pPciHlp)); + AssertRCReturn(rc, rc); + + return rc; +} + +#endif /* !IN_RING3 */ + +/** + * The PCI bus device registration structure. + */ +const PDMDEVREG g_DevicePciIch9 = +{ + /* .u32Version = */ PDM_DEVREG_VERSION, + /* .uReserved0 = */ 0, + /* .szName = */ "ich9pci", + /* .fFlags = */ PDM_DEVREG_FLAGS_DEFAULT_BITS | PDM_DEVREG_FLAGS_RZ | PDM_DEVREG_FLAGS_NEW_STYLE, + /* .fClass = */ PDM_DEVREG_CLASS_BUS_PCI, + /* .cMaxInstances = */ 1, + /* .uSharedVersion = */ 42, + /* .cbInstanceShared = */ sizeof(DEVPCIROOT), + /* .cbInstanceCC = */ sizeof(CTX_SUFF(DEVPCIBUS)), + /* .cbInstanceRC = */ sizeof(DEVPCIBUSRC), + /* .cMaxPciDevices = */ 1, + /* .cMaxMsixVectors = */ 0, + /* .pszDescription = */ "ICH9 PCI bridge", +#if defined(IN_RING3) + /* .pszRCMod = */ "VBoxDDRC.rc", + /* .pszR0Mod = */ "VBoxDDR0.r0", + /* .pfnConstruct = */ ich9pciR3Construct, + /* .pfnDestruct = */ ich9pciR3Destruct, + /* .pfnRelocate = */ NULL, + /* .pfnMemSetup = */ NULL, + /* .pfnPowerOn = */ NULL, + /* .pfnReset = */ ich9pciReset, + /* .pfnSuspend = */ NULL, + /* .pfnResume = */ NULL, + /* .pfnAttach = */ NULL, + /* .pfnDetach = */ NULL, + /* .pfnQueryInterface = */ NULL, + /* .pfnInitComplete = */ NULL, + /* .pfnPowerOff = */ NULL, + /* .pfnSoftReset = */ NULL, + /* .pfnReserved0 = */ NULL, + /* .pfnReserved1 = */ NULL, + /* .pfnReserved2 = */ NULL, + /* .pfnReserved3 = */ NULL, + /* .pfnReserved4 = */ NULL, + /* .pfnReserved5 = */ NULL, + /* .pfnReserved6 = */ NULL, + /* .pfnReserved7 = */ NULL, +#elif defined(IN_RING0) + /* .pfnEarlyConstruct = */ NULL, + /* .pfnConstruct = */ ich9pciRZConstruct, + /* .pfnDestruct = */ NULL, + /* .pfnFinalDestruct = */ NULL, + /* .pfnRequest = */ NULL, + /* .pfnReserved0 = */ NULL, + /* .pfnReserved1 = */ NULL, + /* .pfnReserved2 = */ NULL, + /* .pfnReserved3 = */ NULL, + /* .pfnReserved4 = */ NULL, + /* .pfnReserved5 = */ NULL, + /* .pfnReserved6 = */ NULL, + /* .pfnReserved7 = */ NULL, +#elif defined(IN_RC) + /* .pfnConstruct = */ ich9pciRZConstruct, + /* .pfnReserved0 = */ NULL, + /* .pfnReserved1 = */ NULL, + /* .pfnReserved2 = */ NULL, + /* .pfnReserved3 = */ NULL, + /* .pfnReserved4 = */ NULL, + /* .pfnReserved5 = */ NULL, + /* .pfnReserved6 = */ NULL, + /* .pfnReserved7 = */ NULL, +#else +# error "Not in IN_RING3, IN_RING0 or IN_RC!" +#endif + /* .u32VersionEnd = */ PDM_DEVREG_VERSION +}; + +/** + * The device registration structure + * for the PCI-to-PCI bridge. + */ +const PDMDEVREG g_DevicePciIch9Bridge = +{ + /* .u32Version = */ PDM_DEVREG_VERSION, + /* .uReserved0 = */ 0, + /* .szName = */ "ich9pcibridge", + /* .fFlags = */ PDM_DEVREG_FLAGS_DEFAULT_BITS | PDM_DEVREG_FLAGS_RZ | PDM_DEVREG_FLAGS_NEW_STYLE, + /* .fClass = */ PDM_DEVREG_CLASS_BUS_PCI, + /* .cMaxInstances = */ ~0U, + /* .uSharedVersion = */ 42, + /* .cbInstanceShared = */ sizeof(DEVPCIBUS), + /* .cbInstanceCC = */ sizeof(CTX_SUFF(DEVPCIBUS)), + /* .cbInstanceRC = */ 0, + /* .cMaxPciDevices = */ 1, + /* .cMaxMsixVectors = */ 0, + /* .pszDescription = */ "ICH9 PCI to PCI bridge", +#if defined(IN_RING3) + /* .pszRCMod = */ "VBoxDDRC.rc", + /* .pszR0Mod = */ "VBoxDDR0.r0", + /* .pfnConstruct = */ ich9pcibridgeR3Construct, + /* .pfnDestruct = */ ich9pcibridgeR3Destruct, + /* .pfnRelocate = */ NULL, + /* .pfnMemSetup = */ NULL, + /* .pfnPowerOn = */ NULL, + /* .pfnReset = */ NULL, /* Must be NULL, to make sure only bus driver handles reset */ + /* .pfnSuspend = */ NULL, + /* .pfnResume = */ NULL, + /* .pfnAttach = */ NULL, + /* .pfnDetach = */ NULL, + /* .pfnQueryInterface = */ NULL, + /* .pfnInitComplete = */ NULL, + /* .pfnPowerOff = */ NULL, + /* .pfnSoftReset = */ NULL, + /* .pfnReserved0 = */ NULL, + /* .pfnReserved1 = */ NULL, + /* .pfnReserved2 = */ NULL, + /* .pfnReserved3 = */ NULL, + /* .pfnReserved4 = */ NULL, + /* .pfnReserved5 = */ NULL, + /* .pfnReserved6 = */ NULL, + /* .pfnReserved7 = */ NULL, +#elif defined(IN_RING0) + /* .pfnEarlyConstruct = */ NULL, + /* .pfnConstruct = */ ich9pcibridgeRZConstruct, + /* .pfnDestruct = */ NULL, + /* .pfnFinalDestruct = */ NULL, + /* .pfnRequest = */ NULL, + /* .pfnReserved0 = */ NULL, + /* .pfnReserved1 = */ NULL, + /* .pfnReserved2 = */ NULL, + /* .pfnReserved3 = */ NULL, + /* .pfnReserved4 = */ NULL, + /* .pfnReserved5 = */ NULL, + /* .pfnReserved6 = */ NULL, + /* .pfnReserved7 = */ NULL, +#elif defined(IN_RC) + /* .pfnConstruct = */ ich9pcibridgeRZConstruct, + /* .pfnReserved0 = */ NULL, + /* .pfnReserved1 = */ NULL, + /* .pfnReserved2 = */ NULL, + /* .pfnReserved3 = */ NULL, + /* .pfnReserved4 = */ NULL, + /* .pfnReserved5 = */ NULL, + /* .pfnReserved6 = */ NULL, + /* .pfnReserved7 = */ NULL, +#else +# error "Not in IN_RING3, IN_RING0 or IN_RC!" +#endif + /* .u32VersionEnd = */ PDM_DEVREG_VERSION +}; + diff --git a/src/VBox/Devices/Bus/DevPciInternal.h b/src/VBox/Devices/Bus/DevPciInternal.h new file mode 100644 index 00000000..a30fc782 --- /dev/null +++ b/src/VBox/Devices/Bus/DevPciInternal.h @@ -0,0 +1,279 @@ +/* $Id: DevPciInternal.h $ */ +/** @file + * DevPCI - Common Internal Header. + */ + +/* + * Copyright (C) 2010-2022 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef VBOX_INCLUDED_SRC_Bus_DevPciInternal_h +#define VBOX_INCLUDED_SRC_Bus_DevPciInternal_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#ifndef PDMPCIDEV_INCLUDE_PRIVATE +# define PDMPCIDEV_INCLUDE_PRIVATE /* Hack to get pdmpcidevint.h included at the right point. */ +#endif +#include <VBox/vmm/pdmdev.h> + + +/** + * PCI bus shared instance data (common to both PCI buses). + * + * The PCI device for the bus is always the first one (PDMDEVINSR3::apPciDevs[0]). + */ +typedef struct DEVPCIBUS +{ + /** Bus number. */ + uint32_t iBus; + /** Number of bridges attached to the bus. */ + uint32_t cBridges; + /** Start device number - always zero (only for DevPCI source compat). */ + uint32_t iDevSearch; + /** Set if PIIX3 type. */ + uint32_t fTypePiix3 : 1; + /** Set if ICH9 type. */ + uint32_t fTypeIch9 : 1; + /** Set if this is a pure bridge, i.e. not part of DEVPCIGLOBALS struct. */ + uint32_t fPureBridge : 1; + /** Reserved for future config flags. */ + uint32_t uReservedConfigFlags : 29; + + /** Array of bridges attached to the bus. */ + R3PTRTYPE(PPDMPCIDEV *) papBridgesR3; + /** Cache line align apDevices. */ + uint32_t au32Alignment1[HC_ARCH_BITS == 32 ? 3 + 8 : 2 + 8]; + /** Array of PCI devices. We assume 32 slots, each with 8 functions. */ + R3PTRTYPE(PPDMPCIDEV) apDevices[256]; +} DEVPCIBUS; +/** Pointer to PCI bus shared instance data. */ +typedef DEVPCIBUS *PDEVPCIBUS; + +/** + * PCI bus ring-3 instance data (common to both PCI buses). + */ +typedef struct DEVPCIBUSR3 +{ + /** R3 pointer to the device instance. */ + PPDMDEVINSR3 pDevInsR3; + /** Pointer to the PCI R3 helpers. */ + PCPDMPCIHLPR3 pPciHlpR3; +} DEVPCIBUSR3; +/** Pointer to PCI bus ring-3 instance data. */ +typedef DEVPCIBUSR3 *PDEVPCIBUSR3; + +/** + * PCI bus ring-0 instance data (common to both PCI buses). + */ +typedef struct DEVPCIBUSR0 +{ + /** R0 pointer to the device instance. */ + PPDMDEVINSR0 pDevInsR0; + /** Pointer to the PCI R0 helpers. */ + PCPDMPCIHLPR0 pPciHlpR0; +} DEVPCIBUSR0; +/** Pointer to PCI bus ring-0 instance data. */ +typedef DEVPCIBUSR0 *PDEVPCIBUSR0; + +/** + * PCI bus raw-mode instance data (common to both PCI buses). + */ +typedef struct DEVPCIBUSRC +{ + /** R0 pointer to the device instance. */ + PPDMDEVINSRC pDevInsRC; + /** Pointer to the PCI raw-mode helpers. */ + PCPDMPCIHLPRC pPciHlpRC; +} DEVPCIBUSRC; +/** Pointer to PCI bus raw-mode instance data. */ +typedef DEVPCIBUSRC *PDEVPCIBUSRC; + +/** DEVPCIBUSR3, DEVPCIBUSR0 or DEVPCIBUSRC depending on context. */ +typedef CTX_SUFF(DEVPCIBUS) DEVPCIBUSCC; +/** PDEVPCIBUSR3, PDEVPCIBUSR0 or PDEVPCIBUSRC depending on context. */ +typedef CTX_SUFF(PDEVPCIBUS) PDEVPCIBUSCC; + + +/** @def DEVPCI_APIC_IRQ_PINS + * Number of pins for interrupts if the APIC is used. + */ +#define DEVPCI_APIC_IRQ_PINS 8 +/** @def DEVPCI_LEGACY_IRQ_PINS + * Number of pins for interrupts (PIRQ#0...PIRQ#3). + * @remarks Labling this "legacy" might be a bit off... + */ +#define DEVPCI_LEGACY_IRQ_PINS 4 + + +/** + * PCI Globals - This is the host-to-pci bridge and the root bus, shared data. + * + * @note Only used by the root bus, not the bridges. + */ +typedef struct DEVPCIROOT +{ + /** PCI bus which is attached to the host-to-PCI bridge. + * @note This must come first so we can share more code with the bridges! */ + DEVPCIBUS PciBus; + + /** I/O APIC usage flag (always true of ICH9, see constructor). */ + bool fUseIoApic; + /** Reserved for future config flags. */ + bool afFutureFlags[3+4+8]; + /** Physical address of PCI config space MMIO region. */ + uint64_t u64PciConfigMMioAddress; + /** Length of PCI config space MMIO region. */ + uint64_t u64PciConfigMMioLength; + + /** I/O APIC irq levels */ + volatile uint32_t auPciApicIrqLevels[DEVPCI_APIC_IRQ_PINS]; + /** Value latched in Configuration Address Port (0CF8h) */ + uint32_t uConfigReg; + /** Alignment padding. */ + uint32_t u32Alignment1; + /** Members only used by the PIIX3 code variant. + * (The PCI device for the PCI-to-ISA bridge is PDMDEVINSR3::apPciDevs[1].) */ + struct + { + /** ACPI IRQ level */ + uint32_t iAcpiIrqLevel; + /** ACPI PIC IRQ */ + int32_t iAcpiIrq; + /** Irq levels for the four PCI Irqs. + * These count how many devices asserted the IRQ line. If greater 0 an IRQ + * is sent to the guest. If it drops to 0 the IRQ is deasserted. + * @remarks Labling this "legacy" might be a bit off... + */ + volatile uint32_t auPciLegacyIrqLevels[DEVPCI_LEGACY_IRQ_PINS]; + } Piix3; + + /** The address I/O port handle. */ + IOMIOPORTHANDLE hIoPortAddress; + /** The data I/O port handle. */ + IOMIOPORTHANDLE hIoPortData; + /** The magic I/O port handle. */ + IOMIOPORTHANDLE hIoPortMagic; + /** The MCFG MMIO region. */ + IOMMMIOHANDLE hMmioMcfg; + +#if 1 /* Will be moved into the BIOS "soon". */ + /** Current bus number - obsolete (still used by DevPCI, but merge will fix that). */ + uint8_t uPciBiosBus; + uint8_t abAlignment2[7]; + /** The next I/O port address which the PCI BIOS will use. */ + uint32_t uPciBiosIo; + /** The next MMIO address which the PCI BIOS will use. */ + uint32_t uPciBiosMmio; + /** The next 64-bit MMIO address which the PCI BIOS will use. */ + uint64_t uPciBiosMmio64; +#endif + +} DEVPCIROOT; +/** Pointer to PCI device globals. */ +typedef DEVPCIROOT *PDEVPCIROOT; +/** Converts a PCI bus device instance pointer to a DEVPCIBUS pointer. */ +#define DEVINS_2_DEVPCIBUS(pDevIns) (&PDMINS_2_DATA(pDevIns, PDEVPCIROOT)->PciBus) +/** Converts a pointer to a PCI bus instance to a DEVPCIROOT pointer. */ +#define DEVPCIBUS_2_DEVPCIROOT(pPciBus) RT_FROM_MEMBER(pPciBus, DEVPCIROOT, PciBus) + + +/** @def PCI_LOCK_RET + * Acquires the PDM lock. This is a NOP if locking is disabled. */ +#define PCI_LOCK_RET(pDevIns, rcBusy) \ + do { \ + int const rcLock = PDMINS_2_DATA_CC(pDevIns, PDEVPCIBUSCC)->CTX_SUFF(pPciHlp)->pfnLock((pDevIns), rcBusy); \ + if (rcLock == VINF_SUCCESS) \ + { /* likely */ } \ + else \ + return rcLock; \ + } while (0) +/** @def PCI_UNLOCK + * Releases the PDM lock. This is a NOP if locking is disabled. */ +#define PCI_UNLOCK(pDevIns) \ + PDMINS_2_DATA_CC(pDevIns, PDEVPCIBUSCC)->CTX_SUFF(pPciHlp)->pfnUnlock(pDevIns) + + +DECLHIDDEN(PPDMDEVINS) devpcibridgeCommonSetIrqRootWalk(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, int iIrq, + PDEVPCIBUS *ppBus, uint8_t *puDevFnBridge, int *piIrqPinBridge); + +#ifdef IN_RING3 + +DECLCALLBACK(void) devpciR3InfoPci(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs); +DECLCALLBACK(void) devpciR3InfoPciIrq(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs); +DECLCALLBACK(int) devpciR3CommonRegisterDevice(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t fFlags, + uint8_t uPciDevNo, uint8_t uPciFunNo, const char *pszName); +DECLCALLBACK(int) devpcibridgeR3CommonRegisterDevice(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t fFlags, + uint8_t uPciDevNo, uint8_t uPciFunNo, const char *pszName); +DECLCALLBACK(int) devpciR3CommonIORegionRegister(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t iRegion, + RTGCPHYS cbRegion, PCIADDRESSSPACE enmType, uint32_t fFlags, + uint64_t hHandle, PFNPCIIOREGIONMAP pfnMapUnmap); +DECLCALLBACK(void) devpciR3CommonInterceptConfigAccesses(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, + PFNPCICONFIGREAD pfnRead, PFNPCICONFIGWRITE pfnWrite); +DECLCALLBACK(VBOXSTRICTRC) devpciR3CommonConfigRead(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, + uint32_t uAddress, unsigned cb, uint32_t *pu32Value); +DECLHIDDEN(VBOXSTRICTRC) devpciR3CommonConfigReadWorker(PPDMPCIDEV pPciDev, uint32_t uAddress, unsigned cb, uint32_t *pu32Value); +DECLCALLBACK(VBOXSTRICTRC) devpciR3CommonConfigWrite(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, + uint32_t uAddress, unsigned cb, uint32_t u32Value); +DECLHIDDEN(VBOXSTRICTRC) devpciR3CommonConfigWriteWorker(PPDMDEVINS pDevIns, PDEVPCIBUSCC pBusCC, + PPDMPCIDEV pPciDev, uint32_t uAddress, unsigned cb, uint32_t u32Value); +void devpciR3CommonRestoreConfig(PPDMDEVINS pDevIns, PPDMPCIDEV pDev, uint8_t const *pbSrcConfig); +int devpciR3CommonRestoreRegions(PCPDMDEVHLPR3 pHlp, PSSMHANDLE pSSM, PPDMPCIDEV pPciDev, PPCIIOREGION paIoRegions, bool fNewState); +void devpciR3ResetDevice(PPDMDEVINS pDevIns, PPDMPCIDEV pDev); +void devpciR3BiosInitSetRegionAddress(PPDMDEVINS pDevIns, PDEVPCIBUS pBus, PPDMPCIDEV pPciDev, int iRegion, uint64_t addr); +uint32_t devpciR3GetCfg(PPDMPCIDEV pPciDev, int32_t iRegister, int cb); +void devpciR3SetCfg(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, int32_t iRegister, uint32_t u32, int cb); + +DECLINLINE(uint8_t) devpciR3GetByte(PPDMPCIDEV pPciDev, int32_t iRegister) +{ + return (uint8_t)devpciR3GetCfg(pPciDev, iRegister, 1); +} + +DECLINLINE(uint16_t) devpciR3GetWord(PPDMPCIDEV pPciDev, int32_t iRegister) +{ + return (uint16_t)devpciR3GetCfg(pPciDev, iRegister, 2); +} + +DECLINLINE(uint32_t) devpciR3GetDWord(PPDMPCIDEV pPciDev, int32_t iRegister) +{ + return (uint32_t)devpciR3GetCfg(pPciDev, iRegister, 4); +} + +DECLINLINE(void) devpciR3SetByte(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, int32_t iRegister, uint8_t u8) +{ + devpciR3SetCfg(pDevIns, pPciDev, iRegister, u8, 1); +} + +DECLINLINE(void) devpciR3SetWord(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, int32_t iRegister, uint16_t u16) +{ + devpciR3SetCfg(pDevIns, pPciDev, iRegister, u16, 2); +} + +DECLINLINE(void) devpciR3SetDWord(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, int32_t iRegister, uint32_t u32) +{ + devpciR3SetCfg(pDevIns, pPciDev, iRegister, u32, 4); +} + +#endif /* IN_RING3 */ + +#endif /* !VBOX_INCLUDED_SRC_Bus_DevPciInternal_h */ + diff --git a/src/VBox/Devices/Bus/Makefile.kup b/src/VBox/Devices/Bus/Makefile.kup new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/VBox/Devices/Bus/Makefile.kup diff --git a/src/VBox/Devices/Bus/MsiCommon.cpp b/src/VBox/Devices/Bus/MsiCommon.cpp new file mode 100644 index 00000000..bef8655c --- /dev/null +++ b/src/VBox/Devices/Bus/MsiCommon.cpp @@ -0,0 +1,358 @@ +/* $Id: MsiCommon.cpp $ */ +/** @file + * MSI support routines + * + * @todo Straighten up this file!! + */ + +/* + * Copyright (C) 2010-2022 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#define LOG_GROUP LOG_GROUP_DEV_PCI +#define PDMPCIDEV_INCLUDE_PRIVATE /* Hack to get pdmpcidevint.h included at the right point. */ +#include <VBox/pci.h> +#include <VBox/msi.h> +#include <VBox/vmm/pdmdev.h> +#include <VBox/log.h> + +#include "MsiCommon.h" +#include "PciInline.h" +#include "DevPciInternal.h" + + +DECLINLINE(uint16_t) msiGetMessageControl(PPDMPCIDEV pDev) +{ + uint32_t idxMessageControl = pDev->Int.s.u8MsiCapOffset + VBOX_MSI_CAP_MESSAGE_CONTROL; +#ifdef IN_RING3 + if (pciDevIsPassthrough(pDev) && pDev->Int.s.pfnConfigRead) + { + uint32_t u32Value = 0; + VBOXSTRICTRC rcStrict = pDev->Int.s.pfnConfigRead(pDev->Int.s.CTX_SUFF(pDevIns), pDev, idxMessageControl, 2, &u32Value); + AssertRCSuccess(VBOXSTRICTRC_VAL(rcStrict)); + return (uint16_t)u32Value; + } +#endif + return PCIDevGetWord(pDev, idxMessageControl); +} + +DECLINLINE(bool) msiIs64Bit(PPDMPCIDEV pDev) +{ + return pciDevIsMsi64Capable(pDev); +} + +/** @todo r=klaus This design assumes that the config space cache is always + * up to date, which is a wrong assumption for the "emulate passthrough" case + * where only the callbacks give the correct data. */ +DECLINLINE(uint32_t *) msiGetMaskBits(PPDMPCIDEV pDev) +{ + uint8_t iOff = msiIs64Bit(pDev) ? VBOX_MSI_CAP_MASK_BITS_64 : VBOX_MSI_CAP_MASK_BITS_32; + /* devices may have no masked/pending support */ + if (iOff >= pDev->Int.s.u8MsiCapSize) + return NULL; + iOff += pDev->Int.s.u8MsiCapOffset; + return (uint32_t*)(pDev->abConfig + iOff); +} + +/** @todo r=klaus This design assumes that the config space cache is always + * up to date, which is a wrong assumption for the "emulate passthrough" case + * where only the callbacks give the correct data. */ +DECLINLINE(uint32_t*) msiGetPendingBits(PPDMPCIDEV pDev) +{ + uint8_t iOff = msiIs64Bit(pDev) ? VBOX_MSI_CAP_PENDING_BITS_64 : VBOX_MSI_CAP_PENDING_BITS_32; + /* devices may have no masked/pending support */ + if (iOff >= pDev->Int.s.u8MsiCapSize) + return NULL; + iOff += pDev->Int.s.u8MsiCapOffset; + return (uint32_t*)(pDev->abConfig + iOff); +} + +DECLINLINE(bool) msiIsEnabled(PPDMPCIDEV pDev) +{ + return (msiGetMessageControl(pDev) & VBOX_PCI_MSI_FLAGS_ENABLE) != 0; +} + +DECLINLINE(uint8_t) msiGetMme(PPDMPCIDEV pDev) +{ + return (msiGetMessageControl(pDev) & VBOX_PCI_MSI_FLAGS_QSIZE) >> 4; +} + +DECLINLINE(RTGCPHYS) msiGetMsiAddress(PPDMPCIDEV pDev) +{ + if (msiIs64Bit(pDev)) + { + uint32_t lo = PCIDevGetDWord(pDev, pDev->Int.s.u8MsiCapOffset + VBOX_MSI_CAP_MESSAGE_ADDRESS_LO); + uint32_t hi = PCIDevGetDWord(pDev, pDev->Int.s.u8MsiCapOffset + VBOX_MSI_CAP_MESSAGE_ADDRESS_HI); + return RT_MAKE_U64(lo, hi); + } + return PCIDevGetDWord(pDev, pDev->Int.s.u8MsiCapOffset + VBOX_MSI_CAP_MESSAGE_ADDRESS_32); +} + +DECLINLINE(uint32_t) msiGetMsiData(PPDMPCIDEV pDev, int32_t iVector) +{ + int16_t iOff = msiIs64Bit(pDev) ? VBOX_MSI_CAP_MESSAGE_DATA_64 : VBOX_MSI_CAP_MESSAGE_DATA_32; + uint16_t lo = PCIDevGetWord(pDev, pDev->Int.s.u8MsiCapOffset + iOff); + + // vector encoding into lower bits of message data + uint8_t bits = msiGetMme(pDev); + uint16_t uMask = ((1 << bits) - 1); + lo &= ~uMask; + lo |= iVector & uMask; + + return RT_MAKE_U32(lo, 0); +} + +#ifdef IN_RING3 + +DECLINLINE(bool) msiR3BitJustCleared(uint32_t uOldValue, uint32_t uNewValue, uint32_t uMask) +{ + return !!(uOldValue & uMask) && !(uNewValue & uMask); +} + +DECLINLINE(bool) msiR3BitJustSet(uint32_t uOldValue, uint32_t uNewValue, uint32_t uMask) +{ + return !(uOldValue & uMask) && !!(uNewValue & uMask); +} + +/** + * PCI config space accessors for MSI registers. + */ +void MsiR3PciConfigWrite(PPDMDEVINS pDevIns, PCPDMPCIHLP pPciHlp, PPDMPCIDEV pDev, + uint32_t u32Address, uint32_t val, unsigned len) +{ + int32_t iOff = u32Address - pDev->Int.s.u8MsiCapOffset; + Assert(iOff >= 0 && (pciDevIsMsiCapable(pDev) && iOff < pDev->Int.s.u8MsiCapSize)); + + Log2(("MsiR3PciConfigWrite: %d <- %x (%d)\n", iOff, val, len)); + + uint32_t uAddr = u32Address; + bool f64Bit = msiIs64Bit(pDev); + + for (uint32_t i = 0; i < len; i++) + { + uint32_t reg = i + iOff; + uint8_t u8Val = (uint8_t)val; + switch (reg) + { + case 0: /* Capability ID, ro */ + case 1: /* Next pointer, ro */ + break; + case VBOX_MSI_CAP_MESSAGE_CONTROL: + /* don't change read-only bits: 1-3,7 */ + u8Val &= UINT8_C(~0x8e); + pDev->abConfig[uAddr] = u8Val | (pDev->abConfig[uAddr] & UINT8_C(0x8e)); + break; + case VBOX_MSI_CAP_MESSAGE_CONTROL + 1: + /* don't change read-only bit 8, and reserved 9-15 */ + break; + default: + if (pDev->abConfig[uAddr] != u8Val) + { + int32_t maskUpdated = -1; + + /* If we're enabling masked vector, and have pending messages + for this vector, we have to send this message now */ + if ( !f64Bit + && (reg >= VBOX_MSI_CAP_MASK_BITS_32) + && (reg < VBOX_MSI_CAP_MASK_BITS_32 + 4) + ) + { + maskUpdated = reg - VBOX_MSI_CAP_MASK_BITS_32; + } + if ( f64Bit + && (reg >= VBOX_MSI_CAP_MASK_BITS_64) + && (reg < VBOX_MSI_CAP_MASK_BITS_64 + 4) + ) + { + maskUpdated = reg - VBOX_MSI_CAP_MASK_BITS_64; + } + + if (maskUpdated != -1 && msiIsEnabled(pDev)) + { + uint32_t* puPending = msiGetPendingBits(pDev); + for (int iBitNum = 0; iBitNum < 8; iBitNum++) + { + int32_t iBit = 1 << iBitNum; + uint32_t uVector = maskUpdated*8 + iBitNum; + + if (msiR3BitJustCleared(pDev->abConfig[uAddr], u8Val, iBit)) + { + Log(("msi: mask updated bit %d@%x (%d)\n", iBitNum, uAddr, maskUpdated)); + + /* To ensure that we're no longer masked */ + pDev->abConfig[uAddr] &= ~iBit; + if ((*puPending & (1 << uVector)) != 0) + { + Log(("msi: notify earlier masked pending vector: %d\n", uVector)); + MsiNotify(pDevIns, pPciHlp, pDev, uVector, PDM_IRQ_LEVEL_HIGH, 0 /*uTagSrc*/); + } + } + if (msiR3BitJustSet(pDev->abConfig[uAddr], u8Val, iBit)) + { + Log(("msi: mask vector: %d\n", uVector)); + } + } + } + + pDev->abConfig[uAddr] = u8Val; + } + } + uAddr++; + val >>= 8; + } +} + +/** + * Initializes MSI support for the given PCI device. + */ +int MsiR3Init(PPDMPCIDEV pDev, PPDMMSIREG pMsiReg) +{ + if (pMsiReg->cMsiVectors == 0) + return VINF_SUCCESS; + + /* XXX: done in pcirawAnalyzePciCaps() */ + if (pciDevIsPassthrough(pDev)) + return VINF_SUCCESS; + + uint16_t cVectors = pMsiReg->cMsiVectors; + uint8_t iCapOffset = pMsiReg->iMsiCapOffset; + uint8_t iNextOffset = pMsiReg->iMsiNextOffset; + bool f64bit = pMsiReg->fMsi64bit; + bool fNoMasking = pMsiReg->fMsiNoMasking; + uint16_t iFlags = 0; + + Assert(iCapOffset != 0 && iCapOffset < 0xff && iNextOffset < 0xff); + + if (!fNoMasking) + { + int iMmc; + + /* Compute multiple-message capable bitfield */ + for (iMmc = 0; iMmc < 6; iMmc++) + { + if ((1 << iMmc) >= cVectors) + break; + } + + if ((cVectors > VBOX_MSI_MAX_ENTRIES) || (1 << iMmc) < cVectors) + return VERR_TOO_MUCH_DATA; + + /* We support per-vector masking */ + iFlags |= VBOX_PCI_MSI_FLAGS_MASKBIT; + /* How many vectors we're capable of */ + iFlags |= iMmc; + } + + if (f64bit) + iFlags |= VBOX_PCI_MSI_FLAGS_64BIT; + + pDev->Int.s.u8MsiCapOffset = iCapOffset; + pDev->Int.s.u8MsiCapSize = f64bit ? VBOX_MSI_CAP_SIZE_64 : VBOX_MSI_CAP_SIZE_32; + + PCIDevSetByte(pDev, iCapOffset + 0, VBOX_PCI_CAP_ID_MSI); + PCIDevSetByte(pDev, iCapOffset + 1, iNextOffset); /* next */ + PCIDevSetWord(pDev, iCapOffset + VBOX_MSI_CAP_MESSAGE_CONTROL, iFlags); + + if (!fNoMasking) + { + *msiGetMaskBits(pDev) = 0; + *msiGetPendingBits(pDev) = 0; + } + + pciDevSetMsiCapable(pDev); + if (f64bit) + pciDevSetMsi64Capable(pDev); + + return VINF_SUCCESS; +} + +#endif /* IN_RING3 */ + + +/** + * Checks if MSI is enabled for the given PCI device. + * + * (Must use MSINotify() for notifications when true.) + */ +bool MsiIsEnabled(PPDMPCIDEV pDev) +{ + return pciDevIsMsiCapable(pDev) && msiIsEnabled(pDev); +} + +/** + * Device notification (aka interrupt). + */ +void MsiNotify(PPDMDEVINS pDevIns, PCPDMPCIHLP pPciHlp, PPDMPCIDEV pDev, int iVector, int iLevel, uint32_t uTagSrc) +{ + AssertMsg(msiIsEnabled(pDev), ("Must be enabled to use that")); + + uint32_t uMask; + uint32_t *puPending = msiGetPendingBits(pDev); + if (puPending) + { + uint32_t *puMask = msiGetMaskBits(pDev); + AssertPtr(puMask); + uMask = *puMask; + LogFlow(("MsiNotify: %d pending=%x mask=%x\n", iVector, *puPending, uMask)); + } + else + { + uMask = 0; + LogFlow(("MsiNotify: %d\n", iVector)); + } + + /* We only trigger MSI on level up */ + if ((iLevel & PDM_IRQ_LEVEL_HIGH) == 0) + { + /** @todo maybe clear pending interrupts on level down? */ +#if 0 + if (puPending) + { + *puPending &= ~(1<<iVector); + LogFlow(("msi: clear pending %d, now %x\n", iVector, *puPending)); + } +#endif + return; + } + + if ((uMask & (1<<iVector)) != 0) + { + *puPending |= (1<<iVector); + LogFlow(("msi: %d is masked, mark pending, now %x\n", iVector, *puPending)); + return; + } + + MSIMSG Msi; + Msi.Addr.u64 = msiGetMsiAddress(pDev); + Msi.Data.u32 = msiGetMsiData(pDev, iVector); + + if (puPending) + *puPending &= ~(1<<iVector); + + PPDMDEVINS pDevInsBus = pPciHlp->pfnGetBusByNo(pDevIns, pDev->Int.s.idxPdmBus); + Assert(pDevInsBus); + PDEVPCIBUS pBus = PDMINS_2_DATA(pDevInsBus, PDEVPCIBUS); + uint16_t const uBusDevFn = PCIBDF_MAKE(pBus->iBus, pDev->uDevFn); + + Assert(pPciHlp->pfnIoApicSendMsi != NULL); + pPciHlp->pfnIoApicSendMsi(pDevIns, uBusDevFn, &Msi, uTagSrc); +} + diff --git a/src/VBox/Devices/Bus/MsiCommon.h b/src/VBox/Devices/Bus/MsiCommon.h new file mode 100644 index 00000000..51771754 --- /dev/null +++ b/src/VBox/Devices/Bus/MsiCommon.h @@ -0,0 +1,51 @@ +/* $Id: MsiCommon.h $ */ +/** @file + * Header for MSI/MSI-X support routines. + */ + +/* + * Copyright (C) 2010-2022 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef VBOX_INCLUDED_SRC_Bus_MsiCommon_h +#define VBOX_INCLUDED_SRC_Bus_MsiCommon_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +typedef CTX_SUFF(PCPDMPCIHLP) PCPDMPCIHLP; + +#ifdef IN_RING3 +int MsiR3Init(PPDMPCIDEV pDev, PPDMMSIREG pMsiReg); +void MsiR3PciConfigWrite(PPDMDEVINS pDevIns, PCPDMPCIHLP pPciHlp, PPDMPCIDEV pDev, uint32_t u32Address, uint32_t val, unsigned len); +#endif +bool MsiIsEnabled(PPDMPCIDEV pDev); +void MsiNotify(PPDMDEVINS pDevIns, PCPDMPCIHLP pPciHlp, PPDMPCIDEV pDev, int iVector, int iLevel, uint32_t uTagSrc); + +#ifdef IN_RING3 +int MsixR3Init(PCPDMPCIHLP pPciHlp, PPDMPCIDEV pDev, PPDMMSIREG pMsiReg); +void MsixR3PciConfigWrite(PPDMDEVINS pDevIns, PCPDMPCIHLP pPciHlp, PPDMPCIDEV pDev, uint32_t u32Address, uint32_t val, unsigned len); +#endif +bool MsixIsEnabled(PPDMPCIDEV pDev); +void MsixNotify(PPDMDEVINS pDevIns, PCPDMPCIHLP pPciHlp, PPDMPCIDEV pDev, int iVector, int iLevel, uint32_t uTagSrc); + +#endif /* !VBOX_INCLUDED_SRC_Bus_MsiCommon_h */ + diff --git a/src/VBox/Devices/Bus/MsixCommon.cpp b/src/VBox/Devices/Bus/MsixCommon.cpp new file mode 100644 index 00000000..5597e013 --- /dev/null +++ b/src/VBox/Devices/Bus/MsixCommon.cpp @@ -0,0 +1,355 @@ +/* $Id: MsixCommon.cpp $ */ +/** @file + * MSI-X support routines + */ + +/* + * Copyright (C) 2010-2022 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +#define LOG_GROUP LOG_GROUP_DEV_PCI +#define PDMPCIDEV_INCLUDE_PRIVATE /* Hack to get pdmpcidevint.h included at the right point. */ +#include <VBox/pci.h> +#include <VBox/msi.h> +#include <VBox/vmm/pdmdev.h> +#include <VBox/log.h> +#include <VBox/vmm/mm.h> +#include <VBox/AssertGuest.h> + +#include <iprt/assert.h> + +#include "MsiCommon.h" +#include "DevPciInternal.h" +#include "PciInline.h" + +typedef struct +{ + uint32_t u32MsgAddressLo; + uint32_t u32MsgAddressHi; + uint32_t u32MsgData; + uint32_t u32VectorControl; +} MsixTableRecord; +AssertCompileSize(MsixTableRecord, VBOX_MSIX_ENTRY_SIZE); + + +/** @todo use accessors so that raw PCI devices work correctly with MSI-X. */ +DECLINLINE(uint16_t) msixGetMessageControl(PPDMPCIDEV pDev) +{ + return PCIDevGetWord(pDev, pDev->Int.s.u8MsixCapOffset + VBOX_MSIX_CAP_MESSAGE_CONTROL); +} + +DECLINLINE(bool) msixIsEnabled(PPDMPCIDEV pDev) +{ + return (msixGetMessageControl(pDev) & VBOX_PCI_MSIX_FLAGS_ENABLE) != 0; +} + +DECLINLINE(bool) msixIsMasked(PPDMPCIDEV pDev) +{ + return (msixGetMessageControl(pDev) & VBOX_PCI_MSIX_FLAGS_FUNCMASK) != 0; +} + +#ifdef IN_RING3 +DECLINLINE(uint16_t) msixTableSize(PPDMPCIDEV pDev) +{ + return (msixGetMessageControl(pDev) & 0x3ff) + 1; +} +#endif + +DECLINLINE(uint8_t *) msixGetPageOffset(PPDMPCIDEV pDev, uint32_t off) +{ + return &pDev->abMsixState[off]; +} + +DECLINLINE(MsixTableRecord *) msixGetVectorRecord(PPDMPCIDEV pDev, uint32_t iVector) +{ + return (MsixTableRecord *)msixGetPageOffset(pDev, iVector * VBOX_MSIX_ENTRY_SIZE); +} + +DECLINLINE(RTGCPHYS) msixGetMsiAddress(PPDMPCIDEV pDev, uint32_t iVector) +{ + MsixTableRecord *pRec = msixGetVectorRecord(pDev, iVector); + return RT_MAKE_U64(pRec->u32MsgAddressLo & ~UINT32_C(0x3), pRec->u32MsgAddressHi); +} + +DECLINLINE(uint32_t) msixGetMsiData(PPDMPCIDEV pDev, uint32_t iVector) +{ + return msixGetVectorRecord(pDev, iVector)->u32MsgData; +} + +DECLINLINE(uint32_t) msixIsVectorMasked(PPDMPCIDEV pDev, uint32_t iVector) +{ + return (msixGetVectorRecord(pDev, iVector)->u32VectorControl & 0x1) != 0; +} + +DECLINLINE(uint8_t *) msixPendingByte(PPDMPCIDEV pDev, uint32_t iVector) +{ + return msixGetPageOffset(pDev, pDev->Int.s.offMsixPba + iVector / 8); +} + +DECLINLINE(void) msixSetPending(PPDMPCIDEV pDev, uint32_t iVector) +{ + *msixPendingByte(pDev, iVector) |= (1 << (iVector & 0x7)); +} + +DECLINLINE(void) msixClearPending(PPDMPCIDEV pDev, uint32_t iVector) +{ + *msixPendingByte(pDev, iVector) &= ~(1 << (iVector & 0x7)); +} + +#ifdef IN_RING3 + +DECLINLINE(bool) msixR3IsPending(PPDMPCIDEV pDev, uint32_t iVector) +{ + return (*msixPendingByte(pDev, iVector) & (1 << (iVector & 0x7))) != 0; +} + +static void msixR3CheckPendingVector(PPDMDEVINS pDevIns, PCPDMPCIHLP pPciHlp, PPDMPCIDEV pDev, uint32_t iVector) +{ + if (msixR3IsPending(pDev, iVector) && !msixIsVectorMasked(pDev, iVector)) + MsixNotify(pDevIns, pPciHlp, pDev, iVector, 1 /* iLevel */, 0 /*uTagSrc*/); +} + +/** + * @callback_method_impl{FNIOMMMIONEWREAD} + */ +static DECLCALLBACK(VBOXSTRICTRC) msixR3MMIORead(PPDMDEVINS pDevIns, void *pvUser, RTGCPHYS off, void *pv, unsigned cb) +{ + PPDMPCIDEV pPciDev = (PPDMPCIDEV)pvUser; + RT_NOREF(pDevIns); + + /* Validate IOM behaviour. */ + Assert(cb == 4); + Assert((off & 3) == 0); + + /* Do the read if it's within the MSI state. */ + ASSERT_GUEST_MSG_RETURN(off + cb <= pPciDev->Int.s.cbMsixRegion, ("Out of bounds access for the MSI-X region\n"), + VINF_IOM_MMIO_UNUSED_FF); + *(uint32_t *)pv = *(uint32_t *)&pPciDev->abMsixState[off]; + + LogFlowFunc(("off=%RGp cb=%d -> %#010RX32\n", off, cb, *(uint32_t *)pv)); + return VINF_SUCCESS; +} + +/** + * @callback_method_impl{FNIOMMMIONEWWRITE} + */ +static DECLCALLBACK(VBOXSTRICTRC) msixR3MMIOWrite(PPDMDEVINS pDevIns, void *pvUser, RTGCPHYS off, void const *pv, unsigned cb) +{ + PPDMPCIDEV pPciDev = (PPDMPCIDEV)pvUser; + LogFlowFunc(("off=%RGp cb=%d %#010RX32\n", off, cb, *(uint32_t *)pv)); + + /* Validate IOM behaviour. */ + Assert(cb == 4); + Assert((off & 3) == 0); + + /* Do the write if it's within the MSI state. */ + ASSERT_GUEST_MSG_RETURN(off + cb <= pPciDev->Int.s.offMsixPba, ("Trying to write to PBA\n"), + VINF_SUCCESS); + *(uint32_t *)&pPciDev->abMsixState[off] = *(uint32_t *)pv; + + /* (See MsixR3Init the setting up of pvPciBusPtrR3.) */ + msixR3CheckPendingVector(pDevIns, (PCPDMPCIHLP)pPciDev->Int.s.pvPciBusPtrR3, pPciDev, off / VBOX_MSIX_ENTRY_SIZE); + return VINF_SUCCESS; +} + +/** + * Initalizes MSI-X support for the given PCI device. + */ +int MsixR3Init(PCPDMPCIHLP pPciHlp, PPDMPCIDEV pDev, PPDMMSIREG pMsiReg) +{ + if (pMsiReg->cMsixVectors == 0) + return VINF_SUCCESS; + + /* We cannot init MSI-X on raw devices yet. */ + Assert(!pciDevIsPassthrough(pDev)); + + uint16_t cVectors = pMsiReg->cMsixVectors; + uint8_t iCapOffset = pMsiReg->iMsixCapOffset; + uint8_t iNextOffset = pMsiReg->iMsixNextOffset; + uint8_t iBar = pMsiReg->iMsixBar; + + AssertMsgReturn(cVectors <= VBOX_MSIX_MAX_ENTRIES, ("Too many MSI-X vectors: %d\n", cVectors), VERR_TOO_MUCH_DATA); + AssertMsgReturn(iBar <= 5, ("Using wrong BAR for MSI-X: %d\n", iBar), VERR_INVALID_PARAMETER); + Assert(iCapOffset != 0 && iCapOffset < 0xff && iNextOffset < 0xff); + + uint16_t cbPba = cVectors / 8; + if (cVectors % 8) + cbPba++; + uint16_t cbMsixRegion = RT_ALIGN_T(cVectors * sizeof(MsixTableRecord) + cbPba, _4K, uint16_t); + AssertLogRelMsgReturn(cbMsixRegion <= pDev->cbMsixState, + ("%#x vs %#x (%s)\n", cbMsixRegion, pDev->cbMsixState, pDev->pszNameR3), + VERR_MISMATCH); + + /* If device is passthrough, BAR is registered using common mechanism. */ + if (!pciDevIsPassthrough(pDev)) + { + /** @todo r=bird: This used to be IOMMMIO_FLAGS_READ_PASSTHRU | + * IOMMMIO_FLAGS_WRITE_PASSTHRU with the callbacks asserting and + * returning VERR_INTERNAL_ERROR on non-dword reads. That is of + * course certifiable insane behaviour. So, instead I've changed it + * so the callbacks only see dword reads and writes. I'm not at all + * sure about the read-missing behaviour, but it seems like a good + * idea for now. */ + /** @todo r=bird: Shouldn't we at least handle writes in ring-0? */ + int rc = PDMDevHlpPCIIORegionCreateMmio(pDev->Int.s.CTX_SUFF(pDevIns), iBar, cbMsixRegion, PCI_ADDRESS_SPACE_MEM, + msixR3MMIOWrite, msixR3MMIORead, pDev, + IOMMMIO_FLAGS_READ_DWORD | IOMMMIO_FLAGS_WRITE_DWORD_READ_MISSING, + "MSI-X tables", &pDev->Int.s.hMmioMsix); + AssertRCReturn(rc, rc); + } + + uint16_t offTable = 0; + uint16_t offPBA = cVectors * sizeof(MsixTableRecord); + + pDev->Int.s.u8MsixCapOffset = iCapOffset; + pDev->Int.s.u8MsixCapSize = VBOX_MSIX_CAP_SIZE; + pDev->Int.s.cbMsixRegion = cbMsixRegion; + pDev->Int.s.offMsixPba = offPBA; + + /* R3 PCI helper */ + pDev->Int.s.pvPciBusPtrR3 = pPciHlp; + + PCIDevSetByte(pDev, iCapOffset + 0, VBOX_PCI_CAP_ID_MSIX); + PCIDevSetByte(pDev, iCapOffset + 1, iNextOffset); /* next */ + PCIDevSetWord(pDev, iCapOffset + VBOX_MSIX_CAP_MESSAGE_CONTROL, cVectors - 1); + + PCIDevSetDWord(pDev, iCapOffset + VBOX_MSIX_TABLE_BIROFFSET, offTable | iBar); + PCIDevSetDWord(pDev, iCapOffset + VBOX_MSIX_PBA_BIROFFSET, offPBA | iBar); + + pciDevSetMsixCapable(pDev); + + return VINF_SUCCESS; +} + +#endif /* IN_RING3 */ + +/** + * Checks if MSI-X is enabled for the tiven PCI device. + * + * (Must use MSIXNotify() for notifications when true.) + */ +bool MsixIsEnabled(PPDMPCIDEV pDev) +{ + return pciDevIsMsixCapable(pDev) && msixIsEnabled(pDev); +} + +/** + * Device notification (aka interrupt). + */ +void MsixNotify(PPDMDEVINS pDevIns, PCPDMPCIHLP pPciHlp, PPDMPCIDEV pDev, int iVector, int iLevel, uint32_t uTagSrc) +{ + AssertMsg(msixIsEnabled(pDev), ("Must be enabled to use that")); + + Assert(pPciHlp->pfnIoApicSendMsi != NULL); + + /* We only trigger MSI-X on level up */ + if ((iLevel & PDM_IRQ_LEVEL_HIGH) == 0) + { + return; + } + + // if this vector is somehow disabled + if (msixIsMasked(pDev) || msixIsVectorMasked(pDev, iVector)) + { + // mark pending bit + msixSetPending(pDev, iVector); + return; + } + + // clear pending bit + msixClearPending(pDev, iVector); + + MSIMSG Msi; + Msi.Addr.u64 = msixGetMsiAddress(pDev, iVector); + Msi.Data.u32 = msixGetMsiData(pDev, iVector); + + PPDMDEVINS pDevInsBus = pPciHlp->pfnGetBusByNo(pDevIns, pDev->Int.s.idxPdmBus); + Assert(pDevInsBus); + PDEVPCIBUS pBus = PDMINS_2_DATA(pDevInsBus, PDEVPCIBUS); + uint16_t const uBusDevFn = PCIBDF_MAKE(pBus->iBus, pDev->uDevFn); + + pPciHlp->pfnIoApicSendMsi(pDevIns, uBusDevFn, &Msi, uTagSrc); +} + +#ifdef IN_RING3 + +DECLINLINE(bool) msixR3BitJustCleared(uint32_t uOldValue, uint32_t uNewValue, uint32_t uMask) +{ + return !!(uOldValue & uMask) && !(uNewValue & uMask); +} + + +static void msixR3CheckPendingVectors(PPDMDEVINS pDevIns, PCPDMPCIHLP pPciHlp, PPDMPCIDEV pDev) +{ + for (uint32_t i = 0; i < msixTableSize(pDev); i++) + msixR3CheckPendingVector(pDevIns, pPciHlp, pDev, i); +} + +/** + * PCI config space accessors for MSI-X. + */ +void MsixR3PciConfigWrite(PPDMDEVINS pDevIns, PCPDMPCIHLP pPciHlp, PPDMPCIDEV pDev, uint32_t u32Address, uint32_t val, unsigned len) +{ + int32_t iOff = u32Address - pDev->Int.s.u8MsixCapOffset; + Assert(iOff >= 0 && (pciDevIsMsixCapable(pDev) && iOff < pDev->Int.s.u8MsixCapSize)); + + Log2(("MsixR3PciConfigWrite: %d <- %x (%d)\n", iOff, val, len)); + + uint32_t uAddr = u32Address; + uint8_t u8NewVal; + bool fJustEnabled = false; + + for (uint32_t i = 0; i < len; i++) + { + uint32_t reg = i + iOff; + uint8_t u8Val = (uint8_t)val; + switch (reg) + { + case 0: /* Capability ID, ro */ + case 1: /* Next pointer, ro */ + break; + case VBOX_MSIX_CAP_MESSAGE_CONTROL: + /* don't change read-only bits: 0-7 */ + break; + case VBOX_MSIX_CAP_MESSAGE_CONTROL + 1: + { + /* don't change read-only bits 8-13 */ + u8NewVal = (u8Val & UINT8_C(~0x3f)) | (pDev->abConfig[uAddr] & UINT8_C(0x3f)); + /* If just enabled globally - check pending vectors */ + fJustEnabled |= msixR3BitJustCleared(pDev->abConfig[uAddr], u8NewVal, VBOX_PCI_MSIX_FLAGS_ENABLE >> 8); + fJustEnabled |= msixR3BitJustCleared(pDev->abConfig[uAddr], u8NewVal, VBOX_PCI_MSIX_FLAGS_FUNCMASK >> 8); + pDev->abConfig[uAddr] = u8NewVal; + break; + } + default: + /* other fields read-only too */ + break; + } + uAddr++; + val >>= 8; + } + + if (fJustEnabled) + msixR3CheckPendingVectors(pDevIns, pPciHlp, pDev); +} + +#endif /* IN_RING3 */ diff --git a/src/VBox/Devices/Bus/PciInline.h b/src/VBox/Devices/Bus/PciInline.h new file mode 100644 index 00000000..968c0c8d --- /dev/null +++ b/src/VBox/Devices/Bus/PciInline.h @@ -0,0 +1,115 @@ +/* $Id: PciInline.h $ */ +/** @file + * PCI - The PCI Controller And Devices, inline device helpers. + */ + +/* + * Copyright (C) 2006-2022 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#ifndef VBOX_INCLUDED_SRC_Bus_PciInline_h +#define VBOX_INCLUDED_SRC_Bus_PciInline_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +DECLINLINE(void) pciDevSetPci2PciBridge(PPDMPCIDEV pDev) +{ + pDev->Int.s.fFlags |= PCIDEV_FLAG_PCI_TO_PCI_BRIDGE; +} + +DECLINLINE(bool) pciDevIsPci2PciBridge(PPDMPCIDEV pDev) +{ + return (pDev->Int.s.fFlags & PCIDEV_FLAG_PCI_TO_PCI_BRIDGE) != 0; +} + +DECLINLINE(void) pciDevSetPciExpress(PPDMPCIDEV pDev) +{ + pDev->Int.s.fFlags |= PCIDEV_FLAG_PCI_EXPRESS_DEVICE; +} + +DECLINLINE(bool) pciDevIsPciExpress(PPDMPCIDEV pDev) +{ + return (pDev->Int.s.fFlags & PCIDEV_FLAG_PCI_EXPRESS_DEVICE) != 0; +} + +DECLINLINE(void) pciDevSetMsiCapable(PPDMPCIDEV pDev) +{ + pDev->Int.s.fFlags |= PCIDEV_FLAG_MSI_CAPABLE; +} + +DECLINLINE(void) pciDevClearMsiCapable(PPDMPCIDEV pDev) +{ + pDev->Int.s.fFlags &= ~PCIDEV_FLAG_MSI_CAPABLE; +} + +DECLINLINE(bool) pciDevIsMsiCapable(PPDMPCIDEV pDev) +{ + return (pDev->Int.s.fFlags & PCIDEV_FLAG_MSI_CAPABLE) != 0; +} + +DECLINLINE(void) pciDevSetMsi64Capable(PPDMPCIDEV pDev) +{ + pDev->Int.s.fFlags |= PCIDEV_FLAG_MSI64_CAPABLE; +} + +DECLINLINE(void) pciDevClearMsi64Capable(PPDMPCIDEV pDev) +{ + pDev->Int.s.fFlags &= ~PCIDEV_FLAG_MSI64_CAPABLE; +} + +DECLINLINE(bool) pciDevIsMsi64Capable(PPDMPCIDEV pDev) +{ + return (pDev->Int.s.fFlags & PCIDEV_FLAG_MSI64_CAPABLE) != 0; +} + +DECLINLINE(void) pciDevSetMsixCapable(PPDMPCIDEV pDev) +{ + pDev->Int.s.fFlags |= PCIDEV_FLAG_MSIX_CAPABLE; +} + +DECLINLINE(void) pciDevClearMsixCapable(PPDMPCIDEV pDev) +{ + pDev->Int.s.fFlags &= ~PCIDEV_FLAG_MSIX_CAPABLE; +} + +DECLINLINE(bool) pciDevIsMsixCapable(PPDMPCIDEV pDev) +{ + return (pDev->Int.s.fFlags & PCIDEV_FLAG_MSIX_CAPABLE) != 0; +} + +DECLINLINE(void) pciDevSetPassthrough(PPDMPCIDEV pDev) +{ + pDev->Int.s.fFlags |= PCIDEV_FLAG_PASSTHROUGH; +} + +DECLINLINE(void) pciDevClearPassthrough(PPDMPCIDEV pDev) +{ + pDev->Int.s.fFlags &= ~PCIDEV_FLAG_PASSTHROUGH; +} + +DECLINLINE(bool) pciDevIsPassthrough(PPDMPCIDEV pDev) +{ + return (pDev->Int.s.fFlags & PCIDEV_FLAG_PASSTHROUGH) != 0; +} + +#endif /* !VBOX_INCLUDED_SRC_Bus_PciInline_h */ + diff --git a/src/VBox/Devices/Bus/SrvPciRawR0.cpp b/src/VBox/Devices/Bus/SrvPciRawR0.cpp new file mode 100644 index 00000000..5cf381bd --- /dev/null +++ b/src/VBox/Devices/Bus/SrvPciRawR0.cpp @@ -0,0 +1,1041 @@ +/* $Id: SrvPciRawR0.cpp $ */ +/** @file + * PCI passthrough - The ring 0 service. + */ + +/* + * Copyright (C) 2011-2022 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_DEV_PCI_RAW +#include <VBox/log.h> +#include <VBox/sup.h> +#include <VBox/rawpci.h> +#include <VBox/vmm/pdmpci.h> +#include <VBox/vmm/pdm.h> +#include <VBox/vmm/gvm.h> +#include <VBox/vmm/gvmm.h> +#include <VBox/vmm/vmcc.h> + +#include <iprt/asm.h> +#include <iprt/assert.h> +#include <iprt/handletable.h> +#include <iprt/mp.h> +#include <iprt/mem.h> +#include <iprt/semaphore.h> +#include <iprt/spinlock.h> +#include <iprt/string.h> +#include <iprt/thread.h> +#include <iprt/time.h> +#include <iprt/asm-amd64-x86.h> + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +typedef struct PCIRAWSRVSTATE +{ + /** Structure lock. */ + RTSPINLOCK hSpinlock; + + /** Handle table for devices. */ + RTHANDLETABLE hHtDevs; + +} PCIRAWSRVSTATE; +typedef PCIRAWSRVSTATE *PPCIRAWSRVSTATE; + +typedef struct PCIRAWDEV +{ + /* Port pointer. */ + PRAWPCIDEVPORT pPort; + + /* Handle used by everybody else. */ + PCIRAWDEVHANDLE hHandle; + + /** The session this device is associated with. */ + PSUPDRVSESSION pSession; + + /** Structure lock. */ + RTSPINLOCK hSpinlock; + + /** Event for IRQ updates. */ + RTSEMEVENT hIrqEvent; + + /** Current pending IRQ for the device. */ + int32_t iPendingIrq; + + /** ISR handle. */ + PCIRAWISRHANDLE hIsr; + + /* If object is being destroyed. */ + bool fTerminate; + + /** The SUPR0 object. */ + void *pvObj; +} PCIRAWDEV; +typedef PCIRAWDEV *PPCIRAWDEV; + +static PCIRAWSRVSTATE g_State; + + +/** Interrupt handler. Could be called in the interrupt context, + * depending on host OS implmenetation. */ +static DECLCALLBACK(bool) pcirawr0Isr(void* pContext, int32_t iHostIrq) +{ + PPCIRAWDEV pThis = (PPCIRAWDEV)pContext; + +#ifdef VBOX_WITH_SHARED_PCI_INTERRUPTS + uint16_t uStatus; + PCIRAWMEMLOC Loc; + int rc; + + Loc.cb = 2; + rc = pThis->pPort->pfnPciCfgRead(pThis->pPort, VBOX_PCI_STATUS, &Loc); + /* Cannot read, assume non-shared. */ + if (RT_FAILURE(rc)) + return false; + + /* Check interrupt status bit. */ + if ((Loc.u.u16 & (1 << 3)) == 0) + return false; +#endif + + RTSpinlockAcquire(pThis->hSpinlock); + pThis->iPendingIrq = iHostIrq; + RTSpinlockRelease(pThis->hSpinlock); + + /** + * @todo RTSemEventSignal() docs claims that it's platform-dependent + * if RTSemEventSignal() could be called from the ISR, but it seems IPRT + * doesn't provide primitives that guaranteed to work this way. + */ + RTSemEventSignal(pThis->hIrqEvent); + + return true; +} + +static DECLCALLBACK(int) pcirawr0DevRetainHandle(RTHANDLETABLE hHandleTable, void *pvObj, void *pvCtx, void *pvUser) +{ + NOREF(pvUser); + NOREF(hHandleTable); + PPCIRAWDEV pDev = (PPCIRAWDEV)pvObj; + if (pDev->hHandle != 0) + return SUPR0ObjAddRefEx(pDev->pvObj, (PSUPDRVSESSION)pvCtx, true /* fNoBlocking */); + + return VINF_SUCCESS; +} + + +/** + * Initializes the raw PCI ring-0 service. + * + * @returns VBox status code. + */ +PCIRAWR0DECL(int) PciRawR0Init(void) +{ + LogFlow(("PciRawR0Init:\n")); + int rc = VINF_SUCCESS; + + rc = RTHandleTableCreateEx(&g_State.hHtDevs, RTHANDLETABLE_FLAGS_LOCKED | RTHANDLETABLE_FLAGS_CONTEXT, + UINT32_C(0xfefe0000), 4096, pcirawr0DevRetainHandle, NULL); + + LogFlow(("PciRawR0Init: returns %Rrc\n", rc)); + return rc; +} + +/** + * Destroys raw PCI ring-0 service. + */ +PCIRAWR0DECL(void) PciRawR0Term(void) +{ + LogFlow(("PciRawR0Term:\n")); + RTHandleTableDestroy(g_State.hHtDevs, NULL, NULL); + g_State.hHtDevs = NIL_RTHANDLETABLE; +} + + +/** + * Per-VM R0 module init. + */ +PCIRAWR0DECL(int) PciRawR0InitVM(PGVM pGVM) +{ + PRAWPCIFACTORY pFactory = NULL; + int rc = SUPR0ComponentQueryFactory(pGVM->pSession, "VBoxRawPci", RAWPCIFACTORY_UUID_STR, (void **)&pFactory); + if (RT_SUCCESS(rc)) + { + if (pFactory) + { + rc = pFactory->pfnInitVm(pFactory, pGVM, &pGVM->rawpci.s); + pFactory->pfnRelease(pFactory); + } + } + return VINF_SUCCESS; +} + +/** + * Per-VM R0 module termination routine. + */ +PCIRAWR0DECL(void) PciRawR0TermVM(PGVM pGVM) +{ + PRAWPCIFACTORY pFactory = NULL; + int rc = SUPR0ComponentQueryFactory(pGVM->pSession, "VBoxRawPci", RAWPCIFACTORY_UUID_STR, (void **)&pFactory); + if (RT_SUCCESS(rc)) + { + if (pFactory) + { + pFactory->pfnDeinitVm(pFactory, pGVM, &pGVM->rawpci.s); + pFactory->pfnRelease(pFactory); + } + } +} + +static int pcirawr0DevTerm(PPCIRAWDEV pThis, int32_t fFlags) +{ + ASMAtomicWriteBool(&pThis->fTerminate, true); + + if (pThis->hIrqEvent) + RTSemEventSignal(pThis->hIrqEvent); + + /* Enable that, once figure our how to make sure + IRQ getter thread notified and woke up. */ +#if 0 + if (pThis->hIrqEvent) + { + RTSemEventDestroy(pThis->hIrqEvent); + pThis->hIrqEvent = NIL_RTSEMEVENT; + } +#endif + + if (pThis->hSpinlock) + { + RTSpinlockDestroy(pThis->hSpinlock); + pThis->hSpinlock = NIL_RTSPINLOCK; + } + + /* Forcefully deinit. */ + return pThis->pPort->pfnDeinit(pThis->pPort, fFlags); +} + +#define GET_PORT(hDev) \ + PPCIRAWDEV pDev = (PPCIRAWDEV)RTHandleTableLookupWithCtx(g_State.hHtDevs, hDev, pSession); \ + if (!pDev) \ + return VERR_INVALID_HANDLE; \ + PRAWPCIDEVPORT pDevPort = pDev->pPort; \ + AssertReturn(pDevPort != NULL, VERR_INVALID_PARAMETER); \ + AssertReturn(pDevPort->u32Version == RAWPCIDEVPORT_VERSION, VERR_INVALID_PARAMETER); \ + AssertReturn(pDevPort->u32VersionEnd == RAWPCIDEVPORT_VERSION, VERR_INVALID_PARAMETER); + +#define PUT_PORT() if (pDev->pvObj) SUPR0ObjRelease(pDev->pvObj, pSession) + +#ifdef DEBUG_nike + +/* Code to perform debugging without host driver. */ +typedef struct DUMMYRAWPCIINS +{ + /* Host PCI address of this device. */ + uint32_t HostPciAddress; + /* Padding */ + uint32_t pad0; + + uint8_t aPciCfg[256]; + + /** Port, given to the outside world. */ + RAWPCIDEVPORT DevPort; +} DUMMYRAWPCIINS; +typedef struct DUMMYRAWPCIINS *PDUMMYRAWPCIINS; + +#define DEVPORT_2_DUMMYRAWPCIINS(pPort) \ + ( (PDUMMYRAWPCIINS)((uint8_t *)pPort - RT_UOFFSETOF(DUMMYRAWPCIINS, DevPort)) ) + +static uint8_t dummyPciGetByte(PDUMMYRAWPCIINS pThis, uint32_t iRegister) +{ + return pThis->aPciCfg[iRegister]; +} + +static void dummyPciSetByte(PDUMMYRAWPCIINS pThis, uint32_t iRegister, uint8_t u8) +{ + pThis->aPciCfg[iRegister] = u8; +} + +static uint16_t dummyPciGetWord(PDUMMYRAWPCIINS pThis, uint32_t iRegister) +{ + uint16_t u16Value = *(uint16_t*)&pThis->aPciCfg[iRegister]; + return RT_H2LE_U16(u16Value); +} + +static void dummyPciSetWord(PDUMMYRAWPCIINS pThis, uint32_t iRegister, uint16_t u16) +{ + *(uint16_t*)&pThis->aPciCfg[iRegister] = RT_H2LE_U16(u16); +} + +static uint32_t dummyPciGetDWord(PDUMMYRAWPCIINS pThis, uint32_t iRegister) +{ + uint32_t u32Value = *(uint32_t*)&pThis->aPciCfg[iRegister]; + return RT_H2LE_U32(u32Value); +} + +static void dummyPciSetDWord(PDUMMYRAWPCIINS pThis, uint32_t iRegister, uint32_t u32) +{ + *(uint32_t*)&pThis->aPciCfg[iRegister] = RT_H2LE_U32(u32); +} + +/** + * @copydoc RAWPCIDEVPORT:: pfnInit + */ +static DECLCALLBACK(int) dummyPciDevInit(PRAWPCIDEVPORT pPort, uint32_t fFlags) +{ + PDUMMYRAWPCIINS pThis = DEVPORT_2_DUMMYRAWPCIINS(pPort); + + dummyPciSetWord(pThis, VBOX_PCI_VENDOR_ID, 0xccdd); + dummyPciSetWord(pThis, VBOX_PCI_DEVICE_ID, 0xeeff); + dummyPciSetWord(pThis, VBOX_PCI_COMMAND, PCI_COMMAND_IOACCESS | PCI_COMMAND_MEMACCESS | PCI_COMMAND_BUSMASTER); + dummyPciSetByte(pThis, VBOX_PCI_INTERRUPT_PIN, 1); + + return VINF_SUCCESS; +} + +/** + * @copydoc RAWPCIDEVPORT:: pfnDeinit + */ +static DECLCALLBACK(int) dummyPciDevDeinit(PRAWPCIDEVPORT pPort, uint32_t fFlags) +{ + PDUMMYRAWPCIINS pThis = DEVPORT_2_DUMMYRAWPCIINS(pPort); + + return VINF_SUCCESS; +} + +/** + * @copydoc RAWPCIDEVPORT:: pfnDestroy + */ +static DECLCALLBACK(int) dummyPciDevDestroy(PRAWPCIDEVPORT pPort) +{ + PDUMMYRAWPCIINS pThis = DEVPORT_2_DUMMYRAWPCIINS(pPort); + + RTMemFree(pThis); + + return VINF_SUCCESS; +} + + +/** + * @copydoc RAWPCIDEVPORT:: pfnGetRegionInfo + */ +static DECLCALLBACK(int) dummyPciDevGetRegionInfo(PRAWPCIDEVPORT pPort, + int32_t iRegion, + RTHCPHYS *pRegionStart, + uint64_t *pu64RegionSize, + bool *pfPresent, + uint32_t *pfFlags) +{ + PDUMMYRAWPCIINS pThis = DEVPORT_2_DUMMYRAWPCIINS(pPort); + + if (iRegion == 0) + { + *pfPresent = true; + *pRegionStart = 0xfef0; + *pu64RegionSize = 0x10; + *pfFlags = PCIRAW_ADDRESS_SPACE_IO; + } + else if (iRegion == 2) + { + *pfPresent = true; + *pRegionStart = 0xffff0000; + *pu64RegionSize = 0x1000; + *pfFlags = PCIRAW_ADDRESS_SPACE_BAR64 | PCIRAW_ADDRESS_SPACE_MEM; + } + else + *pfPresent = false; + + return VINF_SUCCESS; +} + +/** + * @copydoc RAWPCIDEVPORT:: pfnMapRegion + */ +static DECLCALLBACK(int) dummyPciDevMapRegion(PRAWPCIDEVPORT pPort, + int32_t iRegion, + RTHCPHYS HCRegionStart, + uint64_t u64RegionSize, + int32_t fFlags, + RTR0PTR *pRegionBase) +{ + PDUMMYRAWPCIINS pThis = DEVPORT_2_DUMMYRAWPCIINS(pPort); + return VINF_SUCCESS; +} + +/** + * @copydoc RAWPCIDEVPORT:: pfnUnapRegion + */ +static DECLCALLBACK(int) dummyPciDevUnmapRegion(PRAWPCIDEVPORT pPort, + int32_t iRegion, + RTHCPHYS HCRegionStart, + uint64_t u64RegionSize, + RTR0PTR RegionBase) +{ + PDUMMYRAWPCIINS pThis = DEVPORT_2_DUMMYRAWPCIINS(pPort); + return VINF_SUCCESS; +} + +/** + * @copydoc RAWPCIDEVPORT:: pfnPciCfgRead + */ +static DECLCALLBACK(int) dummyPciDevPciCfgRead(PRAWPCIDEVPORT pPort, + uint32_t Register, + PCIRAWMEMLOC *pValue) +{ + PDUMMYRAWPCIINS pThis = DEVPORT_2_DUMMYRAWPCIINS(pPort); + + switch (pValue->cb) + { + case 1: + pValue->u.u8 = dummyPciGetByte(pThis, Register); + break; + case 2: + pValue->u.u16 = dummyPciGetWord(pThis, Register); + break; + case 4: + pValue->u.u32 = dummyPciGetDWord(pThis, Register); + break; + } + + return VINF_SUCCESS; +} + +/** + * @copydoc RAWPCIDEVPORT:: pfnPciCfgWrite + */ +static DECLCALLBACK(int) dummyPciDevPciCfgWrite(PRAWPCIDEVPORT pPort, + uint32_t Register, + PCIRAWMEMLOC *pValue) +{ + PDUMMYRAWPCIINS pThis = DEVPORT_2_DUMMYRAWPCIINS(pPort); + + switch (pValue->cb) + { + case 1: + dummyPciSetByte(pThis, Register, pValue->u.u8); + break; + case 2: + dummyPciSetWord(pThis, Register, pValue->u.u16); + break; + case 4: + dummyPciSetDWord(pThis, Register, pValue->u.u32); + break; + } + + return VINF_SUCCESS; +} + +static DECLCALLBACK(int) dummyPciDevRegisterIrqHandler(PRAWPCIDEVPORT pPort, + PFNRAWPCIISR pfnHandler, + void* pIrqContext, + PCIRAWISRHANDLE *phIsr) +{ + PDUMMYRAWPCIINS pThis = DEVPORT_2_DUMMYRAWPCIINS(pPort); + return VINF_SUCCESS; +} + +static DECLCALLBACK(int) dummyPciDevUnregisterIrqHandler(PRAWPCIDEVPORT pPort, + PCIRAWISRHANDLE hIsr) +{ + PDUMMYRAWPCIINS pThis = DEVPORT_2_DUMMYRAWPCIINS(pPort); + return VINF_SUCCESS; +} + +static DECLCALLBACK(int) dummyPciDevPowerStateChange(PRAWPCIDEVPORT pPort, + PCIRAWPOWERSTATE aState, + uint64_t *pu64Param) +{ + PDUMMYRAWPCIINS pThis = DEVPORT_2_DUMMYRAWPCIINS(pPort); + return VINF_SUCCESS; +} + +static PRAWPCIDEVPORT pcirawr0CreateDummyDevice(uint32_t HostDevice, uint32_t fFlags) +{ + PDUMMYRAWPCIINS pNew = (PDUMMYRAWPCIINS)RTMemAllocZ(sizeof(*pNew)); + if (!pNew) + return NULL; + + pNew->HostPciAddress = HostDevice; + + pNew->DevPort.u32Version = RAWPCIDEVPORT_VERSION; + pNew->DevPort.pfnInit = dummyPciDevInit; + pNew->DevPort.pfnDeinit = dummyPciDevDeinit; + pNew->DevPort.pfnDestroy = dummyPciDevDestroy; + pNew->DevPort.pfnGetRegionInfo = dummyPciDevGetRegionInfo; + pNew->DevPort.pfnMapRegion = dummyPciDevMapRegion; + pNew->DevPort.pfnUnmapRegion = dummyPciDevUnmapRegion; + pNew->DevPort.pfnPciCfgRead = dummyPciDevPciCfgRead; + pNew->DevPort.pfnPciCfgWrite = dummyPciDevPciCfgWrite; + pNew->DevPort.pfnRegisterIrqHandler = dummyPciDevRegisterIrqHandler; + pNew->DevPort.pfnUnregisterIrqHandler = dummyPciDevUnregisterIrqHandler; + pNew->DevPort.pfnPowerStateChange = dummyPciDevPowerStateChange; + + pNew->DevPort.u32VersionEnd = RAWPCIDEVPORT_VERSION; + + return &pNew->DevPort; +} + +#endif /* DEBUG_nike */ + +static DECLCALLBACK(void) pcirawr0DevObjDestructor(void *pvObj, void *pvIns, void *pvUnused) +{ + PPCIRAWDEV pThis = (PPCIRAWDEV)pvIns; + NOREF(pvObj); NOREF(pvUnused); + + /* Forcefully deinit. */ + pcirawr0DevTerm(pThis, 0); + + /* And destroy. */ + pThis->pPort->pfnDestroy(pThis->pPort); + + RTMemFree(pThis); +} + + +static int pcirawr0OpenDevice(PGVM pGVM, PSUPDRVSESSION pSession, + uint32_t HostDevice, + uint32_t fFlags, + PCIRAWDEVHANDLE *pHandle, + uint32_t *pfDevFlags) +{ + + int rc = GVMMR0ValidateGVMandEMT(pGVM, 0 /*idCpu*/); + if (RT_FAILURE(rc)) + return rc; + + /* + * Query the factory we want, then use it create and connect the host device. + */ + PPCIRAWDEV pNew = (PPCIRAWDEV)RTMemAllocZ(sizeof(*pNew)); + if (!pNew) + return VERR_NO_MEMORY; + + PRAWPCIFACTORY pFactory = NULL; + rc = SUPR0ComponentQueryFactory(pSession, "VBoxRawPci", RAWPCIFACTORY_UUID_STR, (void **)&pFactory); + /* No host driver registered, provide some fake implementation + for debugging purposes. */ + PRAWPCIDEVPORT pDevPort = NULL; +#ifdef DEBUG_nike + if (rc == VERR_SUPDRV_COMPONENT_NOT_FOUND) + { + pDevPort = pcirawr0CreateDummyDevice(HostDevice, fFlags); + if (pDevPort) + { + pDevPort->pfnInit(pDevPort, fFlags); + rc = VINF_SUCCESS; + } + else + rc = VERR_NO_MEMORY; + } +#endif + + if (RT_SUCCESS(rc)) + { + if (pFactory) + { + rc = pFactory->pfnCreateAndConnect(pFactory, + HostDevice, + fFlags, + &pGVM->rawpci.s, + &pDevPort, + pfDevFlags); + pFactory->pfnRelease(pFactory); + } + + if (RT_SUCCESS(rc)) + { + rc = RTSpinlockCreate(&pNew->hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "PciRaw"); + AssertRC(rc); + if (RT_SUCCESS(rc)) + { + rc = RTSemEventCreate(&pNew->hIrqEvent); + AssertRC(rc); + if (RT_SUCCESS(rc)) + { + pNew->pSession = pSession; + pNew->pPort = pDevPort; + pNew->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_RAW_PCI_DEVICE, + pcirawr0DevObjDestructor, pNew, NULL); + if (pNew->pvObj) + { + + uint32_t hHandle = 0; + rc = RTHandleTableAllocWithCtx(g_State.hHtDevs, pNew, pSession, &hHandle); + if (RT_SUCCESS(rc)) + { + pNew->hHandle = (PCIRAWDEVHANDLE)hHandle; + *pHandle = pNew->hHandle; + return rc; + } + SUPR0ObjRelease(pNew->pvObj, pSession); + } + RTSemEventDestroy(pNew->hIrqEvent); + } + RTSpinlockDestroy(pNew->hSpinlock); + } + } + } + + if (RT_FAILURE(rc)) + RTMemFree(pNew); + + return rc; +} + +static int pcirawr0CloseDevice(PSUPDRVSESSION pSession, + PCIRAWDEVHANDLE TargetDevice, + uint32_t fFlags) +{ + GET_PORT(TargetDevice); + int rc; + + pDevPort->pfnUnregisterIrqHandler(pDevPort, pDev->hIsr); + pDev->hIsr = 0; + + rc = pcirawr0DevTerm(pDev, fFlags); + + RTHandleTableFreeWithCtx(g_State.hHtDevs, TargetDevice, pSession); + + PUT_PORT(); + + return rc; +} + +/* We may want to call many functions here directly, so no static */ +static int pcirawr0GetRegionInfo(PSUPDRVSESSION pSession, + PCIRAWDEVHANDLE TargetDevice, + int32_t iRegion, + RTHCPHYS *pRegionStart, + uint64_t *pu64RegionSize, + bool *pfPresent, + uint32_t *pfFlags) +{ + LogFlow(("pcirawr0GetRegionInfo: %d\n", iRegion)); + GET_PORT(TargetDevice); + + int rc = pDevPort->pfnGetRegionInfo(pDevPort, iRegion, pRegionStart, pu64RegionSize, pfPresent, pfFlags); + + PUT_PORT(); + + return rc; +} + +static int pcirawr0MapRegion(PSUPDRVSESSION pSession, + PCIRAWDEVHANDLE TargetDevice, + int32_t iRegion, + RTHCPHYS HCRegionStart, + uint64_t u64RegionSize, + uint32_t fFlags, + RTR3PTR *ppvAddressR3, + RTR0PTR *ppvAddressR0) +{ + LogFlow(("pcirawr0MapRegion\n")); + GET_PORT(TargetDevice); + int rc; + + rc = pDevPort->pfnMapRegion(pDevPort, iRegion, HCRegionStart, u64RegionSize, fFlags, ppvAddressR0); + if (RT_SUCCESS(rc)) + { + Assert(*ppvAddressR0 != NULL); + + /* Do we need to do something to help with R3 mapping, if ((fFlags & PCIRAWRFLAG_ALLOW_R3MAP) != 0) */ + } + + *ppvAddressR3 = 0; + + PUT_PORT(); + + return rc; +} + +static int pcirawr0UnmapRegion(PSUPDRVSESSION pSession, + PCIRAWDEVHANDLE TargetDevice, + int32_t iRegion, + RTHCPHYS HCRegionStart, + uint64_t u64RegionSize, + RTR3PTR pvAddressR3, + RTR0PTR pvAddressR0) +{ + LogFlow(("pcirawr0UnmapRegion\n")); + int rc; + NOREF(pSession); NOREF(pvAddressR3); + + GET_PORT(TargetDevice); + + rc = pDevPort->pfnUnmapRegion(pDevPort, iRegion, HCRegionStart, u64RegionSize, pvAddressR0); + + PUT_PORT(); + + return rc; +} + +static int pcirawr0PioWrite(PSUPDRVSESSION pSession, + PCIRAWDEVHANDLE TargetDevice, + uint16_t Port, + uint32_t u32, + unsigned cb) +{ + NOREF(pSession); NOREF(TargetDevice); + /// @todo add check that port fits into device range + switch (cb) + { + case 1: + ASMOutU8 (Port, u32); + break; + case 2: + ASMOutU16(Port, u32); + break; + case 4: + ASMOutU32(Port, u32); + break; + default: + AssertMsgFailed(("Unhandled port write: %d\n", cb)); + } + + return VINF_SUCCESS; +} + + +static int pcirawr0PioRead(PSUPDRVSESSION pSession, + PCIRAWDEVHANDLE TargetDevice, + uint16_t Port, + uint32_t *pu32, + unsigned cb) +{ + NOREF(pSession); NOREF(TargetDevice); + /// @todo add check that port fits into device range + switch (cb) + { + case 1: + *pu32 = ASMInU8 (Port); + break; + case 2: + *pu32 = ASMInU16(Port); + break; + case 4: + *pu32 = ASMInU32(Port); + break; + default: + AssertMsgFailed(("Unhandled port read: %d\n", cb)); + } + + return VINF_SUCCESS; +} + + +static int pcirawr0MmioRead(PSUPDRVSESSION pSession, + PCIRAWDEVHANDLE TargetDevice, + RTR0PTR Address, + PCIRAWMEMLOC *pValue) +{ + NOREF(pSession); NOREF(TargetDevice); + /// @todo add check that address fits into device range +#if 1 + switch (pValue->cb) + { + case 1: + pValue->u.u8 = *(uint8_t*)Address; + break; + case 2: + pValue->u.u16 = *(uint16_t*)Address; + break; + case 4: + pValue->u.u32 = *(uint32_t*)Address; + break; + case 8: + pValue->u.u64 = *(uint64_t*)Address; + break; + } +#else + memset(&pValue->u.u64, 0, 8); +#endif + return VINF_SUCCESS; +} + +static int pcirawr0MmioWrite(PSUPDRVSESSION pSession, + PCIRAWDEVHANDLE TargetDevice, + RTR0PTR Address, + PCIRAWMEMLOC *pValue) +{ + NOREF(pSession); NOREF(TargetDevice); + /// @todo add check that address fits into device range +#if 1 + switch (pValue->cb) + { + case 1: + *(uint8_t*)Address = pValue->u.u8; + break; + case 2: + *(uint16_t*)Address = pValue->u.u16; + break; + case 4: + *(uint32_t*)Address = pValue->u.u32; + break; + case 8: + *(uint64_t*)Address = pValue->u.u64; + break; + } +#endif + return VINF_SUCCESS; +} + +static int pcirawr0PciCfgRead(PSUPDRVSESSION pSession, + PCIRAWDEVHANDLE TargetDevice, + uint32_t Register, + PCIRAWMEMLOC *pValue) +{ + GET_PORT(TargetDevice); + + return pDevPort->pfnPciCfgRead(pDevPort, Register, pValue); +} + +static int pcirawr0PciCfgWrite(PSUPDRVSESSION pSession, + PCIRAWDEVHANDLE TargetDevice, + uint32_t Register, + PCIRAWMEMLOC *pValue) +{ + int rc; + + GET_PORT(TargetDevice); + + rc = pDevPort->pfnPciCfgWrite(pDevPort, Register, pValue); + + PUT_PORT(); + + return rc; +} + +static int pcirawr0EnableIrq(PSUPDRVSESSION pSession, + PCIRAWDEVHANDLE TargetDevice) +{ + int rc = VINF_SUCCESS; + GET_PORT(TargetDevice); + + rc = pDevPort->pfnRegisterIrqHandler(pDevPort, pcirawr0Isr, pDev, + &pDev->hIsr); + + PUT_PORT(); + return rc; +} + +static int pcirawr0DisableIrq(PSUPDRVSESSION pSession, + PCIRAWDEVHANDLE TargetDevice) +{ + int rc = VINF_SUCCESS; + GET_PORT(TargetDevice); + + rc = pDevPort->pfnUnregisterIrqHandler(pDevPort, pDev->hIsr); + pDev->hIsr = 0; + + PUT_PORT(); + return rc; +} + +static int pcirawr0GetIrq(PSUPDRVSESSION pSession, + PCIRAWDEVHANDLE TargetDevice, + int64_t iTimeout, + int32_t *piIrq) +{ + int rc = VINF_SUCCESS; + bool fTerminate = false; + int32_t iPendingIrq = 0; + + LogFlow(("pcirawr0GetIrq\n")); + + GET_PORT(TargetDevice); + + RTSpinlockAcquire(pDev->hSpinlock); + iPendingIrq = pDev->iPendingIrq; + pDev->iPendingIrq = 0; + fTerminate = pDev->fTerminate; + RTSpinlockRelease(pDev->hSpinlock); + + /* Block until new IRQs arrives */ + if (!fTerminate) + { + if (iPendingIrq == 0) + { + rc = RTSemEventWaitNoResume(pDev->hIrqEvent, iTimeout); + if (RT_SUCCESS(rc)) + { + /** @todo racy */ + if (!ASMAtomicReadBool(&pDev->fTerminate)) + { + RTSpinlockAcquire(pDev->hSpinlock); + iPendingIrq = pDev->iPendingIrq; + pDev->iPendingIrq = 0; + RTSpinlockRelease(pDev->hSpinlock); + } + else + rc = VERR_INTERRUPTED; + } + } + + if (RT_SUCCESS(rc)) + *piIrq = iPendingIrq; + } + else + rc = VERR_INTERRUPTED; + + PUT_PORT(); + + return rc; +} + +static int pcirawr0PowerStateChange(PSUPDRVSESSION pSession, + PCIRAWDEVHANDLE TargetDevice, + PCIRAWPOWERSTATE aState, + uint64_t *pu64Param) +{ + LogFlow(("pcirawr0PowerStateChange\n")); + GET_PORT(TargetDevice); + + int rc = pDevPort->pfnPowerStateChange(pDevPort, aState, pu64Param); + + PUT_PORT(); + + return rc; +} + +/** + * Process PCI raw request + * + * @returns VBox status code. + */ +PCIRAWR0DECL(int) PciRawR0ProcessReq(PGVM pGVM, PSUPDRVSESSION pSession, PPCIRAWSENDREQ pReq) +{ + LogFlow(("PciRawR0ProcessReq: %d for %x\n", pReq->iRequest, pReq->TargetDevice)); + int rc = VINF_SUCCESS; + + /* Route request to the host driver */ + switch (pReq->iRequest) + { + case PCIRAWR0_DO_OPEN_DEVICE: + rc = pcirawr0OpenDevice(pGVM, pSession, + pReq->u.aOpenDevice.PciAddress, + pReq->u.aOpenDevice.fFlags, + &pReq->u.aOpenDevice.Device, + &pReq->u.aOpenDevice.fDevFlags); + break; + case PCIRAWR0_DO_CLOSE_DEVICE: + rc = pcirawr0CloseDevice(pSession, + pReq->TargetDevice, + pReq->u.aCloseDevice.fFlags); + break; + case PCIRAWR0_DO_GET_REGION_INFO: + rc = pcirawr0GetRegionInfo(pSession, + pReq->TargetDevice, + pReq->u.aGetRegionInfo.iRegion, + &pReq->u.aGetRegionInfo.RegionStart, + &pReq->u.aGetRegionInfo.u64RegionSize, + &pReq->u.aGetRegionInfo.fPresent, + &pReq->u.aGetRegionInfo.fFlags); + break; + case PCIRAWR0_DO_MAP_REGION: + rc = pcirawr0MapRegion(pSession, + pReq->TargetDevice, + pReq->u.aMapRegion.iRegion, + pReq->u.aMapRegion.StartAddress, + pReq->u.aMapRegion.iRegionSize, + pReq->u.aMapRegion.fFlags, + &pReq->u.aMapRegion.pvAddressR3, + &pReq->u.aMapRegion.pvAddressR0); + break; + case PCIRAWR0_DO_UNMAP_REGION: + rc = pcirawr0UnmapRegion(pSession, + pReq->TargetDevice, + pReq->u.aUnmapRegion.iRegion, + pReq->u.aUnmapRegion.StartAddress, + pReq->u.aUnmapRegion.iRegionSize, + pReq->u.aUnmapRegion.pvAddressR3, + pReq->u.aUnmapRegion.pvAddressR0); + break; + case PCIRAWR0_DO_PIO_WRITE: + rc = pcirawr0PioWrite(pSession, + pReq->TargetDevice, + pReq->u.aPioWrite.iPort, + pReq->u.aPioWrite.iValue, + pReq->u.aPioWrite.cb); + break; + case PCIRAWR0_DO_PIO_READ: + rc = pcirawr0PioRead(pSession, + pReq->TargetDevice, + pReq->u.aPioRead.iPort, + &pReq->u.aPioWrite.iValue, + pReq->u.aPioRead.cb); + break; + case PCIRAWR0_DO_MMIO_WRITE: + rc = pcirawr0MmioWrite(pSession, + pReq->TargetDevice, + pReq->u.aMmioWrite.Address, + &pReq->u.aMmioWrite.Value); + break; + case PCIRAWR0_DO_MMIO_READ: + rc = pcirawr0MmioRead(pSession, + pReq->TargetDevice, + pReq->u.aMmioRead.Address, + &pReq->u.aMmioRead.Value); + break; + case PCIRAWR0_DO_PCICFG_WRITE: + rc = pcirawr0PciCfgWrite(pSession, + pReq->TargetDevice, + pReq->u.aPciCfgWrite.iOffset, + &pReq->u.aPciCfgWrite.Value); + break; + case PCIRAWR0_DO_PCICFG_READ: + rc = pcirawr0PciCfgRead(pSession, + pReq->TargetDevice, + pReq->u.aPciCfgRead.iOffset, + &pReq->u.aPciCfgRead.Value); + break; + case PCIRAWR0_DO_ENABLE_IRQ: + rc = pcirawr0EnableIrq(pSession, + pReq->TargetDevice); + break; + case PCIRAWR0_DO_DISABLE_IRQ: + rc = pcirawr0DisableIrq(pSession, + pReq->TargetDevice); + break; + case PCIRAWR0_DO_GET_IRQ: + rc = pcirawr0GetIrq(pSession, + pReq->TargetDevice, + pReq->u.aGetIrq.iTimeout, + &pReq->u.aGetIrq.iIrq); + break; + case PCIRAWR0_DO_POWER_STATE_CHANGE: + rc = pcirawr0PowerStateChange(pSession, + pReq->TargetDevice, + (PCIRAWPOWERSTATE)pReq->u.aPowerStateChange.iState, + &pReq->u.aPowerStateChange.u64Param); + break; + default: + rc = VERR_NOT_SUPPORTED; + } + + LogFlow(("PciRawR0ProcessReq: returns %Rrc\n", rc)); + return rc; +} + |