232 files changed, 14158 insertions, 3024 deletions
diff --git a/drivers/gpu/drm/xe/.kunitconfig b/drivers/gpu/drm/xe/.kunitconfig
index 9590eac91a..ad4b9b4a9f 100644
--- a/drivers/gpu/drm/xe/.kunitconfig
+++ b/drivers/gpu/drm/xe/.kunitconfig
@@ -11,3 +11,8 @@ CONFIG_DRM_XE_DISPLAY=n
 CONFIG_EXPERT=y
 CONFIG_FB=y
 CONFIG_DRM_XE_KUNIT_TEST=y
+CONFIG_LOCK_DEBUGGING_SUPPORT=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_LOCKDEP=y
+CONFIG_DEBUG_LOCKDEP=y
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index e36ae1f0d8..63f1e2d164 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config DRM_XE
 	tristate "Intel Xe Graphics"
-	depends on DRM && PCI && MMU && (m || (y && KUNIT=y)) && 64BIT
+	depends on DRM && PCI && MMU && (m || (y && KUNIT=y))
 	select INTERVAL_TREE
 	# we need shmfs for the swappable backing store, and in particular
 	# the shmem_readpage() which depends upon tmpfs
@@ -10,6 +10,7 @@ config DRM_XE
 	select DRM_BUDDY
 	select DRM_EXEC
 	select DRM_KMS_HELPER
+	select DRM_KUNIT_TEST_HELPERS if DRM_XE_KUNIT_TEST != n
 	select DRM_PANEL
 	select DRM_SUBALLOC_HELPER
 	select DRM_DISPLAY_DP_HELPER
@@ -25,6 +26,7 @@ config DRM_XE
 	select INPUT if ACPI
 	select ACPI_VIDEO if X86 && ACPI
 	select ACPI_BUTTON if ACPI
+	select X86_PLATFORM_DEVICES if X86 && ACPI
 	select ACPI_WMI if X86 && ACPI
 	select SYNC_FILE
 	select IOSF_MBI
@@ -40,6 +42,7 @@ config DRM_XE
 	select MMU_NOTIFIER
 	select WANT_DEV_COREDUMP
 	select AUXILIARY_BUS
+	select HMM_MIRROR
 	help
 	  Experimental driver for Intel Xe series GPUs
 
diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
index 549065f57a..df02e5d17d 100644
--- a/drivers/gpu/drm/xe/Kconfig.debug
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -76,7 +76,6 @@ config DRM_XE_KUNIT_TEST
 	depends on DRM_XE && KUNIT && DEBUG_FS
 	default KUNIT_ALL_TESTS
 	select DRM_EXPORT_FOR_TESTS if m
-	select DRM_KUNIT_TEST_HELPERS
 	help
 	  Choose this option to allow the driver to perform selftests under
 	  the kunit framework
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 1c9e6906b0..b165bbf52a 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -32,7 +32,7 @@ endif
 # Enable -Werror in CI and development
 subdir-ccflags-$(CONFIG_DRM_XE_WERROR) += -Werror
 
-subdir-ccflags-y += -I$(obj) -I$(srctree)/$(src)
+subdir-ccflags-y += -I$(obj) -I$(src)
 
 # generated sources
 hostprogs := xe_gen_wa_oob
@@ -43,12 +43,13 @@ quiet_cmd_wa_oob = GEN     $(notdir $(generated_oob))
       cmd_wa_oob = mkdir -p $(@D); $^ $(generated_oob)
 
 $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
-		 $(srctree)/$(src)/xe_wa_oob.rules
+		 $(src)/xe_wa_oob.rules
 	$(call cmd,wa_oob)
 
 uses_generated_oob := \
 	$(obj)/xe_gsc.o \
 	$(obj)/xe_guc.o \
+	$(obj)/xe_guc_ads.o \
 	$(obj)/xe_migrate.o \
 	$(obj)/xe_ring_ops.o \
 	$(obj)/xe_vm.o \
@@ -77,6 +78,7 @@ xe-y += xe_bb.o \
 	xe_ggtt.o \
 	xe_gpu_scheduler.o \
 	xe_gsc.o \
+	xe_gsc_proxy.o \
 	xe_gsc_submit.o \
 	xe_gt.o \
 	xe_gt_ccs_mode.o \
@@ -93,8 +95,11 @@ xe-y += xe_bb.o \
 	xe_guc.o \
 	xe_guc_ads.o \
 	xe_guc_ct.o \
+	xe_guc_db_mgr.o \
 	xe_guc_debugfs.o \
 	xe_guc_hwconfig.o \
+	xe_guc_id_mgr.o \
+	xe_guc_klv_helpers.o \
 	xe_guc_log.o \
 	xe_guc_pc.o \
 	xe_guc_submit.o \
@@ -138,33 +143,45 @@ xe-y += xe_bb.o \
 	xe_uc_debugfs.o \
 	xe_uc_fw.o \
 	xe_vm.o \
+	xe_vram_freq.o \
 	xe_wait_user_fence.o \
 	xe_wa.o \
 	xe_wopcm.o
 
+xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o
+
 # graphics hardware monitoring (HWMON) support
 xe-$(CONFIG_HWMON) += xe_hwmon.o
 
 # graphics virtualization (SR-IOV) support
-xe-y += xe_sriov.o
+xe-y += \
+	xe_guc_relay.o \
+	xe_memirq.o \
+	xe_sriov.o
 
 xe-$(CONFIG_PCI_IOV) += \
+	xe_gt_sriov_pf.o \
+	xe_gt_sriov_pf_config.o \
+	xe_gt_sriov_pf_control.o \
+	xe_gt_sriov_pf_policy.o \
 	xe_lmtt.o \
 	xe_lmtt_2l.o \
-	xe_lmtt_ml.o
+	xe_lmtt_ml.o \
+	xe_sriov_pf.o
+
+# include helpers for tests even when XE is built-in
+ifdef CONFIG_DRM_XE_KUNIT_TEST
+xe-y += tests/xe_kunit_helpers.o
+endif
 
 # i915 Display compat #defines and #includes
 subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \
-	-I$(srctree)/$(src)/display/ext \
-	-I$(srctree)/$(src)/compat-i915-headers \
-	-I$(srctree)/drivers/gpu/drm/xe/display/ \
+	-I$(src)/display/ext \
+	-I$(src)/compat-i915-headers \
 	-I$(srctree)/drivers/gpu/drm/i915/display/ \
 	-Ddrm_i915_gem_object=xe_bo \
 	-Ddrm_i915_private=xe_device
 
-CFLAGS_i915-display/intel_fbdev.o = -Wno-override-init
-CFLAGS_i915-display/intel_display_device.o = -Wno-override-init
-
 # Rule to build SOC code shared with i915
 $(obj)/i915-soc/%.o: $(srctree)/drivers/gpu/drm/i915/soc/%.c FORCE
 	$(call cmd,force_checksrc)
@@ -177,17 +194,17 @@ $(obj)/i915-display/%.o: $(srctree)/drivers/gpu/drm/i915/display/%.c FORCE
 
 # Display code specific to xe
 xe-$(CONFIG_DRM_XE_DISPLAY) += \
-	xe_display.o \
-	display/xe_fb_pin.o \
-	display/xe_hdcp_gsc.o \
-	display/xe_plane_initial.o \
-	display/xe_display_rps.o \
+	display/ext/i915_irq.o \
+	display/ext/i915_utils.o \
+	display/intel_fb_bo.o \
+	display/intel_fbdev_fb.o \
+	display/xe_display.o \
 	display/xe_display_misc.o \
+	display/xe_display_rps.o \
 	display/xe_dsb_buffer.o \
-	display/intel_fbdev_fb.o \
-	display/intel_fb_bo.o \
-	display/ext/i915_irq.o \
-	display/ext/i915_utils.o
+	display/xe_fb_pin.o \
+	display/xe_hdcp_gsc.o \
+	display/xe_plane_initial.o
 
 # SOC code shared with i915
 xe-$(CONFIG_DRM_XE_DISPLAY) += \
@@ -214,8 +231,6 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
 	i915-display/intel_ddi.o \
 	i915-display/intel_ddi_buf_trans.o \
 	i915-display/intel_display.o \
-	i915-display/intel_display_debugfs.o \
-	i915-display/intel_display_debugfs_params.o \
 	i915-display/intel_display_device.o \
 	i915-display/intel_display_driver.o \
 	i915-display/intel_display_irq.o \
@@ -249,6 +264,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
 	i915-display/intel_global_state.o \
 	i915-display/intel_gmbus.o \
 	i915-display/intel_hdcp.o \
+	i915-display/intel_hdcp_gsc_message.o \
 	i915-display/intel_hdmi.o \
 	i915-display/intel_hotplug.o \
 	i915-display/intel_hotplug_irq.o \
@@ -259,7 +275,6 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
 	i915-display/intel_modeset_setup.o \
 	i915-display/intel_modeset_verify.o \
 	i915-display/intel_panel.o \
-	i915-display/intel_pipe_crc.o \
 	i915-display/intel_pmdemand.o \
 	i915-display/intel_pps.o \
 	i915-display/intel_psr.o \
@@ -271,6 +286,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
 	i915-display/intel_vdsc.o \
 	i915-display/intel_vga.o \
 	i915-display/intel_vrr.o \
+	i915-display/intel_dmc_wl.o \
 	i915-display/intel_wm.o \
 	i915-display/skl_scaler.o \
 	i915-display/skl_universal_plane.o \
@@ -286,6 +302,13 @@ ifeq ($(CONFIG_DRM_FBDEV_EMULATION),y)
 	xe-$(CONFIG_DRM_XE_DISPLAY) += i915-display/intel_fbdev.o
 endif
 
+ifeq ($(CONFIG_DEBUG_FS),y)
+	xe-$(CONFIG_DRM_XE_DISPLAY) += \
+		i915-display/intel_display_debugfs.o \
+		i915-display/intel_display_debugfs_params.o \
+		i915-display/intel_pipe_crc.o
+endif
+
 obj-$(CONFIG_DRM_XE) += xe.o
 obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/
 
@@ -296,7 +319,7 @@ ifneq ($(CONFIG_DRM_XE_DISPLAY),y)
 endif
 
 always-$(CONFIG_DRM_XE_WERROR) += \
-	$(patsubst %.h,%.hdrtest, $(shell cd $(srctree)/$(src) && find * -name '*.h' $(hdrtest_find_args)))
+	$(patsubst %.h,%.hdrtest, $(shell cd $(src) && find * -name '*.h' $(hdrtest_find_args)))
 
 quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@)
       cmd_hdrtest = $(CC) -DHDRTEST $(filter-out $(CFLAGS_GCOV), $(c_flags)) -S -o /dev/null -x c /dev/null -include $<; touch $@
diff --git a/drivers/gpu/drm/xe/abi/gsc_proxy_commands_abi.h b/drivers/gpu/drm/xe/abi/gsc_proxy_commands_abi.h
new file mode 100644
index 0000000000..80bbf06a3e
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/gsc_proxy_commands_abi.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _ABI_GSC_PROXY_COMMANDS_ABI_H
+#define _ABI_GSC_PROXY_COMMANDS_ABI_H
+
+#include <linux/types.h>
+
+/* Heci client ID for proxy commands */
+#define HECI_MEADDRESS_PROXY 10
+
+/* FW-defined proxy header */
+struct xe_gsc_proxy_header {
+	/*
+	 * hdr:
+	 * Bits 0-7: type of the proxy message (see enum xe_gsc_proxy_type)
+	 * Bits 8-15: rsvd
+	 * Bits 16-31: length in bytes of the payload following the proxy header
+	 */
+	u32 hdr;
+#define GSC_PROXY_TYPE		 GENMASK(7, 0)
+#define GSC_PROXY_PAYLOAD_LENGTH GENMASK(31, 16)
+
+	u32 source;		/* Source of the Proxy message */
+	u32 destination;	/* Destination of the Proxy message */
+#define GSC_PROXY_ADDRESSING_KMD  0x10000
+#define GSC_PROXY_ADDRESSING_GSC  0x20000
+#define GSC_PROXY_ADDRESSING_CSME 0x30000
+
+	u32 status;		/* Command status */
+} __packed;
+
+/* FW-defined proxy types */
+enum xe_gsc_proxy_type {
+	GSC_PROXY_MSG_TYPE_PROXY_INVALID = 0,
+	GSC_PROXY_MSG_TYPE_PROXY_QUERY = 1,
+	GSC_PROXY_MSG_TYPE_PROXY_PAYLOAD = 2,
+	GSC_PROXY_MSG_TYPE_PROXY_END = 3,
+	GSC_PROXY_MSG_TYPE_PROXY_NOTIFICATION = 4,
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
new file mode 100644
index 0000000000..c1ad09b364
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
@@ -0,0 +1,370 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_ACTIONS_SRIOV_ABI_H
+#define _ABI_GUC_ACTIONS_SRIOV_ABI_H
+
+#include "guc_communication_ctb_abi.h"
+
+/**
+ * DOC: GUC2PF_RELAY_FROM_VF
+ *
+ * This message is used by the GuC firmware to forward a VF2PF `Relay Message`_
+ * received from the Virtual Function (VF) driver to this Physical Function (PF)
+ * driver.
+ *
+ * This message is always sent as `CTB HXG Message`_.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_GUC_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_                                   |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | MBZ                                                          |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF` = 0x5100      |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **VFID** - source VF identifier                              |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 2 |  31:0 | **RELAY_ID** - VF/PF message ID                              |
+ *  +---+-------+-----------------+--------------------------------------------+
+ *  | 3 |  31:0 | **RELAY_DATA1** |                                            |
+ *  +---+-------+-----------------+                                            |
+ *  |...|       |                 |       [Embedded `Relay Message`_]          |
+ *  +---+-------+-----------------+                                            |
+ *  | n |  31:0 | **RELAY_DATAx** |                                            |
+ *  +---+-------+-----------------+--------------------------------------------+
+ */
+#define XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF		0x5100
+
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN		(GUC_HXG_EVENT_MSG_MIN_LEN + 2u)
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_MAX_LEN \
+	(GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN + GUC_RELAY_MSG_MAX_LEN)
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_0_MBZ		GUC_HXG_EVENT_MSG_0_DATA0
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_1_VFID		GUC_HXG_EVENT_MSG_n_DATAn
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_2_RELAY_ID	GUC_HXG_EVENT_MSG_n_DATAn
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_3_RELAY_DATA1	GUC_HXG_EVENT_MSG_n_DATAn
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_n_RELAY_DATAx	GUC_HXG_EVENT_MSG_n_DATAn
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_NUM_RELAY_DATA	GUC_RELAY_MSG_MAX_LEN
+
+/**
+ * DOC: PF2GUC_RELAY_TO_VF
+ *
+ * This H2G message is used by the Physical Function (PF) driver to send embedded
+ * VF2PF `Relay Message`_ to the VF.
+ *
+ * This action message must be sent over CTB as `CTB HXG Message`_.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = `GUC_HXG_TYPE_FAST_REQUEST`_                          |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | MBZ                                                          |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`XE_GUC_ACTION_PF2GUC_RELAY_TO_VF` = 0x5101        |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **VFID** - target VF identifier                              |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 2 |  31:0 | **RELAY_ID** - VF/PF message ID                              |
+ *  +---+-------+-----------------+--------------------------------------------+
+ *  | 3 |  31:0 | **RELAY_DATA1** |                                            |
+ *  +---+-------+-----------------+                                            |
+ *  |...|       |                 |       [Embedded `Relay Message`_]          |
+ *  +---+-------+-----------------+                                            |
+ *  | n |  31:0 | **RELAY_DATAx** |                                            |
+ *  +---+-------+-----------------+--------------------------------------------+
+ */
+#define XE_GUC_ACTION_PF2GUC_RELAY_TO_VF		0x5101
+
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN		(GUC_HXG_REQUEST_MSG_MIN_LEN + 2u)
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_MAX_LEN \
+	(PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN + GUC_RELAY_MSG_MAX_LEN)
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_1_VFID		GUC_HXG_REQUEST_MSG_n_DATAn
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_2_RELAY_ID	GUC_HXG_REQUEST_MSG_n_DATAn
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_3_RELAY_DATA1	GUC_HXG_REQUEST_MSG_n_DATAn
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_n_RELAY_DATAx	GUC_HXG_REQUEST_MSG_n_DATAn
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_NUM_RELAY_DATA	GUC_RELAY_MSG_MAX_LEN
+
+/**
+ * DOC: GUC2VF_RELAY_FROM_PF
+ *
+ * This message is used by the GuC firmware to deliver `Relay Message`_ from the
+ * Physical Function (PF) driver to this Virtual Function (VF) driver.
+ * See `GuC Relay Communication`_ for details.
+ *
+ * This message is always sent over CTB.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_GUC_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_                                   |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | MBZ                                                          |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF` = 0x5102      |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **RELAY_ID** - VF/PF message ID                              |
+ *  +---+-------+-----------------+--------------------------------------------+
+ *  | 2 |  31:0 | **RELAY_DATA1** |                                            |
+ *  +---+-------+-----------------+                                            |
+ *  |...|       |                 |       [Embedded `Relay Message`_]          |
+ *  +---+-------+-----------------+                                            |
+ *  | n |  31:0 | **RELAY_DATAx** |                                            |
+ *  +---+-------+-----------------+--------------------------------------------+
+ */
+#define XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF		0x5102
+
+#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN		(GUC_HXG_EVENT_MSG_MIN_LEN + 1u)
+#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_MAX_LEN \
+	(GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN + GUC_RELAY_MSG_MAX_LEN)
+#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_0_MBZ		GUC_HXG_EVENT_MSG_0_DATA0
+#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_1_RELAY_ID	GUC_HXG_EVENT_MSG_n_DATAn
+#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_n_RELAY_DATAx	GUC_HXG_EVENT_MSG_n_DATAn
+#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_NUM_RELAY_DATA	GUC_RELAY_MSG_MAX_LEN
+
+/**
+ * DOC: VF2GUC_RELAY_TO_PF
+ *
+ * This message is used by the Virtual Function (VF) drivers to communicate with
+ * the Physical Function (PF) driver and send `Relay Message`_ to the PF driver.
+ * See `GuC Relay Communication`_ for details.
+ *
+ * This message must be sent over CTB.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ or GUC_HXG_TYPE_FAST_REQUEST_   |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | MBZ                                                          |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`XE_GUC_ACTION_VF2GUC_RELAY_TO_PF` = 0x5103        |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **RELAY_ID** - VF/PF message ID                              |
+ *  +---+-------+-----------------+--------------------------------------------+
+ *  | 2 |  31:0 | **RELAY_DATA1** |                                            |
+ *  +---+-------+-----------------+                                            |
+ *  |...|       |                 |       [Embedded `Relay Message`_]          |
+ *  +---+-------+-----------------+                                            |
+ *  | n |  31:0 | **RELAY_DATAx** |                                            |
+ *  +---+-------+-----------------+--------------------------------------------+
+ */
+#define XE_GUC_ACTION_VF2GUC_RELAY_TO_PF		0x5103
+
+#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN		(GUC_HXG_REQUEST_MSG_MIN_LEN + 1u)
+#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_MAX_LEN \
+	(VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN + GUC_RELAY_MSG_MAX_LEN)
+#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
+#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_1_RELAY_ID	GUC_HXG_REQUEST_MSG_n_DATAn
+#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_n_RELAY_DATAx	GUC_HXG_REQUEST_MSG_n_DATAn
+#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_NUM_RELAY_DATA	GUC_RELAY_MSG_MAX_LEN
+
+/**
+ * DOC: GUC2PF_VF_STATE_NOTIFY
+ *
+ * The GUC2PF_VF_STATE_NOTIFY message is used by the GuC to notify PF about change
+ * of the VF state.
+ *
+ * This G2H message is sent as `CTB HXG Message`_.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_GUC_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_                                   |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | DATA0 = MBZ                                                  |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`GUC_ACTION_GUC2PF_VF_STATE_NOTIFY` = 0x5106       |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | DATA1 = **VFID** - VF identifier                             |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 2 |  31:0 | DATA2 = **EVENT** - notification event:                      |
+ *  |   |       |                                                              |
+ *  |   |       |   - _`GUC_PF_NOTIFY_VF_ENABLE` = 1 (only if VFID = 0)        |
+ *  |   |       |   - _`GUC_PF_NOTIFY_VF_FLR` = 1                              |
+ *  |   |       |   - _`GUC_PF_NOTIFY_VF_FLR_DONE` = 2                         |
+ *  |   |       |   - _`GUC_PF_NOTIFY_VF_PAUSE_DONE` = 3                       |
+ *  |   |       |   - _`GUC_PF_NOTIFY_VF_FIXUP_DONE` = 4                       |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_GUC2PF_VF_STATE_NOTIFY		0x5106u
+
+#define GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_LEN		(GUC_HXG_EVENT_MSG_MIN_LEN + 2u)
+#define GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_0_MBZ		GUC_HXG_EVENT_MSG_0_DATA0
+#define GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID		GUC_HXG_EVENT_MSG_n_DATAn
+#define GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT	GUC_HXG_EVENT_MSG_n_DATAn
+#define   GUC_PF_NOTIFY_VF_ENABLE			1u
+#define   GUC_PF_NOTIFY_VF_FLR				1u
+#define   GUC_PF_NOTIFY_VF_FLR_DONE			2u
+#define   GUC_PF_NOTIFY_VF_PAUSE_DONE			3u
+#define   GUC_PF_NOTIFY_VF_FIXUP_DONE			4u
+
+/**
+ * DOC: PF2GUC_UPDATE_VGT_POLICY
+ *
+ * This message is used by the PF to set `GuC VGT Policy KLVs`_.
+ *
+ * This message must be sent as `CTB HXG Message`_.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | MBZ                                                          |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`GUC_ACTION_PF2GUC_UPDATE_VGT_POLICY` = 0x5502     |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **CFG_ADDR_LO** - dword aligned GGTT offset that             |
+ *  |   |       | represents the start of `GuC VGT Policy KLVs`_ list.         |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 2 |  31:0 | **CFG_ADDR_HI** - upper 32 bits of above offset.             |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 3 |  31:0 | **CFG_SIZE** - size (in dwords) of the config buffer         |
+ *  +---+-------+--------------------------------------------------------------+
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_GUC_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | **COUNT** - number of KLVs successfully applied              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_PF2GUC_UPDATE_VGT_POLICY			0x5502u
+
+#define PF2GUC_UPDATE_VGT_POLICY_REQUEST_MSG_LEN		(GUC_HXG_REQUEST_MSG_MIN_LEN + 3u)
+#define PF2GUC_UPDATE_VGT_POLICY_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
+#define PF2GUC_UPDATE_VGT_POLICY_REQUEST_MSG_1_CFG_ADDR_LO	GUC_HXG_REQUEST_MSG_n_DATAn
+#define PF2GUC_UPDATE_VGT_POLICY_REQUEST_MSG_2_CFG_ADDR_HI	GUC_HXG_REQUEST_MSG_n_DATAn
+#define PF2GUC_UPDATE_VGT_POLICY_REQUEST_MSG_3_CFG_SIZE		GUC_HXG_REQUEST_MSG_n_DATAn
+
+#define PF2GUC_UPDATE_VGT_POLICY_RESPONSE_MSG_LEN		GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define PF2GUC_UPDATE_VGT_POLICY_RESPONSE_MSG_0_COUNT		GUC_HXG_RESPONSE_MSG_0_DATA0
+
+/**
+ * DOC: PF2GUC_UPDATE_VF_CFG
+ *
+ * The `PF2GUC_UPDATE_VF_CFG`_ message is used by PF to provision single VF in GuC.
+ *
+ * This message must be sent as `CTB HXG Message`_.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | MBZ                                                          |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`GUC_ACTION_PF2GUC_UPDATE_VF_CFG` = 0x5503         |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **VFID** - identifier of the VF that the KLV                 |
+ *  |   |       | configurations are being applied to                          |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 2 |  31:0 | **CFG_ADDR_LO** - dword aligned GGTT offset that represents  |
+ *  |   |       | the start of a list of virtualization related KLV configs    |
+ *  |   |       | that are to be applied to the VF.                            |
+ *  |   |       | If this parameter is zero, the list is not parsed.           |
+ *  |   |       | If full configs address parameter is zero and configs_size is|
+ *  |   |       | zero associated VF config shall be reset to its default state|
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 3 |  31:0 | **CFG_ADDR_HI** - upper 32 bits of configs address.          |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 4 |  31:0 | **CFG_SIZE** - size (in dwords) of the config buffer         |
+ *  +---+-------+--------------------------------------------------------------+
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_GUC_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | **COUNT** - number of KLVs successfully applied              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_PF2GUC_UPDATE_VF_CFG			0x5503u
+
+#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_LEN		(GUC_HXG_REQUEST_MSG_MIN_LEN + 4u)
+#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
+#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_1_VFID		GUC_HXG_REQUEST_MSG_n_DATAn
+#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_2_CFG_ADDR_LO	GUC_HXG_REQUEST_MSG_n_DATAn
+#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_3_CFG_ADDR_HI	GUC_HXG_REQUEST_MSG_n_DATAn
+#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_4_CFG_SIZE	GUC_HXG_REQUEST_MSG_n_DATAn
+
+#define PF2GUC_UPDATE_VF_CFG_RESPONSE_MSG_LEN		GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define PF2GUC_UPDATE_VF_CFG_RESPONSE_MSG_0_COUNT	GUC_HXG_RESPONSE_MSG_0_DATA0
+
+/**
+ * DOC: PF2GUC_VF_CONTROL
+ *
+ * The PF2GUC_VF_CONTROL message is used by the PF to trigger VF state change
+ * maintained by the GuC.
+ *
+ * This H2G message must be sent as `CTB HXG Message`_.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | DATA0 = MBZ                                                  |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`GUC_ACTION_PF2GUC_VF_CONTROL_CMD` = 0x5506        |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | DATA1 = **VFID** - VF identifier                             |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 2 |  31:0 | DATA2 = **COMMAND** - control command:                       |
+ *  |   |       |                                                              |
+ *  |   |       |   - _`GUC_PF_TRIGGER_VF_PAUSE` = 1                           |
+ *  |   |       |   - _`GUC_PF_TRIGGER_VF_RESUME` = 2                          |
+ *  |   |       |   - _`GUC_PF_TRIGGER_VF_STOP` = 3                            |
+ *  |   |       |   - _`GUC_PF_TRIGGER_VF_FLR_START` = 4                       |
+ *  |   |       |   - _`GUC_PF_TRIGGER_VF_FLR_FINISH` = 5                      |
+ *  +---+-------+--------------------------------------------------------------+
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_GUC_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | DATA0 = MBZ                                                  |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_ACTION_PF2GUC_VF_CONTROL			0x5506u
+
+#define PF2GUC_VF_CONTROL_REQUEST_MSG_LEN		(GUC_HXG_EVENT_MSG_MIN_LEN + 2u)
+#define PF2GUC_VF_CONTROL_REQUEST_MSG_0_MBZ		GUC_HXG_EVENT_MSG_0_DATA0
+#define PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID		GUC_HXG_EVENT_MSG_n_DATAn
+#define PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND		GUC_HXG_EVENT_MSG_n_DATAn
+#define   GUC_PF_TRIGGER_VF_PAUSE			1u
+#define   GUC_PF_TRIGGER_VF_RESUME			2u
+#define   GUC_PF_TRIGGER_VF_STOP			3u
+#define   GUC_PF_TRIGGER_VF_FLR_START			4u
+#define   GUC_PF_TRIGGER_VF_FLR_FINISH			5u
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
index 0b1146d0c9..8f86a16dc5 100644
--- a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
@@ -81,12 +81,13 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
 
 #define GUC_CTB_HDR_LEN				1u
 #define GUC_CTB_MSG_MIN_LEN			GUC_CTB_HDR_LEN
-#define GUC_CTB_MSG_MAX_LEN			256u
+#define GUC_CTB_MSG_MAX_LEN			(GUC_CTB_MSG_MIN_LEN + GUC_CTB_MAX_DWORDS)
 #define GUC_CTB_MSG_0_FENCE			(0xffffu << 16)
 #define GUC_CTB_MSG_0_FORMAT			(0xfu << 12)
 #define   GUC_CTB_FORMAT_HXG			0u
 #define GUC_CTB_MSG_0_RESERVED			(0xfu << 8)
 #define GUC_CTB_MSG_0_NUM_DWORDS		(0xffu << 0)
+#define   GUC_CTB_MAX_DWORDS			255
 
 /**
  * DOC: CTB HXG Message
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
index 0400bc0fcc..511cf974d5 100644
--- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
@@ -319,4 +319,14 @@ enum  {
 #define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY	0x8a0b
 #define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_LEN	1u
 
+/*
+ * Workaround keys:
+ */
+enum xe_guc_klv_ids {
+	GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED				= 0x9002,
+	GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING				= 0x9005,
+	GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE				= 0x9007,
+	GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE			= 0x9008,
+};
+
 #endif
diff --git a/drivers/gpu/drm/xe/abi/guc_messages_abi.h b/drivers/gpu/drm/xe/abi/guc_messages_abi.h
index 29e414c82d..534a39db77 100644
--- a/drivers/gpu/drm/xe/abi/guc_messages_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_messages_abi.h
@@ -24,6 +24,7 @@
  *  |   | 30:28 | **TYPE** - message type                                      |
  *  |   |       |   - _`GUC_HXG_TYPE_REQUEST` = 0                              |
  *  |   |       |   - _`GUC_HXG_TYPE_EVENT` = 1                                |
+ *  |   |       |   - _`GUC_HXG_TYPE_FAST_REQUEST` = 2                         |
  *  |   |       |   - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3                     |
  *  |   |       |   - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5                    |
  *  |   |       |   - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6                     |
@@ -46,6 +47,7 @@
 #define GUC_HXG_MSG_0_TYPE			(0x7u << 28)
 #define   GUC_HXG_TYPE_REQUEST			0u
 #define   GUC_HXG_TYPE_EVENT			1u
+#define   GUC_HXG_TYPE_FAST_REQUEST		2u
 #define   GUC_HXG_TYPE_NO_RESPONSE_BUSY		3u
 #define   GUC_HXG_TYPE_NO_RESPONSE_RETRY	5u
 #define   GUC_HXG_TYPE_RESPONSE_FAILURE		6u
diff --git a/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h
new file mode 100644
index 0000000000..747e428de4
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_RELAY_ACTIONS_ABI_H_
+#define _ABI_GUC_RELAY_ACTIONS_ABI_H_
+
+/**
+ * DOC: GuC Relay Debug Actions
+ *
+ * This range of action codes is reserved for debugging purposes only and should
+ * be used only on debug builds. These actions may not be supported by the
+ * production drivers. Their definitions could be changed in the future.
+ *
+ *  _`GUC_RELAY_ACTION_DEBUG_ONLY_START` = 0xDEB0
+ *  _`GUC_RELAY_ACTION_DEBUG_ONLY_END` = 0xDEFF
+ */
+
+#define GUC_RELAY_ACTION_DEBUG_ONLY_START	0xDEB0
+#define GUC_RELAY_ACTION_DEBUG_ONLY_END		0xDEFF
+
+/**
+ * DOC: VFXPF_TESTLOOP
+ *
+ * This `Relay Message`_ is used to selftest the `GuC Relay Communication`_.
+ *
+ * The following opcodes are defined:
+ * VFXPF_TESTLOOP_OPCODE_NOP_ will return no data.
+ * VFXPF_TESTLOOP_OPCODE_BUSY_ will reply with BUSY response first.
+ * VFXPF_TESTLOOP_OPCODE_RETRY_ will reply with RETRY response instead.
+ * VFXPF_TESTLOOP_OPCODE_ECHO_ will return same data as received.
+ * VFXPF_TESTLOOP_OPCODE_FAIL_ will always fail with error.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ or GUC_HXG_TYPE_FAST_REQUEST_   |
+ *  |   |       | or GUC_HXG_TYPE_EVENT_                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | **OPCODE**                                                   |
+ *  |   |       |    - _`VFXPF_TESTLOOP_OPCODE_NOP` = 0x0                      |
+ *  |   |       |    - _`VFXPF_TESTLOOP_OPCODE_BUSY` = 0xB                     |
+ *  |   |       |    - _`VFXPF_TESTLOOP_OPCODE_RETRY` = 0xD                    |
+ *  |   |       |    - _`VFXPF_TESTLOOP_OPCODE_ECHO` = 0xE                     |
+ *  |   |       |    - _`VFXPF_TESTLOOP_OPCODE_FAIL` = 0xF                     |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`IOV_ACTION_SELFTEST_RELAY`                        |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **DATA1** = optional, depends on **OPCODE**:                 |
+ *  |   |       | for VFXPF_TESTLOOP_OPCODE_BUSY_: time in ms for reply        |
+ *  |   |       | for VFXPF_TESTLOOP_OPCODE_FAIL_: expected error              |
+ *  |   |       | for VFXPF_TESTLOOP_OPCODE_ECHO_: payload                     |
+ *  +---+-------+--------------------------------------------------------------+
+ *  |...|  31:0 | **DATAn** = only for **OPCODE** VFXPF_TESTLOOP_OPCODE_ECHO_  |
+ *  +---+-------+--------------------------------------------------------------+
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | DATA0 = MBZ                                                  |
+ *  +---+-------+--------------------------------------------------------------+
+ *  |...|  31:0 | DATAn = only for **OPCODE** VFXPF_TESTLOOP_OPCODE_ECHO_      |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_RELAY_ACTION_VFXPF_TESTLOOP		(GUC_RELAY_ACTION_DEBUG_ONLY_START + 1)
+#define   VFXPF_TESTLOOP_OPCODE_NOP		0x0
+#define   VFXPF_TESTLOOP_OPCODE_BUSY		0xB
+#define   VFXPF_TESTLOOP_OPCODE_RETRY		0xD
+#define   VFXPF_TESTLOOP_OPCODE_ECHO		0xE
+#define   VFXPF_TESTLOOP_OPCODE_FAIL		0xF
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_relay_communication_abi.h b/drivers/gpu/drm/xe/abi/guc_relay_communication_abi.h
new file mode 100644
index 0000000000..f92625f047
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_relay_communication_abi.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_RELAY_COMMUNICATION_ABI_H
+#define _ABI_GUC_RELAY_COMMUNICATION_ABI_H
+
+#include <linux/build_bug.h>
+
+#include "guc_actions_sriov_abi.h"
+#include "guc_communication_ctb_abi.h"
+#include "guc_messages_abi.h"
+
+/**
+ * DOC: GuC Relay Communication
+ *
+ * The communication between Virtual Function (VF) drivers and Physical Function
+ * (PF) drivers is based on the GuC firmware acting as a proxy (relay) agent.
+ *
+ * To communicate with the PF driver, VF's drivers use `VF2GUC_RELAY_TO_PF`_
+ * action that takes the `Relay Message`_ as opaque payload and requires the
+ * relay message identifier (RID) as additional parameter.
+ *
+ * This identifier is used by the drivers to match related messages.
+ *
+ * The GuC forwards this `Relay Message`_ and its identifier to the PF driver
+ * in `GUC2PF_RELAY_FROM_VF`_ action. This event message additionally contains
+ * the identifier of the origin VF (VFID).
+ *
+ * Likewise, to communicate with the VF drivers, PF driver use
+ * `VF2GUC_RELAY_TO_PF`_ action that in addition to the `Relay Message`_
+ * and the relay message identifier (RID) also takes the target VF identifier.
+ *
+ * The GuC uses this target VFID from the message to select where to send the
+ * `GUC2VF_RELAY_FROM_PF`_ with the embedded `Relay Message`_ with response::
+ *
+ *      VF                             GuC                              PF
+ *      |                               |                               |
+ *     [ ] VF2GUC_RELAY_TO_PF           |                               |
+ *     [ ]---------------------------> [ ]                              |
+ *     [ ] { rid, msg }                [ ]                              |
+ *     [ ]                             [ ] GUC2PF_RELAY_FROM_VF         |
+ *     [ ]                             [ ]---------------------------> [ ]
+ *     [ ]                              |  { VFID, rid, msg }          [ ]
+ *     [ ]                              |                              [ ]
+ *     [ ]                              |           PF2GUC_RELAY_TO_VF [ ]
+ *     [ ]                             [ ] <---------------------------[ ]
+ *     [ ]                             [ ]        { VFID, rid, reply }  |
+ *     [ ]        GUC2VF_RELAY_FROM_PF [ ]                              |
+ *     [ ] <---------------------------[ ]                              |
+ *      |               { rid, reply }  |                               |
+ *      |                               |                               |
+ *
+ * It is also possible that PF driver will initiate communication with the
+ * selected VF driver. The same GuC action messages will be used::
+ *
+ *      VF                             GuC                              PF
+ *      |                               |                               |
+ *      |                               |           PF2GUC_RELAY_TO_VF [ ]
+ *      |                              [ ] <---------------------------[ ]
+ *      |                              [ ]          { VFID, rid, msg } [ ]
+ *      |         GUC2VF_RELAY_FROM_PF [ ]                             [ ]
+ *     [ ] <---------------------------[ ]                             [ ]
+ *     [ ]                { rid, msg }  |                              [ ]
+ *     [ ]                              |                              [ ]
+ *     [ ] VF2GUC_RELAY_TO_PF           |                              [ ]
+ *     [ ]---------------------------> [ ]                             [ ]
+ *      |  { rid, reply }              [ ]                             [ ]
+ *      |                              [ ] GUC2PF_RELAY_FROM_VF        [ ]
+ *      |                              [ ]---------------------------> [ ]
+ *      |                               | { VFID, rid, reply }          |
+ *      |                               |                               |
+ */
+
+/**
+ * DOC: Relay Message
+ *
+ * The `Relay Message`_ is used by Physical Function (PF) driver and Virtual
+ * Function (VF) drivers to communicate using `GuC Relay Communication`_.
+ *
+ * Format of the `Relay Message`_ follows format of the generic `HXG Message`_.
+ *
+ *  +--------------------------------------------------------------------------+
+ *  |  `Relay Message`_                                                        |
+ *  +==========================================================================+
+ *  |  `HXG Message`_                                                          |
+ *  +--------------------------------------------------------------------------+
+ *
+ * Maximum length of the `Relay Message`_ is limited by the maximum length of
+ * the `CTB HXG Message`_ and format of the `GUC2PF_RELAY_FROM_VF`_ message.
+ */
+
+#define GUC_RELAY_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN
+#define GUC_RELAY_MSG_MAX_LEN \
+	(GUC_CTB_MAX_DWORDS - GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN)
+
+static_assert(PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN >
+	      VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN);
+
+/**
+ * DOC: Relay Error Codes
+ *
+ * The `GuC Relay Communication`_ can be used to pass `Relay Message`_ between
+ * drivers that run on different Operating Systems. To help in troubleshooting,
+ * `GuC Relay Communication`_ uses error codes that mostly match errno values.
+ */
+
+#define GUC_RELAY_ERROR_UNDISCLOSED			0
+#define GUC_RELAY_ERROR_OPERATION_NOT_PERMITTED		1	/* EPERM */
+#define GUC_RELAY_ERROR_PERMISSION_DENIED		13	/* EACCES */
+#define GUC_RELAY_ERROR_INVALID_ARGUMENT		22	/* EINVAL */
+#define GUC_RELAY_ERROR_INVALID_REQUEST_CODE		56	/* EBADRQC */
+#define GUC_RELAY_ERROR_NO_DATA_AVAILABLE		61	/* ENODATA */
+#define GUC_RELAY_ERROR_PROTOCOL_ERROR			71	/* EPROTO */
+#define GUC_RELAY_ERROR_MESSAGE_SIZE			90	/* EMSGSIZE */
+
+#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
index d9a81e6a4b..cd46322761 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
@@ -19,14 +19,12 @@
 #include "xe_bo.h"
 #include "xe_pm.h"
 #include "xe_step.h"
-#include "i915_gem.h"
 #include "i915_gem_stolen.h"
 #include "i915_gpu_error.h"
 #include "i915_reg_defs.h"
 #include "i915_utils.h"
 #include "intel_gt_types.h"
 #include "intel_step.h"
-#include "intel_uc_fw.h"
 #include "intel_uncore.h"
 #include "intel_runtime_pm.h"
 #include <linux/pm_runtime.h>
@@ -41,12 +39,8 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev)
 	return dev_get_drvdata(kdev);
 }
 
-
-#define INTEL_JASPERLAKE 0
-#define INTEL_ELKHARTLAKE 0
 #define IS_PLATFORM(xe, x) ((xe)->info.platform == x)
 #define INTEL_INFO(dev_priv)	(&((dev_priv)->info))
-#define INTEL_DEVID(dev_priv)	((dev_priv)->info.devid)
 #define IS_I830(dev_priv)	(dev_priv && 0)
 #define IS_I845G(dev_priv)	(dev_priv && 0)
 #define IS_I85X(dev_priv)	(dev_priv && 0)
@@ -86,11 +80,10 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev)
 #define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_S)
 #define IS_ALDERLAKE_P(dev_priv) (IS_PLATFORM(dev_priv, XE_ALDERLAKE_P) || \
 				  IS_PLATFORM(dev_priv, XE_ALDERLAKE_N))
-#define IS_XEHPSDV(dev_priv) (dev_priv && 0)
 #define IS_DG2(dev_priv)	IS_PLATFORM(dev_priv, XE_DG2)
-#define IS_PONTEVECCHIO(dev_priv) IS_PLATFORM(dev_priv, XE_PVC)
 #define IS_METEORLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_METEORLAKE)
 #define IS_LUNARLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_LUNARLAKE)
+#define IS_BATTLEMAGE(dev_priv)  IS_PLATFORM(dev_priv, XE_BATTLEMAGE)
 
 #define IS_HASWELL_ULT(dev_priv) (dev_priv && 0)
 #define IS_BROADWELL_ULT(dev_priv) (dev_priv && 0)
@@ -98,19 +91,12 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev)
 
 #define IP_VER(ver, rel)                ((ver) << 8 | (rel))
 
-#define INTEL_DISPLAY_ENABLED(xe) (HAS_DISPLAY((xe)) && !intel_opregion_headless_sku((xe)))
-
-#define IS_GRAPHICS_VER(xe, first, last) \
-	((xe)->info.graphics_verx100 >= first * 100 && \
-	 (xe)->info.graphics_verx100 <= (last*100 + 99))
 #define IS_MOBILE(xe) (xe && 0)
-#define HAS_LLC(xe) (!IS_DGFX((xe)))
 
 #define HAS_GMD_ID(xe) GRAPHICS_VERx100(xe) >= 1270
 
 /* Workarounds not handled yet */
 #define IS_DISPLAY_STEP(xe, first, last) ({u8 __step = (xe)->info.step.display; first <= __step && __step <= last; })
-#define IS_GRAPHICS_STEP(xe, first, last) ({u8 __step = (xe)->info.step.graphics; first <= __step && __step <= last; })
 
 #define IS_LP(xe) (0)
 #define IS_GEN9_LP(xe) (0)
@@ -127,27 +113,6 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev)
 #define IS_KABYLAKE_ULT(xe) (xe && 0)
 #define IS_SKYLAKE_ULT(xe) (xe && 0)
 
-#define IS_DG1_GRAPHICS_STEP(xe, first, last) (IS_DG1(xe) && IS_GRAPHICS_STEP(xe, first, last))
-#define IS_DG2_GRAPHICS_STEP(xe, variant, first, last) \
-	((xe)->info.subplatform == XE_SUBPLATFORM_DG2_ ## variant && \
-	 IS_GRAPHICS_STEP(xe, first, last))
-#define IS_XEHPSDV_GRAPHICS_STEP(xe, first, last) (IS_XEHPSDV(xe) && IS_GRAPHICS_STEP(xe, first, last))
-
-/* XXX: No basedie stepping support yet */
-#define IS_PVC_BD_STEP(xe, first, last) (!WARN_ON(1) && IS_PONTEVECCHIO(xe))
-
-#define IS_TIGERLAKE_DISPLAY_STEP(xe, first, last) (IS_TIGERLAKE(xe) && IS_DISPLAY_STEP(xe, first, last))
-#define IS_ROCKETLAKE_DISPLAY_STEP(xe, first, last) (IS_ROCKETLAKE(xe) && IS_DISPLAY_STEP(xe, first, last))
-#define IS_DG1_DISPLAY_STEP(xe, first, last) (IS_DG1(xe) && IS_DISPLAY_STEP(xe, first, last))
-#define IS_DG2_DISPLAY_STEP(xe, first, last) (IS_DG2(xe) && IS_DISPLAY_STEP(xe, first, last))
-#define IS_ADLP_DISPLAY_STEP(xe, first, last) (IS_ALDERLAKE_P(xe) && IS_DISPLAY_STEP(xe, first, last))
-#define IS_ADLS_DISPLAY_STEP(xe, first, last) (IS_ALDERLAKE_S(xe) && IS_DISPLAY_STEP(xe, first, last))
-#define IS_JSL_EHL_DISPLAY_STEP(xe, first, last) (IS_JSL_EHL(xe) && IS_DISPLAY_STEP(xe, first, last))
-#define IS_MTL_DISPLAY_STEP(xe, first, last) (IS_METEORLAKE(xe) && IS_DISPLAY_STEP(xe, first, last))
-
-/* FIXME: Add subplatform here */
-#define IS_MTL_GRAPHICS_STEP(xe, sub, first, last) (IS_METEORLAKE(xe) && IS_DISPLAY_STEP(xe, first, last))
-
 #define IS_DG2_G10(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G10)
 #define IS_DG2_G11(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G11)
 #define IS_DG2_G12(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G12)
@@ -155,30 +120,31 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev)
 #define IS_ICL_WITH_PORT_F(xe) (xe && 0)
 #define HAS_FLAT_CCS(xe) (xe_device_has_flat_ccs(xe))
 #define to_intel_bo(x) gem_to_xe_bo((x))
-#define mkwrite_device_info(xe) (INTEL_INFO(xe))
 
 #define HAS_128_BYTE_Y_TILING(xe) (xe || 1)
 
-#define intel_has_gpu_reset(a) (a && 0)
-
 #include "intel_wakeref.h"
 
-static inline bool intel_runtime_pm_get(struct xe_runtime_pm *pm)
+static inline intel_wakeref_t intel_runtime_pm_get(struct xe_runtime_pm *pm)
 {
 	struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
 
-	if (xe_pm_runtime_get(xe) < 0) {
-		xe_pm_runtime_put(xe);
-		return false;
-	}
-	return true;
+	return xe_pm_runtime_resume_and_get(xe);
 }
 
-static inline bool intel_runtime_pm_get_if_in_use(struct xe_runtime_pm *pm)
+static inline intel_wakeref_t intel_runtime_pm_get_if_in_use(struct xe_runtime_pm *pm)
 {
 	struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
 
-	return xe_pm_runtime_get_if_active(xe);
+	return xe_pm_runtime_get_if_in_use(xe);
+}
+
+static inline intel_wakeref_t intel_runtime_pm_get_noresume(struct xe_runtime_pm *pm)
+{
+	struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
+
+	xe_pm_runtime_get_noresume(xe);
+	return true;
 }
 
 static inline void intel_runtime_pm_put_unchecked(struct xe_runtime_pm *pm)
@@ -188,7 +154,7 @@ static inline void intel_runtime_pm_put_unchecked(struct xe_runtime_pm *pm)
 	xe_pm_runtime_put(xe);
 }
 
-static inline void intel_runtime_pm_put(struct xe_runtime_pm *pm, bool wakeref)
+static inline void intel_runtime_pm_put(struct xe_runtime_pm *pm, intel_wakeref_t wakeref)
 {
 	if (wakeref)
 		intel_runtime_pm_put_unchecked(pm);
@@ -218,7 +184,6 @@ struct i915_sched_attr {
 #define RUNTIME_INFO(xe)		(&(xe)->info.i915_runtime)
 
 #define FORCEWAKE_ALL XE_FORCEWAKE_ALL
-#define HPD_STORM_DEFAULT_THRESHOLD 50
 
 #ifdef CONFIG_ARM64
 /*
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h
deleted file mode 100644
index 12c671fd52..0000000000
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h
+++ /dev/null
@@ -1,6 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#include "../../i915/i915_fixed.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h
deleted file mode 100644
index 06b723a479..0000000000
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#ifndef __I915_GEM_H__
-#define __I915_GEM_H__
-#define GEM_BUG_ON
-#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h
index 888e7a87a9..cb6c759882 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h
@@ -17,7 +17,15 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe,
 {
 	struct xe_bo *bo;
 	int err;
-	u32 flags = XE_BO_CREATE_PINNED_BIT | XE_BO_CREATE_STOLEN_BIT;
+	u32 flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_STOLEN;
+
+	if (start < SZ_4K)
+		start = SZ_4K;
+
+	if (align) {
+		size = ALIGN(size, align);
+		start = ALIGN(start, align);
+	}
 
 	bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe),
 				       NULL, size, start, end,
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h
index 80b024d435..4931c7198f 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h
@@ -9,36 +9,10 @@
 #include <linux/types.h>
 
 struct drm_i915_private;
-struct i915_ggtt;
 
-static inline void intel_vgpu_detect(struct drm_i915_private *i915)
-{
-}
 static inline bool intel_vgpu_active(struct drm_i915_private *i915)
 {
 	return false;
 }
-static inline void intel_vgpu_register(struct drm_i915_private *i915)
-{
-}
-static inline bool intel_vgpu_has_full_ppgtt(struct drm_i915_private *i915)
-{
-	return false;
-}
-static inline bool intel_vgpu_has_hwsp_emulation(struct drm_i915_private *i915)
-{
-	return false;
-}
-static inline bool intel_vgpu_has_huge_gtt(struct drm_i915_private *i915)
-{
-	return false;
-}
-static inline int intel_vgt_balloon(struct i915_ggtt *ggtt)
-{
-	return 0;
-}
-static inline void intel_vgt_deballoon(struct i915_ggtt *ggtt)
-{
-}
 
 #endif /* _I915_VGPU_H_ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h
deleted file mode 100644
index 0097453289..0000000000
--- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#ifndef _INTEL_UC_FW_H_
-#define _INTEL_UC_FW_H_
-
-#define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git"
-
-#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
index cd26ddc0f6..ef79793caa 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
@@ -25,15 +25,15 @@ static inline u32 intel_uncore_read(struct intel_uncore *uncore,
 	return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg);
 }
 
-static inline u32 intel_uncore_read8(struct intel_uncore *uncore,
-				     i915_reg_t i915_reg)
+static inline u8 intel_uncore_read8(struct intel_uncore *uncore,
+				    i915_reg_t i915_reg)
 {
 	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
 
 	return xe_mmio_read8(__compat_uncore_to_gt(uncore), reg);
 }
 
-static inline u32 intel_uncore_read16(struct intel_uncore *uncore,
+static inline u16 intel_uncore_read16(struct intel_uncore *uncore,
 				      i915_reg_t i915_reg)
 {
 	struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c
index a9c1f9885c..e18521acc5 100644
--- a/drivers/gpu/drm/xe/display/intel_fb_bo.c
+++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c
@@ -11,7 +11,7 @@
 
 void intel_fb_bo_framebuffer_fini(struct xe_bo *bo)
 {
-	if (bo->flags & XE_BO_CREATE_PINNED_BIT) {
+	if (bo->flags & XE_BO_FLAG_PINNED) {
 		/* Unpin our kernel fb first */
 		xe_bo_lock(bo, false);
 		xe_bo_unpin(bo);
@@ -33,9 +33,9 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
 	if (ret)
 		goto err;
 
-	if (!(bo->flags & XE_BO_SCANOUT_BIT)) {
+	if (!(bo->flags & XE_BO_FLAG_SCANOUT)) {
 		/*
-		 * XE_BO_SCANOUT_BIT should ideally be set at creation, or is
+		 * XE_BO_FLAG_SCANOUT should ideally be set at creation, or is
 		 * automatically set when creating FB. We cannot change caching
 		 * mode when the boect is VM_BINDed, so we can only set
 		 * coherency with display when unbound.
@@ -45,7 +45,7 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
 			ret = -EINVAL;
 			goto err;
 		}
-		bo->flags |= XE_BO_SCANOUT_BIT;
+		bo->flags |= XE_BO_FLAG_SCANOUT;
 	}
 	ttm_bo_unreserve(&bo->ttm);
 	return 0;
diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
index 51ae3561fd..9e4bcfdbc7 100644
--- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
+++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
@@ -42,9 +42,9 @@ struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
 	if (!IS_DGFX(dev_priv)) {
 		obj = xe_bo_create_pin_map(dev_priv, xe_device_get_root_tile(dev_priv),
 					   NULL, size,
-					   ttm_bo_type_kernel, XE_BO_SCANOUT_BIT |
-					   XE_BO_CREATE_STOLEN_BIT |
-					   XE_BO_CREATE_PINNED_BIT);
+					   ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
+					   XE_BO_FLAG_STOLEN |
+					   XE_BO_FLAG_PINNED);
 		if (!IS_ERR(obj))
 			drm_info(&dev_priv->drm, "Allocated fbdev into stolen\n");
 		else
@@ -52,9 +52,9 @@ struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
 	}
 	if (IS_ERR(obj)) {
 		obj = xe_bo_create_pin_map(dev_priv, xe_device_get_root_tile(dev_priv), NULL, size,
-					  ttm_bo_type_kernel, XE_BO_SCANOUT_BIT |
-					  XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(dev_priv)) |
-					  XE_BO_CREATE_PINNED_BIT);
+					  ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
+					  XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(dev_priv)) |
+					  XE_BO_FLAG_PINNED);
 	}
 
 	if (IS_ERR(obj)) {
@@ -81,8 +81,8 @@ int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info
 {
 	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
 
-	if (!(obj->flags & XE_BO_CREATE_SYSTEM_BIT)) {
-		if (obj->flags & XE_BO_CREATE_STOLEN_BIT)
+	if (!(obj->flags & XE_BO_FLAG_SYSTEM)) {
+		if (obj->flags & XE_BO_FLAG_STOLEN)
 			info->fix.smem_start = xe_ttm_stolen_io_offset(obj, 0);
 		else
 			info->fix.smem_start =
diff --git a/drivers/gpu/drm/xe/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c
index 6ec375c1c4..0de0566e5b 100644
--- a/drivers/gpu/drm/xe/xe_display.c
+++ b/drivers/gpu/drm/xe/display/xe_display.c
@@ -51,14 +51,6 @@ bool xe_display_driver_probe_defer(struct pci_dev *pdev)
 	return intel_display_driver_probe_defer(pdev);
 }
 
-static void xe_display_last_close(struct drm_device *dev)
-{
-	struct xe_device *xe = to_xe_device(dev);
-
-	if (xe->info.enable_display)
-		intel_fbdev_restore_mode(to_xe_device(dev));
-}
-
 /**
  * xe_display_driver_set_hooks - Add driver flags and hooks for display
  * @driver: DRM device driver
@@ -73,7 +65,6 @@ void xe_display_driver_set_hooks(struct drm_driver *driver)
 		return;
 
 	driver->driver_features |= DRIVER_MODESET | DRIVER_ATOMIC;
-	driver->lastclose = xe_display_last_close;
 }
 
 static void unset_display_features(struct xe_device *xe)
@@ -101,8 +92,6 @@ static void display_destroy(struct drm_device *dev, void *dummy)
  */
 int xe_display_create(struct xe_device *xe)
 {
-	int err;
-
 	spin_lock_init(&xe->display.fb_tracking.lock);
 
 	xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0);
@@ -110,11 +99,7 @@ int xe_display_create(struct xe_device *xe)
 	drmm_mutex_init(&xe->drm, &xe->sb_lock);
 	xe->enabled_irq_mask = ~0;
 
-	err = drmm_add_action_or_reset(&xe->drm, display_destroy, NULL);
-	if (err)
-		return err;
-
-	return 0;
+	return drmm_add_action_or_reset(&xe->drm, display_destroy, NULL);
 }
 
 static void xe_display_fini_nommio(struct drm_device *dev, void *dummy)
@@ -218,9 +203,7 @@ void xe_display_fini(struct xe_device *xe)
 	if (!xe->info.enable_display)
 		return;
 
-	/* poll work can call into fbdev, hence clean that up afterwards */
 	intel_hpd_poll_fini(xe);
-	intel_fbdev_fini(xe);
 
 	intel_hdcp_component_fini(xe);
 	intel_audio_deinit(xe);
diff --git a/drivers/gpu/drm/xe/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h
index 710e56180b..710e56180b 100644
--- a/drivers/gpu/drm/xe/xe_display.h
+++ b/drivers/gpu/drm/xe/display/xe_display.h
diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
index 27c2fb1c00..44c9fd2143 100644
--- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
+++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
@@ -45,8 +45,8 @@ bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d
 	obj = xe_bo_create_pin_map(i915, xe_device_get_root_tile(i915),
 				   NULL, PAGE_ALIGN(size),
 				   ttm_bo_type_kernel,
-				   XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(i915)) |
-				   XE_BO_CREATE_GGTT_BIT);
+				   XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(i915)) |
+				   XE_BO_FLAG_GGTT);
 	if (IS_ERR(obj)) {
 		kfree(vma);
 		return false;
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index 722c84a566..3e1ae37c4c 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -10,6 +10,7 @@
 #include "intel_fb_pin.h"
 #include "xe_ggtt.h"
 #include "xe_gt.h"
+#include "xe_pm.h"
 
 #include <drm/ttm/ttm_bo.h>
 
@@ -30,7 +31,7 @@ write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_
 
 		for (row = 0; row < height; row++) {
 			u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE,
-							      xe->pat.idx[XE_CACHE_WB]);
+							      xe->pat.idx[XE_CACHE_NONE]);
 
 			iosys_map_wr(map, *dpt_ofs, u64, pte);
 			*dpt_ofs += 8;
@@ -62,7 +63,7 @@ write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs,
 		for (column = 0; column < width; column++) {
 			iosys_map_wr(map, *dpt_ofs, u64,
 				     pte_encode_bo(bo, src_idx * XE_PAGE_SIZE,
-				     xe->pat.idx[XE_CACHE_WB]));
+				     xe->pat.idx[XE_CACHE_NONE]));
 
 			*dpt_ofs += 8;
 			src_idx++;
@@ -99,18 +100,21 @@ static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb,
 	if (IS_DGFX(xe))
 		dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
 					   ttm_bo_type_kernel,
-					   XE_BO_CREATE_VRAM0_BIT |
-					   XE_BO_CREATE_GGTT_BIT);
+					   XE_BO_FLAG_VRAM0 |
+					   XE_BO_FLAG_GGTT |
+					   XE_BO_FLAG_PAGETABLE);
 	else
 		dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
 					   ttm_bo_type_kernel,
-					   XE_BO_CREATE_STOLEN_BIT |
-					   XE_BO_CREATE_GGTT_BIT);
+					   XE_BO_FLAG_STOLEN |
+					   XE_BO_FLAG_GGTT |
+					   XE_BO_FLAG_PAGETABLE);
 	if (IS_ERR(dpt))
 		dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size,
 					   ttm_bo_type_kernel,
-					   XE_BO_CREATE_SYSTEM_BIT |
-					   XE_BO_CREATE_GGTT_BIT);
+					   XE_BO_FLAG_SYSTEM |
+					   XE_BO_FLAG_GGTT |
+					   XE_BO_FLAG_PAGETABLE);
 	if (IS_ERR(dpt))
 		return PTR_ERR(dpt);
 
@@ -119,7 +123,7 @@ static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb,
 
 		for (x = 0; x < size / XE_PAGE_SIZE; x++) {
 			u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x * XE_PAGE_SIZE,
-							      xe->pat.idx[XE_CACHE_WB]);
+							      xe->pat.idx[XE_CACHE_NONE]);
 
 			iosys_map_wr(&dpt->vmap, x * 8, u64, pte);
 		}
@@ -165,7 +169,7 @@ write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo
 
 		for (row = 0; row < height; row++) {
 			u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE,
-							      xe->pat.idx[XE_CACHE_WB]);
+							      xe->pat.idx[XE_CACHE_NONE]);
 
 			xe_ggtt_set_pte(ggtt, *ggtt_ofs, pte);
 			*ggtt_ofs += XE_PAGE_SIZE;
@@ -190,7 +194,7 @@ static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb,
 	/* TODO: Consider sharing framebuffer mapping?
 	 * embed i915_vma inside intel_framebuffer
 	 */
-	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
+	xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile));
 	ret = mutex_lock_interruptible(&ggtt->lock);
 	if (ret)
 		goto out;
@@ -211,7 +215,7 @@ static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb,
 
 		for (x = 0; x < size; x += XE_PAGE_SIZE) {
 			u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x,
-							      xe->pat.idx[XE_CACHE_WB]);
+							      xe->pat.idx[XE_CACHE_NONE]);
 
 			xe_ggtt_set_pte(ggtt, vma->node.start + x, pte);
 		}
@@ -238,11 +242,10 @@ static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb,
 					   rot_info->plane[i].dst_stride);
 	}
 
-	xe_ggtt_invalidate(ggtt);
 out_unlock:
 	mutex_unlock(&ggtt->lock);
 out:
-	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
+	xe_pm_runtime_put(tile_to_xe(ggtt->tile));
 	return ret;
 }
 
@@ -260,7 +263,7 @@ static struct i915_vma *__xe_pin_fb_vma(struct intel_framebuffer *fb,
 
 	if (IS_DGFX(to_xe_device(bo->ttm.base.dev)) &&
 	    intel_fb_rc_ccs_cc_plane(&fb->base) >= 0 &&
-	    !(bo->flags & XE_BO_NEEDS_CPU_ACCESS)) {
+	    !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS)) {
 		struct xe_tile *tile = xe_device_get_root_tile(xe);
 
 		/*
@@ -321,7 +324,7 @@ static void __xe_unpin_fb_vma(struct i915_vma *vma)
 		xe_bo_unpin_map_no_vm(vma->dpt);
 	else if (!drm_mm_node_allocated(&vma->bo->ggtt_node) ||
 		 vma->bo->ggtt_node.start != vma->node.start)
-		xe_ggtt_remove_node(ggtt, &vma->node);
+		xe_ggtt_remove_node(ggtt, &vma->node, false);
 
 	ttm_bo_reserve(&vma->bo->ttm, false, false, NULL);
 	ttm_bo_unpin(&vma->bo->ttm);
@@ -353,7 +356,7 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state)
 	struct i915_vma *vma;
 
 	/* We reject creating !SCANOUT fb's, so this is weird.. */
-	drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_SCANOUT_BIT));
+	drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_FLAG_SCANOUT));
 
 	vma = __xe_pin_fb_vma(to_intel_framebuffer(fb), &plane_state->view.gtt);
 	if (IS_ERR(vma))
@@ -381,4 +384,4 @@ struct i915_address_space *intel_dpt_create(struct intel_framebuffer *fb)
 void intel_dpt_destroy(struct i915_address_space *vm)
 {
 	return;
-}
-\ No newline at end of file
+}
diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
index 0f11a39333..b3d3c065dd 100644
--- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
+++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
@@ -3,32 +3,254 @@
  * Copyright 2023, Intel Corporation.
  */
 
-#include "i915_drv.h"
+#include <drm/drm_print.h>
+#include <drm/i915_hdcp_interface.h>
+#include <linux/delay.h>
+
+#include "abi/gsc_command_header_abi.h"
 #include "intel_hdcp_gsc.h"
+#include "intel_hdcp_gsc_message.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_device_types.h"
+#include "xe_gsc_proxy.h"
+#include "xe_gsc_submit.h"
+#include "xe_gt.h"
+#include "xe_map.h"
+#include "xe_pm.h"
+#include "xe_uc_fw.h"
+
+#define HECI_MEADDRESS_HDCP 18
+
+struct intel_hdcp_gsc_message {
+	struct xe_bo *hdcp_bo;
+	u64 hdcp_cmd_in;
+	u64 hdcp_cmd_out;
+};
 
-bool intel_hdcp_gsc_cs_required(struct drm_i915_private *i915)
+#define HDCP_GSC_HEADER_SIZE sizeof(struct intel_gsc_mtl_header)
+
+bool intel_hdcp_gsc_cs_required(struct xe_device *xe)
 {
-	return true;
+	return DISPLAY_VER(xe) >= 14;
 }
 
-bool intel_hdcp_gsc_check_status(struct drm_i915_private *i915)
+bool intel_hdcp_gsc_check_status(struct xe_device *xe)
 {
-	return false;
+	struct xe_tile *tile = xe_device_get_root_tile(xe);
+	struct xe_gt *gt = tile->media_gt;
+	bool ret = true;
+
+	if (!xe_uc_fw_is_enabled(&gt->uc.gsc.fw))
+		return false;
+
+	xe_pm_runtime_get(xe);
+	if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC)) {
+		drm_dbg_kms(&xe->drm,
+			    "failed to get forcewake to check proxy status\n");
+		ret = false;
+		goto out;
+	}
+
+	if (!xe_gsc_proxy_init_done(&gt->uc.gsc))
+		ret = false;
+
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
+out:
+	xe_pm_runtime_put(xe);
+	return ret;
 }
 
-int intel_hdcp_gsc_init(struct drm_i915_private *i915)
+/*This function helps allocate memory for the command that we will send to gsc cs */
+static int intel_hdcp_gsc_initialize_message(struct xe_device *xe,
+					     struct intel_hdcp_gsc_message *hdcp_message)
 {
-	drm_info(&i915->drm, "HDCP support not yet implemented\n");
-	return -ENODEV;
+	struct xe_bo *bo = NULL;
+	u64 cmd_in, cmd_out;
+	int ret = 0;
+
+	/* allocate object of two page for HDCP command memory and store it */
+	bo = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, PAGE_SIZE * 2,
+				  ttm_bo_type_kernel,
+				  XE_BO_FLAG_SYSTEM |
+				  XE_BO_FLAG_GGTT);
+
+	if (IS_ERR(bo)) {
+		drm_err(&xe->drm, "Failed to allocate bo for HDCP streaming command!\n");
+		ret = PTR_ERR(bo);
+		goto out;
+	}
+
+	cmd_in = xe_bo_ggtt_addr(bo);
+	cmd_out = cmd_in + PAGE_SIZE;
+	xe_map_memset(xe, &bo->vmap, 0, 0, bo->size);
+
+	hdcp_message->hdcp_bo = bo;
+	hdcp_message->hdcp_cmd_in = cmd_in;
+	hdcp_message->hdcp_cmd_out = cmd_out;
+out:
+	return ret;
 }
 
-void intel_hdcp_gsc_fini(struct drm_i915_private *i915)
+static int intel_hdcp_gsc_hdcp2_init(struct xe_device *xe)
 {
+	struct intel_hdcp_gsc_message *hdcp_message;
+	int ret;
+
+	hdcp_message = kzalloc(sizeof(*hdcp_message), GFP_KERNEL);
+
+	if (!hdcp_message)
+		return -ENOMEM;
+
+	/*
+	 * NOTE: No need to lock the comp mutex here as it is already
+	 * going to be taken before this function called
+	 */
+	ret = intel_hdcp_gsc_initialize_message(xe, hdcp_message);
+	if (ret) {
+		drm_err(&xe->drm, "Could not initialize hdcp_message\n");
+		kfree(hdcp_message);
+		return ret;
+	}
+
+	xe->display.hdcp.hdcp_message = hdcp_message;
+	return ret;
 }
 
-ssize_t intel_hdcp_gsc_msg_send(struct drm_i915_private *i915, u8 *msg_in,
+static const struct i915_hdcp_ops gsc_hdcp_ops = {
+	.initiate_hdcp2_session = intel_hdcp_gsc_initiate_session,
+	.verify_receiver_cert_prepare_km =
+				intel_hdcp_gsc_verify_receiver_cert_prepare_km,
+	.verify_hprime = intel_hdcp_gsc_verify_hprime,
+	.store_pairing_info = intel_hdcp_gsc_store_pairing_info,
+	.initiate_locality_check = intel_hdcp_gsc_initiate_locality_check,
+	.verify_lprime = intel_hdcp_gsc_verify_lprime,
+	.get_session_key = intel_hdcp_gsc_get_session_key,
+	.repeater_check_flow_prepare_ack =
+				intel_hdcp_gsc_repeater_check_flow_prepare_ack,
+	.verify_mprime = intel_hdcp_gsc_verify_mprime,
+	.enable_hdcp_authentication = intel_hdcp_gsc_enable_authentication,
+	.close_hdcp_session = intel_hdcp_gsc_close_session,
+};
+
+int intel_hdcp_gsc_init(struct xe_device *xe)
+{
+	struct i915_hdcp_arbiter *data;
+	int ret;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	mutex_lock(&xe->display.hdcp.hdcp_mutex);
+	xe->display.hdcp.arbiter = data;
+	xe->display.hdcp.arbiter->hdcp_dev = xe->drm.dev;
+	xe->display.hdcp.arbiter->ops = &gsc_hdcp_ops;
+	ret = intel_hdcp_gsc_hdcp2_init(xe);
+	if (ret)
+		kfree(data);
+
+	mutex_unlock(&xe->display.hdcp.hdcp_mutex);
+
+	return ret;
+}
+
+void intel_hdcp_gsc_fini(struct xe_device *xe)
+{
+	struct intel_hdcp_gsc_message *hdcp_message =
+					xe->display.hdcp.hdcp_message;
+	struct i915_hdcp_arbiter *arb = xe->display.hdcp.arbiter;
+
+	if (hdcp_message) {
+		xe_bo_unpin_map_no_vm(hdcp_message->hdcp_bo);
+		kfree(hdcp_message);
+		xe->display.hdcp.hdcp_message = NULL;
+	}
+
+	kfree(arb);
+	xe->display.hdcp.arbiter = NULL;
+}
+
+static int xe_gsc_send_sync(struct xe_device *xe,
+			    struct intel_hdcp_gsc_message *hdcp_message,
+			    u32 msg_size_in, u32 msg_size_out,
+			    u32 addr_out_off)
+{
+	struct xe_gt *gt = hdcp_message->hdcp_bo->tile->media_gt;
+	struct iosys_map *map = &hdcp_message->hdcp_bo->vmap;
+	struct xe_gsc *gsc = &gt->uc.gsc;
+	int ret;
+
+	ret = xe_gsc_pkt_submit_kernel(gsc, hdcp_message->hdcp_cmd_in, msg_size_in,
+				       hdcp_message->hdcp_cmd_out, msg_size_out);
+	if (ret) {
+		drm_err(&xe->drm, "failed to send gsc HDCP msg (%d)\n", ret);
+		return ret;
+	}
+
+	if (xe_gsc_check_and_update_pending(xe, map, 0, map, addr_out_off))
+		return -EAGAIN;
+
+	ret = xe_gsc_read_out_header(xe, map, addr_out_off,
+				     sizeof(struct hdcp_cmd_header), NULL);
+
+	return ret;
+}
+
+ssize_t intel_hdcp_gsc_msg_send(struct xe_device *xe, u8 *msg_in,
 				size_t msg_in_len, u8 *msg_out,
 				size_t msg_out_len)
 {
-	return -ENODEV;
+	const size_t max_msg_size = PAGE_SIZE - HDCP_GSC_HEADER_SIZE;
+	struct intel_hdcp_gsc_message *hdcp_message;
+	u64 host_session_id;
+	u32 msg_size_in, msg_size_out;
+	u32 addr_out_off, addr_in_wr_off = 0;
+	int ret, tries = 0;
+
+	if (msg_in_len > max_msg_size || msg_out_len > max_msg_size) {
+		ret = -ENOSPC;
+		goto out;
+	}
+
+	msg_size_in = msg_in_len + HDCP_GSC_HEADER_SIZE;
+	msg_size_out = msg_out_len + HDCP_GSC_HEADER_SIZE;
+	hdcp_message = xe->display.hdcp.hdcp_message;
+	addr_out_off = PAGE_SIZE;
+
+	host_session_id = xe_gsc_create_host_session_id();
+	xe_pm_runtime_get_noresume(xe);
+	addr_in_wr_off = xe_gsc_emit_header(xe, &hdcp_message->hdcp_bo->vmap,
+					    addr_in_wr_off, HECI_MEADDRESS_HDCP,
+					    host_session_id, msg_in_len);
+	xe_map_memcpy_to(xe, &hdcp_message->hdcp_bo->vmap, addr_in_wr_off,
+			 msg_in, msg_in_len);
+	/*
+	 * Keep sending request in case the pending bit is set no need to add
+	 * message handle as we are using same address hence loc. of header is
+	 * same and it will contain the message handle. we will send the message
+	 * 20 times each message 50 ms apart
+	 */
+	do {
+		ret = xe_gsc_send_sync(xe, hdcp_message, msg_size_in, msg_size_out,
+				       addr_out_off);
+
+		/* Only try again if gsc says so */
+		if (ret != -EAGAIN)
+			break;
+
+		msleep(50);
+
+	} while (++tries < 20);
+
+	if (ret)
+		goto out;
+
+	xe_map_memcpy_from(xe, msg_out, &hdcp_message->hdcp_bo->vmap,
+			   addr_out_off + HDCP_GSC_HEADER_SIZE,
+			   msg_out_len);
+
+out:
+	xe_pm_runtime_put(xe);
+	return ret;
 }
diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c
index ccf83c12b5..9693c56d38 100644
--- a/drivers/gpu/drm/xe/display/xe_plane_initial.c
+++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c
@@ -6,10 +6,12 @@
 /* for ioread64 */
 #include <linux/io-64-nonatomic-lo-hi.h>
 
+#include "regs/xe_gtt_defs.h"
 #include "xe_ggtt.h"
 
 #include "i915_drv.h"
 #include "intel_atomic_plane.h"
+#include "intel_crtc.h"
 #include "intel_display.h"
 #include "intel_display_types.h"
 #include "intel_fb.h"
@@ -18,19 +20,20 @@
 #include "intel_plane_initial.h"
 
 static bool
-intel_reuse_initial_plane_obj(struct drm_i915_private *i915,
-			      const struct intel_initial_plane_config *plane_config,
+intel_reuse_initial_plane_obj(struct intel_crtc *this,
+			      const struct intel_initial_plane_config plane_configs[],
 			      struct drm_framebuffer **fb)
 {
+	struct drm_i915_private *i915 = to_i915(this->base.dev);
 	struct intel_crtc *crtc;
 
 	for_each_intel_crtc(&i915->drm, crtc) {
-		struct intel_crtc_state *crtc_state =
-			to_intel_crtc_state(crtc->base.state);
 		struct intel_plane *plane =
 			to_intel_plane(crtc->base.primary);
-		struct intel_plane_state *plane_state =
+		const struct intel_plane_state *plane_state =
 			to_intel_plane_state(plane->base.state);
+		const struct intel_crtc_state *crtc_state =
+			to_intel_crtc_state(crtc->base.state);
 
 		if (!crtc_state->uapi.active)
 			continue;
@@ -38,7 +41,7 @@ intel_reuse_initial_plane_obj(struct drm_i915_private *i915,
 		if (!plane_state->ggtt_vma)
 			continue;
 
-		if (intel_plane_ggtt_offset(plane_state) == plane_config->base) {
+		if (plane_configs[this->pipe].base == plane_configs[crtc->pipe].base) {
 			*fb = plane_state->hw.fb;
 			return true;
 		}
@@ -60,7 +63,7 @@ initial_plane_bo(struct xe_device *xe,
 	if (plane_config->size == 0)
 		return NULL;
 
-	flags = XE_BO_CREATE_PINNED_BIT | XE_BO_SCANOUT_BIT | XE_BO_CREATE_GGTT_BIT;
+	flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT;
 
 	base = round_down(plane_config->base, page_size);
 	if (IS_DGFX(xe)) {
@@ -77,7 +80,7 @@ initial_plane_bo(struct xe_device *xe,
 		}
 
 		phys_base = pte & ~(page_size - 1);
-		flags |= XE_BO_CREATE_VRAM0_BIT;
+		flags |= XE_BO_FLAG_VRAM0;
 
 		/*
 		 * We don't currently expect this to ever be placed in the
@@ -99,7 +102,7 @@ initial_plane_bo(struct xe_device *xe,
 		if (!stolen)
 			return NULL;
 		phys_base = base;
-		flags |= XE_BO_CREATE_STOLEN_BIT;
+		flags |= XE_BO_FLAG_STOLEN;
 
 		/*
 		 * If the FB is too big, just don't use it since fbdev is not very
@@ -178,10 +181,10 @@ err_bo:
 
 static void
 intel_find_initial_plane_obj(struct intel_crtc *crtc,
-			     struct intel_initial_plane_config *plane_config)
+			     struct intel_initial_plane_config plane_configs[])
 {
-	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_initial_plane_config *plane_config =
+		&plane_configs[crtc->pipe];
 	struct intel_plane *plane =
 		to_intel_plane(crtc->base.primary);
 	struct intel_plane_state *plane_state =
@@ -201,7 +204,7 @@ intel_find_initial_plane_obj(struct intel_crtc *crtc,
 
 	if (intel_alloc_initial_plane_obj(crtc, plane_config))
 		fb = &plane_config->fb->base;
-	else if (!intel_reuse_initial_plane_obj(dev_priv, plane_config, &fb))
+	else if (!intel_reuse_initial_plane_obj(crtc, plane_configs, &fb))
 		goto nofb;
 
 	plane_state->uapi.rotation = plane_config->rotation;
@@ -267,25 +270,36 @@ static void plane_config_fini(struct intel_initial_plane_config *plane_config)
 	}
 }
 
-void intel_crtc_initial_plane_config(struct intel_crtc *crtc)
+void intel_initial_plane_config(struct drm_i915_private *i915)
 {
-	struct xe_device *xe = to_xe_device(crtc->base.dev);
-	struct intel_initial_plane_config plane_config = {};
+	struct intel_initial_plane_config plane_configs[I915_MAX_PIPES] = {};
+	struct intel_crtc *crtc;
 
-	/*
-	 * Note that reserving the BIOS fb up front prevents us
-	 * from stuffing other stolen allocations like the ring
-	 * on top.  This prevents some ugliness at boot time, and
-	 * can even allow for smooth boot transitions if the BIOS
-	 * fb is large enough for the active pipe configuration.
-	 */
-	xe->display.funcs.display->get_initial_plane_config(crtc, &plane_config);
+	for_each_intel_crtc(&i915->drm, crtc) {
+		struct intel_initial_plane_config *plane_config =
+			&plane_configs[crtc->pipe];
 
-	/*
-	 * If the fb is shared between multiple heads, we'll
-	 * just get the first one.
-	 */
-	intel_find_initial_plane_obj(crtc, &plane_config);
+		if (!to_intel_crtc_state(crtc->base.state)->uapi.active)
+			continue;
 
-	plane_config_fini(&plane_config);
+		/*
+		 * Note that reserving the BIOS fb up front prevents us
+		 * from stuffing other stolen allocations like the ring
+		 * on top.  This prevents some ugliness at boot time, and
+		 * can even allow for smooth boot transitions if the BIOS
+		 * fb is large enough for the active pipe configuration.
+		 */
+		i915->display.funcs.display->get_initial_plane_config(crtc, plane_config);
+
+		/*
+		 * If the fb is shared between multiple heads, we'll
+		 * just get the first one.
+		 */
+		intel_find_initial_plane_obj(crtc, plane_configs);
+
+		if (i915->display.funcs.display->fixup_initial_plane_config(crtc, plane_config))
+			intel_crtc_wait_for_next_vblank(crtc);
+
+		plane_config_fini(plane_config);
+	}
 }
diff --git a/drivers/gpu/drm/xe/instructions/xe_gfx_state_commands.h b/drivers/gpu/drm/xe/instructions/xe_gfx_state_commands.h
new file mode 100644
index 0000000000..dca62af5a5
--- /dev/null
+++ b/drivers/gpu/drm/xe/instructions/xe_gfx_state_commands.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_GFX_STATE_COMMANDS_H_
+#define _XE_GFX_STATE_COMMANDS_H_
+
+#include "instructions/xe_instr_defs.h"
+
+#define GFX_STATE_OPCODE			REG_GENMASK(28, 26)
+
+#define GFX_STATE_CMD(opcode) \
+	(XE_INSTR_GFX_STATE | REG_FIELD_PREP(GFX_STATE_OPCODE, opcode))
+
+#define STATE_WRITE_INLINE			GFX_STATE_CMD(0x0)
+
+#endif
diff --git a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h
index 8e6dd061f2..31d28a67ef 100644
--- a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h
@@ -47,6 +47,8 @@
 #define GPGPU_CSR_BASE_ADDRESS			GFXPIPE_COMMON_CMD(0x1, 0x4)
 #define STATE_COMPUTE_MODE			GFXPIPE_COMMON_CMD(0x1, 0x5)
 #define CMD_3DSTATE_BTD				GFXPIPE_COMMON_CMD(0x1, 0x6)
+#define STATE_SYSTEM_MEM_FENCE_ADDRESS		GFXPIPE_COMMON_CMD(0x1, 0x9)
+#define STATE_CONTEXT_DATA_BASE_ADDRESS		GFXPIPE_COMMON_CMD(0x1, 0xB)
 
 #define CMD_3DSTATE_VF_STATISTICS		GFXPIPE_SINGLE_DW_CMD(0x0, 0xB)
 
@@ -71,6 +73,7 @@
 #define CMD_3DSTATE_WM				GFXPIPE_3D_CMD(0x0, 0x14)
 #define CMD_3DSTATE_CONSTANT_VS			GFXPIPE_3D_CMD(0x0, 0x15)
 #define CMD_3DSTATE_CONSTANT_GS			GFXPIPE_3D_CMD(0x0, 0x16)
+#define CMD_3DSTATE_CONSTANT_PS			GFXPIPE_3D_CMD(0x0, 0x17)
 #define CMD_3DSTATE_SAMPLE_MASK			GFXPIPE_3D_CMD(0x0, 0x18)
 #define CMD_3DSTATE_CONSTANT_HS			GFXPIPE_3D_CMD(0x0, 0x19)
 #define CMD_3DSTATE_CONSTANT_DS			GFXPIPE_3D_CMD(0x0, 0x1A)
diff --git a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
index 04179b2a48..fd2ce7ace5 100644
--- a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
+++ b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h
@@ -17,6 +17,7 @@
 #define   XE_INSTR_MI			REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x0)
 #define   XE_INSTR_GSC			REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x2)
 #define   XE_INSTR_GFXPIPE		REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x3)
+#define   XE_INSTR_GFX_STATE		REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x4)
 
 /*
  * Most (but not all) instructions have a "length" field in the instruction
diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
index 1cfa96167f..c74ceb550d 100644
--- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
@@ -56,6 +56,9 @@
 #define   MI_FLUSH_IMM_QW		REG_FIELD_PREP(MI_FLUSH_DW_LEN_DW, 5 - 2)
 #define   MI_FLUSH_DW_USE_GTT		REG_BIT(2)
 
+#define MI_LOAD_REGISTER_MEM		(__MI_INSTR(0x29) | XE_INSTR_NUM_DW(4))
+#define   MI_LRM_USE_GGTT		REG_BIT(22)
+
 #define MI_BATCH_BUFFER_START		__MI_INSTR(0x31)
 
 #endif
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 81b8362e93..03c6d4d50a 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -44,9 +44,10 @@
 #define GSCCS_RING_BASE				0x11a000
 
 #define RING_TAIL(base)				XE_REG((base) + 0x30)
+#define   TAIL_ADDR				REG_GENMASK(20, 3)
 
 #define RING_HEAD(base)				XE_REG((base) + 0x34)
-#define   HEAD_ADDR				0x001FFFFC
+#define   HEAD_ADDR				REG_GENMASK(20, 2)
 
 #define RING_START(base)			XE_REG((base) + 0x38)
 
@@ -75,12 +76,17 @@
 #define FF_THREAD_MODE(base)			XE_REG((base) + 0xa0)
 #define   FF_TESSELATION_DOP_GATE_DISABLE	BIT(19)
 
+#define RING_INT_SRC_RPT_PTR(base)		XE_REG((base) + 0xa4)
 #define RING_IMR(base)				XE_REG((base) + 0xa8)
+#define RING_INT_STATUS_RPT_PTR(base)		XE_REG((base) + 0xac)
 
 #define RING_EIR(base)				XE_REG((base) + 0xb0)
 #define RING_EMR(base)				XE_REG((base) + 0xb4)
 #define RING_ESR(base)				XE_REG((base) + 0xb8)
 
+#define INSTPM(base)				XE_REG((base) + 0xc0, XE_REG_OPTION_MASKED)
+#define   ENABLE_SEMAPHORE_POLL_BIT		REG_BIT(13)
+
 #define RING_CMD_CCTL(base)			XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED)
 /*
  * CMD_CCTL read/write fields take a MOCS value and _not_ a table index.
@@ -99,9 +105,6 @@
 #define FF_SLICE_CS_CHICKEN1(base)		XE_REG((base) + 0xe0, XE_REG_OPTION_MASKED)
 #define   FFSC_PERCTX_PREEMPT_CTRL		REG_BIT(14)
 
-#define FF_SLICE_CS_CHICKEN2(base)		XE_REG((base) + 0xe4, XE_REG_OPTION_MASKED)
-#define   PERF_FIX_BALANCING_CFE_DISABLE	REG_BIT(15)
-
 #define CS_DEBUG_MODE1(base)			XE_REG((base) + 0xec, XE_REG_OPTION_MASKED)
 #define   FF_DOP_CLOCK_GATE_DISABLE		REG_BIT(1)
 #define   REPLAY_MODE_GRANULARITY		REG_BIT(0)
@@ -133,9 +136,9 @@
 #define   RING_VALID_MASK			0x00000001
 #define   RING_VALID				0x00000001
 #define   STOP_RING				REG_BIT(8)
-#define   TAIL_ADDR				0x001FFFF8
 
 #define RING_CTX_TIMESTAMP(base)		XE_REG((base) + 0x3a8)
+#define CSBE_DEBUG_STATUS(base)			XE_REG((base) + 0x3fc)
 
 #define RING_FORCE_TO_NONPRIV(base, i)		XE_REG(((base) + 0x4d0) + (i) * 4)
 #define   RING_FORCE_TO_NONPRIV_DENY		REG_BIT(30)
diff --git a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h
index 9886ec9cb0..e2a925be13 100644
--- a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h
@@ -38,4 +38,11 @@
 #define HECI_H_GS1(base)	XE_REG((base) + 0xc4c)
 #define   HECI_H_GS1_ER_PREP	REG_BIT(0)
 
+#define GSCI_TIMER_STATUS				XE_REG(0x11ca28)
+#define   GSCI_TIMER_STATUS_VALUE			REG_GENMASK(1, 0)
+#define   GSCI_TIMER_STATUS_RESET_IN_PROGRESS		0
+#define   GSCI_TIMER_STATUS_TIMER_EXPIRED		1
+#define   GSCI_TIMER_STATUS_RESET_COMPLETE		2
+#define   GSCI_TIMER_STATUS_OUT_OF_RESET		3
+
 #endif
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 1dd361046b..94445810cc 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -69,10 +69,14 @@
 
 #define XEHP_TILE_ADDR_RANGE(_idx)		XE_REG_MCR(0x4900 + (_idx) * 4)
 #define XEHP_FLAT_CCS_BASE_ADDR			XE_REG_MCR(0x4910)
+#define XEHP_FLAT_CCS_PTR			REG_GENMASK(31, 8)
 
 #define WM_CHICKEN3				XE_REG_MCR(0x5588, XE_REG_OPTION_MASKED)
 #define   HIZ_PLANE_COMPRESSION_DIS		REG_BIT(10)
 
+#define CHICKEN_RASTER_1			XE_REG_MCR(0x6204, XE_REG_OPTION_MASKED)
+#define   DIS_SF_ROUND_NEAREST_EVEN		REG_BIT(8)
+
 #define CHICKEN_RASTER_2			XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED)
 #define   TBIMR_FAST_CLIP			REG_BIT(5)
 
@@ -97,7 +101,8 @@
 #define CACHE_MODE_1				XE_REG(0x7004, XE_REG_OPTION_MASKED)
 #define   MSAA_OPTIMIZATION_REDUC_DISABLE	REG_BIT(11)
 
-#define COMMON_SLICE_CHICKEN1			XE_REG(0x7010)
+#define COMMON_SLICE_CHICKEN1			XE_REG(0x7010, XE_REG_OPTION_MASKED)
+#define   DISABLE_BOTTOM_CLIP_RECTANGLE_TEST	REG_BIT(14)
 
 #define HIZ_CHICKEN					XE_REG(0x7018, XE_REG_OPTION_MASKED)
 #define   DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE	REG_BIT(14)
@@ -141,18 +146,29 @@
 
 #define XE2_FLAT_CCS_BASE_RANGE_LOWER		XE_REG_MCR(0x8800)
 #define   XE2_FLAT_CCS_ENABLE			REG_BIT(0)
+#define XE2_FLAT_CCS_BASE_LOWER_ADDR_MASK	REG_GENMASK(31, 6)
+
+#define XE2_FLAT_CCS_BASE_RANGE_UPPER		XE_REG_MCR(0x8804)
+#define XE2_FLAT_CCS_BASE_UPPER_ADDR_MASK	REG_GENMASK(7, 0)
 
 #define GSCPSMI_BASE				XE_REG(0x880c)
 
+#define CCCHKNREG1				XE_REG_MCR(0x8828)
+#define   ENCOMPPERFFIX				REG_BIT(18)
+
 /* Fuse readout registers for GT */
 #define XEHP_FUSE4				XE_REG(0x9114)
+#define   CFEG_WMTP_DISABLE			REG_BIT(20)
 #define   CCS_EN_MASK				REG_GENMASK(19, 16)
 #define   GT_L3_EXC_MASK			REG_GENMASK(6, 4)
 
 #define	MIRROR_FUSE3				XE_REG(0x9118)
 #define   XE2_NODE_ENABLE_MASK			REG_GENMASK(31, 16)
 #define   L3BANK_PAIR_COUNT			4
+#define   XEHPC_GT_L3_MODE_MASK			REG_GENMASK(7, 4)
+#define   XE2_GT_L3_MODE_MASK			REG_GENMASK(7, 4)
 #define   L3BANK_MASK				REG_GENMASK(3, 0)
+#define   XELP_GT_L3_MODE_MASK			REG_GENMASK(7, 0)
 /* on Xe_HP the same fuses indicates mslices instead of L3 banks */
 #define   MAX_MSLICES				4
 #define   MEML3_EN_MASK				REG_GENMASK(3, 0)
@@ -267,6 +283,10 @@
 #define FORCEWAKE_GT				XE_REG(0xa188)
 
 #define PG_ENABLE				XE_REG(0xa210)
+#define   VD2_MFXVDENC_POWERGATE_ENABLE		REG_BIT(8)
+#define   VD2_HCP_POWERGATE_ENABLE		REG_BIT(7)
+#define   VD0_MFXVDENC_POWERGATE_ENABLE		REG_BIT(4)
+#define   VD0_HCP_POWERGATE_ENABLE		REG_BIT(3)
 
 #define CTC_MODE				XE_REG(0xa26c)
 #define   CTC_SHIFT_PARAMETER_MASK		REG_GENMASK(2, 1)
@@ -288,6 +308,9 @@
 #define XEHP_L3NODEARBCFG			XE_REG_MCR(0xb0b4)
 #define   XEHP_LNESPARE				REG_BIT(19)
 
+#define L3SQCREG3				XE_REG_MCR(0xb108)
+#define   COMPPWOVERFETCHEN			REG_BIT(28)
+
 #define XEHP_L3SQCREG5				XE_REG_MCR(0xb158)
 #define   L3_PWM_TIMER_INIT_VAL_MASK		REG_GENMASK(9, 0)
 
@@ -342,8 +365,12 @@
 #define   THREAD_EX_ARB_MODE_RR_AFTER_DEP	REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2)
 
 #define ROW_CHICKEN3				XE_REG_MCR(0xe49c, XE_REG_OPTION_MASKED)
+#define   XE2_EUPEND_CHK_FLUSH_DIS		REG_BIT(14)
 #define   DIS_FIX_EOT1_FLUSH			REG_BIT(9)
 
+#define TDL_TSL_CHICKEN				XE_REG_MCR(0xe4c4, XE_REG_OPTION_MASKED)
+#define   SLM_WMTP_RESTORE			REG_BIT(11)
+
 #define ROW_CHICKEN				XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED)
 #define   UGM_BACKUP_MODE			REG_BIT(13)
 #define   MDQ_ARBITRATION_MODE			REG_BIT(12)
@@ -354,17 +381,22 @@
 #define   DISABLE_EARLY_READ			REG_BIT(14)
 #define   ENABLE_LARGE_GRF_MODE			REG_BIT(12)
 #define   PUSH_CONST_DEREF_HOLD_DIS		REG_BIT(8)
+#define   DISABLE_TDL_SVHS_GATING		REG_BIT(1)
 #define   DISABLE_DOP_GATING			REG_BIT(0)
 
 #define RT_CTRL					XE_REG_MCR(0xe530)
 #define   DIS_NULL_QUERY			REG_BIT(10)
 
+#define EU_SYSTOLIC_LIC_THROTTLE_CTL_WITH_LOCK	XE_REG_MCR(0xe534)
+#define   EU_SYSTOLIC_LIC_THROTTLE_CTL_LOCK_BIT	REG_BIT(31)
+
 #define XEHP_HDC_CHICKEN0					XE_REG_MCR(0xe5f0, XE_REG_OPTION_MASKED)
 #define   LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK	REG_GENMASK(13, 11)
 #define   DIS_ATOMIC_CHAINING_TYPED_WRITES	REG_BIT(3)
 
 #define LSC_CHICKEN_BIT_0			XE_REG_MCR(0xe7c8)
 #define   DISABLE_D8_D16_COASLESCE		REG_BIT(30)
+#define   WR_REQ_CHAINING_DIS			REG_BIT(26)
 #define   TGM_WRITE_EOM_FORCE			REG_BIT(17)
 #define   FORCE_1_SUB_MESSAGE_PER_FRAGMENT	REG_BIT(15)
 #define   SEQUENTIAL_ACCESS_UPGRADE_DISABLE	REG_BIT(13)
@@ -429,34 +461,51 @@
 #define GT_PERF_STATUS				XE_REG(0x1381b4)
 #define   VOLTAGE_MASK				REG_GENMASK(10, 0)
 
-#define GT_INTR_DW(x)				XE_REG(0x190018 + ((x) * 4))
+/*
+ * Note: Interrupt registers 1900xx are VF accessible only until version 12.50.
+ *       On newer platforms, VFs are using memory-based interrupts instead.
+ *       However, for simplicity we keep this XE_REG_OPTION_VF tag intact.
+ */
 
-#define RENDER_COPY_INTR_ENABLE			XE_REG(0x190030)
-#define VCS_VECS_INTR_ENABLE			XE_REG(0x190034)
-#define GUC_SG_INTR_ENABLE			XE_REG(0x190038)
+#define GT_INTR_DW(x)				XE_REG(0x190018 + ((x) * 4), XE_REG_OPTION_VF)
+#define   INTR_GSC				REG_BIT(31)
+#define   INTR_GUC				REG_BIT(25)
+#define   INTR_MGUC				REG_BIT(24)
+#define   INTR_BCS8				REG_BIT(23)
+#define   INTR_BCS(x)				REG_BIT(15 - (x))
+#define   INTR_CCS(x)				REG_BIT(4 + (x))
+#define   INTR_RCS0				REG_BIT(0)
+#define   INTR_VECS(x)				REG_BIT(31 - (x))
+#define   INTR_VCS(x)				REG_BIT(x)
+
+#define RENDER_COPY_INTR_ENABLE			XE_REG(0x190030, XE_REG_OPTION_VF)
+#define VCS_VECS_INTR_ENABLE			XE_REG(0x190034, XE_REG_OPTION_VF)
+#define GUC_SG_INTR_ENABLE			XE_REG(0x190038, XE_REG_OPTION_VF)
 #define   ENGINE1_MASK				REG_GENMASK(31, 16)
 #define   ENGINE0_MASK				REG_GENMASK(15, 0)
-#define GPM_WGBOXPERF_INTR_ENABLE		XE_REG(0x19003c)
-#define GUNIT_GSC_INTR_ENABLE			XE_REG(0x190044)
-#define CCS_RSVD_INTR_ENABLE			XE_REG(0x190048)
+#define GPM_WGBOXPERF_INTR_ENABLE		XE_REG(0x19003c, XE_REG_OPTION_VF)
+#define GUNIT_GSC_INTR_ENABLE			XE_REG(0x190044, XE_REG_OPTION_VF)
+#define CCS_RSVD_INTR_ENABLE			XE_REG(0x190048, XE_REG_OPTION_VF)
 
-#define INTR_IDENTITY_REG(x)			XE_REG(0x190060 + ((x) * 4))
+#define INTR_IDENTITY_REG(x)			XE_REG(0x190060 + ((x) * 4), XE_REG_OPTION_VF)
 #define   INTR_DATA_VALID			REG_BIT(31)
 #define   INTR_ENGINE_INSTANCE(x)		REG_FIELD_GET(GENMASK(25, 20), x)
 #define   INTR_ENGINE_CLASS(x)			REG_FIELD_GET(GENMASK(18, 16), x)
 #define   INTR_ENGINE_INTR(x)			REG_FIELD_GET(GENMASK(15, 0), x)
 #define   OTHER_GUC_INSTANCE			0
+#define   OTHER_GSC_HECI2_INSTANCE		3
 #define   OTHER_GSC_INSTANCE			6
 
-#define IIR_REG_SELECTOR(x)			XE_REG(0x190070 + ((x) * 4))
-#define RCS0_RSVD_INTR_MASK			XE_REG(0x190090)
-#define BCS_RSVD_INTR_MASK			XE_REG(0x1900a0)
-#define VCS0_VCS1_INTR_MASK			XE_REG(0x1900a8)
-#define VCS2_VCS3_INTR_MASK			XE_REG(0x1900ac)
-#define VECS0_VECS1_INTR_MASK			XE_REG(0x1900d0)
-#define GUC_SG_INTR_MASK			XE_REG(0x1900e8)
-#define GPM_WGBOXPERF_INTR_MASK			XE_REG(0x1900ec)
-#define GUNIT_GSC_INTR_MASK			XE_REG(0x1900f4)
+#define IIR_REG_SELECTOR(x)			XE_REG(0x190070 + ((x) * 4), XE_REG_OPTION_VF)
+#define RCS0_RSVD_INTR_MASK			XE_REG(0x190090, XE_REG_OPTION_VF)
+#define BCS_RSVD_INTR_MASK			XE_REG(0x1900a0, XE_REG_OPTION_VF)
+#define VCS0_VCS1_INTR_MASK			XE_REG(0x1900a8, XE_REG_OPTION_VF)
+#define VCS2_VCS3_INTR_MASK			XE_REG(0x1900ac, XE_REG_OPTION_VF)
+#define VECS0_VECS1_INTR_MASK			XE_REG(0x1900d0, XE_REG_OPTION_VF)
+#define HECI2_RSVD_INTR_MASK			XE_REG(0x1900e4)
+#define GUC_SG_INTR_MASK			XE_REG(0x1900e8, XE_REG_OPTION_VF)
+#define GPM_WGBOXPERF_INTR_MASK			XE_REG(0x1900ec, XE_REG_OPTION_VF)
+#define GUNIT_GSC_INTR_MASK			XE_REG(0x1900f4, XE_REG_OPTION_VF)
 #define CCS0_CCS1_INTR_MASK			XE_REG(0x190100)
 #define CCS2_CCS3_INTR_MASK			XE_REG(0x190104)
 #define XEHPC_BCS1_BCS2_INTR_MASK		XE_REG(0x190110)
@@ -465,14 +514,9 @@
 #define XEHPC_BCS7_BCS8_INTR_MASK		XE_REG(0x19011c)
 #define   GT_WAIT_SEMAPHORE_INTERRUPT		REG_BIT(11)
 #define   GT_CONTEXT_SWITCH_INTERRUPT		REG_BIT(8)
+#define   GSC_ER_COMPLETE			REG_BIT(5)
 #define   GT_RENDER_PIPECTL_NOTIFY_INTERRUPT	REG_BIT(4)
 #define   GT_CS_MASTER_ERROR_INTERRUPT		REG_BIT(3)
 #define   GT_RENDER_USER_INTERRUPT		REG_BIT(0)
 
-#define PVC_GT0_PACKAGE_ENERGY_STATUS		XE_REG(0x281004)
-#define PVC_GT0_PACKAGE_RAPL_LIMIT		XE_REG(0x281008)
-#define PVC_GT0_PACKAGE_POWER_SKU_UNIT		XE_REG(0x281068)
-#define PVC_GT0_PLATFORM_ENERGY_STATUS		XE_REG(0x28106c)
-#define PVC_GT0_PACKAGE_POWER_SKU		XE_REG(0x281080)
-
 #endif
diff --git a/drivers/gpu/drm/xe/regs/xe_gtt_defs.h b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h
new file mode 100644
index 0000000000..4389e5a76f
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_GTT_DEFS_H_
+#define _XE_GTT_DEFS_H_
+
+#define XELPG_GGTT_PTE_PAT0	BIT_ULL(52)
+#define XELPG_GGTT_PTE_PAT1	BIT_ULL(53)
+
+#define GGTT_PTE_VFID		GENMASK_ULL(11, 2)
+
+#define GUC_GGTT_TOP		0xFEE00000
+
+#define XELPG_PPGTT_PTE_PAT3		BIT_ULL(62)
+#define XE2_PPGTT_PTE_PAT4		BIT_ULL(61)
+#define XE_PPGTT_PDE_PDPE_PAT2		BIT_ULL(12)
+#define XE_PPGTT_PTE_PAT2		BIT_ULL(7)
+#define XE_PPGTT_PTE_PAT1		BIT_ULL(4)
+#define XE_PPGTT_PTE_PAT0		BIT_ULL(3)
+
+#define XE_PDE_PS_2M			BIT_ULL(7)
+#define XE_PDPE_PS_1G			BIT_ULL(7)
+#define XE_PDE_IPS_64K			BIT_ULL(11)
+
+#define XE_GGTT_PTE_DM			BIT_ULL(1)
+#define XE_USM_PPGTT_PTE_AE		BIT_ULL(10)
+#define XE_PPGTT_PTE_DM			BIT_ULL(11)
+#define XE_PDE_64K			BIT_ULL(6)
+#define XE_PTE_PS64			BIT_ULL(8)
+#define XE_PTE_NULL			BIT_ULL(9)
+
+#define XE_PAGE_PRESENT			BIT_ULL(0)
+#define XE_PAGE_RW			BIT_ULL(1)
+
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
index 92320bbc9d..11682e675e 100644
--- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h
@@ -100,16 +100,23 @@
 #define GT_PM_CONFIG				XE_REG(0x13816c)
 #define   GT_DOORBELL_ENABLE			REG_BIT(0)
 
-#define GUC_HOST_INTERRUPT			XE_REG(0x1901f0)
+#define GUC_HOST_INTERRUPT			XE_REG(0x1901f0, XE_REG_OPTION_VF)
 
-#define VF_SW_FLAG(n)				XE_REG(0x190240 + (n) * 4)
+#define VF_SW_FLAG(n)				XE_REG(0x190240 + (n) * 4, XE_REG_OPTION_VF)
 #define VF_SW_FLAG_COUNT			4
 
-#define MED_GUC_HOST_INTERRUPT			XE_REG(0x190304)
+#define MED_GUC_HOST_INTERRUPT			XE_REG(0x190304, XE_REG_OPTION_VF)
 
-#define MED_VF_SW_FLAG(n)			XE_REG(0x190310 + (n) * 4)
+#define MED_VF_SW_FLAG(n)			XE_REG(0x190310 + (n) * 4, XE_REG_OPTION_VF)
 #define MED_VF_SW_FLAG_COUNT			4
 
+#define GUC_TLB_INV_CR				XE_REG(0xcee8)
+#define   GUC_TLB_INV_CR_INVALIDATE		REG_BIT(0)
+#define PVC_GUC_TLB_INV_DESC0			XE_REG(0xcf7c)
+#define   PVC_GUC_TLB_INV_DESC0_VALID		REG_BIT(0)
+#define PVC_GUC_TLB_INV_DESC1			XE_REG(0xcf80)
+#define   PVC_GUC_TLB_INV_DESC1_INVALIDATE	REG_BIT(6)
+
 /* GuC Interrupt Vector */
 #define GUC_INTR_GUC2HOST			REG_BIT(15)
 #define GUC_INTR_EXEC_ERROR			REG_BIT(14)
diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
index 4be81abc86..1825d8f79d 100644
--- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
+++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
@@ -14,4 +14,13 @@
 #define CTX_PDP0_UDW			(0x30 + 1)
 #define CTX_PDP0_LDW			(0x32 + 1)
 
+#define CTX_LRM_INT_MASK_ENABLE		0x50
+#define CTX_INT_MASK_ENABLE_REG		(CTX_LRM_INT_MASK_ENABLE + 1)
+#define CTX_INT_MASK_ENABLE_PTR		(CTX_LRM_INT_MASK_ENABLE + 2)
+#define CTX_LRI_INT_REPORT_PTR		0x55
+#define CTX_INT_STATUS_REPORT_REG	(CTX_LRI_INT_REPORT_PTR + 1)
+#define CTX_INT_STATUS_REPORT_PTR	(CTX_LRI_INT_REPORT_PTR + 2)
+#define CTX_INT_SRC_REPORT_REG		(CTX_LRI_INT_REPORT_PTR + 3)
+#define CTX_INT_SRC_REPORT_PTR		(CTX_LRI_INT_REPORT_PTR + 4)
+
 #endif
diff --git a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h
new file mode 100644
index 0000000000..3dae858508
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_PCODE_REGS_H_
+#define _XE_PCODE_REGS_H_
+
+#include "regs/xe_reg_defs.h"
+
+/*
+ * This file contains addresses of PCODE registers visible through GT MMIO space.
+ */
+
+#define PVC_GT0_PACKAGE_ENERGY_STATUS           XE_REG(0x281004)
+#define PVC_GT0_PACKAGE_RAPL_LIMIT              XE_REG(0x281008)
+#define PVC_GT0_PACKAGE_POWER_SKU_UNIT          XE_REG(0x281068)
+#define PVC_GT0_PLATFORM_ENERGY_STATUS          XE_REG(0x28106c)
+#define PVC_GT0_PACKAGE_POWER_SKU               XE_REG(0x281080)
+
+#endif /* _XE_PCODE_REGS_H_ */
diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
index c50e7650c0..23f7dc5bbe 100644
--- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h
+++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
@@ -6,6 +6,8 @@
 #ifndef _XE_REG_DEFS_H_
 #define _XE_REG_DEFS_H_
 
+#include <linux/build_bug.h>
+
 #include "compat-i915-headers/i915_reg_defs.h"
 
 /**
@@ -36,6 +38,10 @@ struct xe_reg {
 			 */
 			u32 mcr:1;
 			/**
+			 * @vf: register is accessible from the Virtual Function.
+			 */
+			u32 vf:1;
+			/**
 			 * @ext: access MMIO extension space for current register.
 			 */
 			u32 ext:1;
@@ -44,6 +50,7 @@ struct xe_reg {
 		u32 raw;
 	};
 };
+static_assert(sizeof(struct xe_reg) == sizeof(u32));
 
 /**
  * struct xe_reg_mcr - MCR register definition
@@ -76,6 +83,13 @@ struct xe_reg_mcr {
 #define XE_REG_OPTION_MASKED		.masked = 1
 
 /**
+ * XE_REG_OPTION_VF - Register is "VF" accessible.
+ *
+ * To be used with XE_REG() and XE_REG_INITIALIZER().
+ */
+#define XE_REG_OPTION_VF		.vf = 1
+
+/**
  * XE_REG_INITIALIZER - Initializer for xe_reg_t.
  * @r_: Register offset
  * @...: Additional options like access mode. See struct xe_reg for available
@@ -117,4 +131,9 @@ struct xe_reg_mcr {
 				 .__reg = XE_REG_INITIALIZER(r_,  ##__VA_ARGS__, .mcr = 1)	\
 				 })
 
+static inline bool xe_reg_is_valid(struct xe_reg r)
+{
+	return r.addr;
+}
+
 #endif
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index 2c214bb9b6..722fb6dbb7 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -57,7 +57,7 @@
 #define   DG1_MSTR_IRQ				REG_BIT(31)
 #define   DG1_MSTR_TILE(t)			REG_BIT(t)
 
-#define GFX_MSTR_IRQ				XE_REG(0x190010)
+#define GFX_MSTR_IRQ				XE_REG(0x190010, XE_REG_OPTION_VF)
 #define   MASTER_IRQ				REG_BIT(31)
 #define   GU_MISC_IRQ				REG_BIT(29)
 #define   DISPLAY_IRQ				REG_BIT(16)
diff --git a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h
index 58a4e0fad1..617ddb84b7 100644
--- a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h
@@ -14,4 +14,7 @@
 #define   LMEM_EN			REG_BIT(31)
 #define   LMTT_DIR_PTR			REG_GENMASK(30, 0) /* in multiples of 64KB */
 
+#define VF_CAP_REG			XE_REG(0x1901f8, XE_REG_OPTION_VF)
+#define   VF_CAP			REG_BIT(0)
+
 #endif
diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile
index 39d8a08922..8cf2367449 100644
--- a/drivers/gpu/drm/xe/tests/Makefile
+++ b/drivers/gpu/drm/xe/tests/Makefile
@@ -1,10 +1,16 @@
 # SPDX-License-Identifier: GPL-2.0
 
-obj-$(CONFIG_DRM_XE_KUNIT_TEST) += \
+# "live" kunit tests
+obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_live_test.o
+xe_live_test-y = xe_live_test_mod.o \
 	xe_bo_test.o \
 	xe_dma_buf_test.o \
 	xe_migrate_test.o \
-	xe_mocs_test.o \
+	xe_mocs_test.o
+
+# Normal kunit tests
+obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_test.o
+xe_test-y = xe_test_mod.o \
 	xe_pci_test.o \
 	xe_rtp_test.o \
 	xe_wa_test.o
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 3436fd9cf2..9f3c028264 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -116,7 +116,7 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
 	int ret;
 
 	/* TODO: Sanity check */
-	unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile);
+	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
 
 	if (IS_DGFX(xe))
 		kunit_info(test, "Testing vram id %u\n", tile->id);
@@ -163,7 +163,7 @@ static int ccs_test_run_device(struct xe_device *xe)
 		return 0;
 	}
 
-	xe_device_mem_access_get(xe);
+	xe_pm_runtime_get(xe);
 
 	for_each_tile(tile, xe, id) {
 		/* For igfx run only for primary tile */
@@ -172,7 +172,7 @@ static int ccs_test_run_device(struct xe_device *xe)
 		ccs_test_run_tile(xe, tile, test);
 	}
 
-	xe_device_mem_access_put(xe);
+	xe_pm_runtime_put(xe);
 
 	return 0;
 }
@@ -186,7 +186,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit);
 static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test)
 {
 	struct xe_bo *bo, *external;
-	unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile);
+	unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
 	struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
 	struct xe_gt *__gt;
 	int err, i, id;
@@ -335,12 +335,12 @@ static int evict_test_run_device(struct xe_device *xe)
 		return 0;
 	}
 
-	xe_device_mem_access_get(xe);
+	xe_pm_runtime_get(xe);
 
 	for_each_tile(tile, xe, id)
 		evict_test_run_tile(xe, tile, test);
 
-	xe_device_mem_access_put(xe);
+	xe_pm_runtime_put(xe);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.c b/drivers/gpu/drm/xe/tests/xe_bo_test.c
index f408f17f21..a324cde77d 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo_test.c
@@ -19,8 +19,3 @@ static struct kunit_suite xe_bo_test_suite = {
 };
 
 kunit_test_suite(xe_bo_test_suite);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("xe_bo kunit test");
-MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
index 9f6d571d7f..e7f9b531c4 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -12,6 +12,7 @@
 #include "tests/xe_pci_test.h"
 
 #include "xe_pci.h"
+#include "xe_pm.h"
 
 static bool p2p_enabled(struct dma_buf_test_params *params)
 {
@@ -36,14 +37,14 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
 	xe_bo_assert_held(imported);
 
 	mem_type = XE_PL_VRAM0;
-	if (!(params->mem_mask & XE_BO_CREATE_VRAM0_BIT))
+	if (!(params->mem_mask & XE_BO_FLAG_VRAM0))
 		/* No VRAM allowed */
 		mem_type = XE_PL_TT;
 	else if (params->force_different_devices && !p2p_enabled(params))
 		/* No P2P */
 		mem_type = XE_PL_TT;
 	else if (params->force_different_devices && !is_dynamic(params) &&
-		 (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT))
+		 (params->mem_mask & XE_BO_FLAG_SYSTEM))
 		/* Pin migrated to TT */
 		mem_type = XE_PL_TT;
 
@@ -93,7 +94,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
 	 * possible, saving a migration step as the transfer is just
 	 * likely as fast from system memory.
 	 */
-	if (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)
+	if (params->mem_mask & XE_BO_FLAG_SYSTEM)
 		KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, XE_PL_TT));
 	else
 		KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type));
@@ -115,17 +116,17 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
 
 	/* No VRAM on this device? */
 	if (!ttm_manager_type(&xe->ttm, XE_PL_VRAM0) &&
-	    (params->mem_mask & XE_BO_CREATE_VRAM0_BIT))
+	    (params->mem_mask & XE_BO_FLAG_VRAM0))
 		return;
 
 	size = PAGE_SIZE;
-	if ((params->mem_mask & XE_BO_CREATE_VRAM0_BIT) &&
+	if ((params->mem_mask & XE_BO_FLAG_VRAM0) &&
 	    xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
 		size = SZ_64K;
 
 	kunit_info(test, "running %s\n", __func__);
 	bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC,
-			       ttm_bo_type_device, XE_BO_CREATE_USER_BIT | params->mem_mask);
+			       ttm_bo_type_device, params->mem_mask);
 	if (IS_ERR(bo)) {
 		KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
 			   PTR_ERR(bo));
@@ -148,7 +149,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
 		 */
 		if (params->force_different_devices &&
 		    !p2p_enabled(params) &&
-		    !(params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) {
+		    !(params->mem_mask & XE_BO_FLAG_SYSTEM)) {
 			KUNIT_FAIL(test,
 				   "xe_gem_prime_import() succeeded when it shouldn't have\n");
 		} else {
@@ -161,7 +162,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
 			/* Pinning in VRAM is not allowed. */
 			if (!is_dynamic(params) &&
 			    params->force_different_devices &&
-			    !(params->mem_mask & XE_BO_CREATE_SYSTEM_BIT))
+			    !(params->mem_mask & XE_BO_FLAG_SYSTEM))
 				KUNIT_EXPECT_EQ(test, err, -EINVAL);
 			/* Otherwise only expect interrupts or success. */
 			else if (err && err != -EINTR && err != -ERESTARTSYS)
@@ -180,7 +181,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
 			   PTR_ERR(import));
 	} else if (!params->force_different_devices ||
 		   p2p_enabled(params) ||
-		   (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) {
+		   (params->mem_mask & XE_BO_FLAG_SYSTEM)) {
 		/* Shouldn't fail if we can reuse same bo, use p2p or use system */
 		KUNIT_FAIL(test, "dynamic p2p attachment failed with err=%ld\n",
 			   PTR_ERR(import));
@@ -203,52 +204,52 @@ static const struct dma_buf_attach_ops nop2p_attach_ops = {
  * gem object.
  */
 static const struct dma_buf_test_params test_params[] = {
-	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	{.mem_mask = XE_BO_FLAG_VRAM0,
 	 .attach_ops = &xe_dma_buf_attach_ops},
-	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	{.mem_mask = XE_BO_FLAG_VRAM0,
 	 .attach_ops = &xe_dma_buf_attach_ops,
 	 .force_different_devices = true},
 
-	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	{.mem_mask = XE_BO_FLAG_VRAM0,
 	 .attach_ops = &nop2p_attach_ops},
-	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	{.mem_mask = XE_BO_FLAG_VRAM0,
 	 .attach_ops = &nop2p_attach_ops,
 	 .force_different_devices = true},
 
-	{.mem_mask = XE_BO_CREATE_VRAM0_BIT},
-	{.mem_mask = XE_BO_CREATE_VRAM0_BIT,
+	{.mem_mask = XE_BO_FLAG_VRAM0},
+	{.mem_mask = XE_BO_FLAG_VRAM0,
 	 .force_different_devices = true},
 
-	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	{.mem_mask = XE_BO_FLAG_SYSTEM,
 	 .attach_ops = &xe_dma_buf_attach_ops},
-	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	{.mem_mask = XE_BO_FLAG_SYSTEM,
 	 .attach_ops = &xe_dma_buf_attach_ops,
 	 .force_different_devices = true},
 
-	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	{.mem_mask = XE_BO_FLAG_SYSTEM,
 	 .attach_ops = &nop2p_attach_ops},
-	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	{.mem_mask = XE_BO_FLAG_SYSTEM,
 	 .attach_ops = &nop2p_attach_ops,
 	 .force_different_devices = true},
 
-	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT},
-	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT,
+	{.mem_mask = XE_BO_FLAG_SYSTEM},
+	{.mem_mask = XE_BO_FLAG_SYSTEM,
 	 .force_different_devices = true},
 
-	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	{.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0,
 	 .attach_ops = &xe_dma_buf_attach_ops},
-	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	{.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0,
 	 .attach_ops = &xe_dma_buf_attach_ops,
 	 .force_different_devices = true},
 
-	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	{.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0,
 	 .attach_ops = &nop2p_attach_ops},
-	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	{.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0,
 	 .attach_ops = &nop2p_attach_ops,
 	 .force_different_devices = true},
 
-	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT},
-	{.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT,
+	{.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0},
+	{.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0,
 	 .force_different_devices = true},
 
 	{}
@@ -259,6 +260,7 @@ static int dma_buf_run_device(struct xe_device *xe)
 	const struct dma_buf_test_params *params;
 	struct kunit *test = xe_cur_kunit();
 
+	xe_pm_runtime_get(xe);
 	for (params = test_params; params->mem_mask; ++params) {
 		struct dma_buf_test_params p = *params;
 
@@ -266,6 +268,7 @@ static int dma_buf_run_device(struct xe_device *xe)
 		test->priv = &p;
 		xe_test_dmabuf_import_same_driver(xe);
 	}
+	xe_pm_runtime_put(xe);
 
 	/* A non-zero return would halt iteration over driver devices */
 	return 0;
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
index 9f5a9cda8c..99cdb718b6 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c
@@ -18,8 +18,3 @@ static struct kunit_suite xe_dma_buf_test_suite = {
 };
 
 kunit_test_suite(xe_dma_buf_test_suite);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("xe_dma_buf kunit test");
-MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_guc_db_mgr_test.c b/drivers/gpu/drm/xe/tests/xe_guc_db_mgr_test.c
new file mode 100644
index 0000000000..a87a7b4b04
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_guc_db_mgr_test.c
@@ -0,0 +1,201 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+#include "xe_device.h"
+#include "xe_kunit_helpers.h"
+
+static int guc_dbm_test_init(struct kunit *test)
+{
+	struct xe_guc_db_mgr *dbm;
+
+	xe_kunit_helper_xe_device_test_init(test);
+	dbm = &xe_device_get_gt(test->priv, 0)->uc.guc.dbm;
+
+	mutex_init(dbm_mutex(dbm));
+	test->priv = dbm;
+	return 0;
+}
+
+static void test_empty(struct kunit *test)
+{
+	struct xe_guc_db_mgr *dbm = test->priv;
+
+	KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, 0), 0);
+	KUNIT_ASSERT_EQ(test, dbm->count, 0);
+
+	mutex_lock(dbm_mutex(dbm));
+	KUNIT_EXPECT_LT(test, xe_guc_db_mgr_reserve_id_locked(dbm), 0);
+	mutex_unlock(dbm_mutex(dbm));
+
+	KUNIT_EXPECT_LT(test, xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0);
+}
+
+static void test_default(struct kunit *test)
+{
+	struct xe_guc_db_mgr *dbm = test->priv;
+
+	KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, ~0), 0);
+	KUNIT_ASSERT_EQ(test, dbm->count, GUC_NUM_DOORBELLS);
+}
+
+static const unsigned int guc_dbm_params[] = {
+	GUC_NUM_DOORBELLS / 64,
+	GUC_NUM_DOORBELLS / 32,
+	GUC_NUM_DOORBELLS / 8,
+	GUC_NUM_DOORBELLS,
+};
+
+static void uint_param_get_desc(const unsigned int *p, char *desc)
+{
+	snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%u", *p);
+}
+
+KUNIT_ARRAY_PARAM(guc_dbm, guc_dbm_params, uint_param_get_desc);
+
+static void test_size(struct kunit *test)
+{
+	const unsigned int *p = test->param_value;
+	struct xe_guc_db_mgr *dbm = test->priv;
+	unsigned int n;
+	int id;
+
+	KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, *p), 0);
+	KUNIT_ASSERT_EQ(test, dbm->count, *p);
+
+	mutex_lock(dbm_mutex(dbm));
+	for (n = 0; n < *p; n++) {
+		KUNIT_EXPECT_GE(test, id = xe_guc_db_mgr_reserve_id_locked(dbm), 0);
+		KUNIT_EXPECT_LT(test, id, dbm->count);
+	}
+	KUNIT_EXPECT_LT(test, xe_guc_db_mgr_reserve_id_locked(dbm), 0);
+	mutex_unlock(dbm_mutex(dbm));
+
+	mutex_lock(dbm_mutex(dbm));
+	for (n = 0; n < *p; n++)
+		xe_guc_db_mgr_release_id_locked(dbm, n);
+	mutex_unlock(dbm_mutex(dbm));
+}
+
+static void test_reuse(struct kunit *test)
+{
+	const unsigned int *p = test->param_value;
+	struct xe_guc_db_mgr *dbm = test->priv;
+	unsigned int n;
+
+	KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, *p), 0);
+
+	mutex_lock(dbm_mutex(dbm));
+	for (n = 0; n < *p; n++)
+		KUNIT_EXPECT_GE(test, xe_guc_db_mgr_reserve_id_locked(dbm), 0);
+	KUNIT_EXPECT_LT(test, xe_guc_db_mgr_reserve_id_locked(dbm), 0);
+	mutex_unlock(dbm_mutex(dbm));
+
+	mutex_lock(dbm_mutex(dbm));
+	for (n = 0; n < *p; n++) {
+		xe_guc_db_mgr_release_id_locked(dbm, n);
+		KUNIT_EXPECT_EQ(test, xe_guc_db_mgr_reserve_id_locked(dbm), n);
+	}
+	KUNIT_EXPECT_LT(test, xe_guc_db_mgr_reserve_id_locked(dbm), 0);
+	mutex_unlock(dbm_mutex(dbm));
+
+	mutex_lock(dbm_mutex(dbm));
+	for (n = 0; n < *p; n++)
+		xe_guc_db_mgr_release_id_locked(dbm, n);
+	mutex_unlock(dbm_mutex(dbm));
+}
+
+static void test_range_overlap(struct kunit *test)
+{
+	const unsigned int *p = test->param_value;
+	struct xe_guc_db_mgr *dbm = test->priv;
+	int id1, id2, id3;
+	unsigned int n;
+
+	KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, ~0), 0);
+	KUNIT_ASSERT_LE(test, *p, dbm->count);
+
+	KUNIT_ASSERT_GE(test, id1 = xe_guc_db_mgr_reserve_range(dbm, *p, 0), 0);
+	for (n = 0; n < dbm->count - *p; n++) {
+		KUNIT_ASSERT_GE(test, id2 = xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0);
+		KUNIT_ASSERT_NE(test, id2, id1);
+		KUNIT_ASSERT_NE_MSG(test, id2 < id1, id2 > id1 + *p - 1,
+				    "id1=%d id2=%d", id1, id2);
+	}
+	KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0);
+	xe_guc_db_mgr_release_range(dbm, 0, dbm->count);
+
+	if (*p >= 1) {
+		KUNIT_ASSERT_GE(test, id1 = xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0);
+		KUNIT_ASSERT_GE(test, id2 = xe_guc_db_mgr_reserve_range(dbm, *p - 1, 0), 0);
+		KUNIT_ASSERT_NE(test, id2, id1);
+		KUNIT_ASSERT_NE_MSG(test, id1 < id2, id1 > id2 + *p - 2,
+				    "id1=%d id2=%d", id1, id2);
+		for (n = 0; n < dbm->count - *p; n++) {
+			KUNIT_ASSERT_GE(test, id3 = xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0);
+			KUNIT_ASSERT_NE(test, id3, id1);
+			KUNIT_ASSERT_NE(test, id3, id2);
+			KUNIT_ASSERT_NE_MSG(test, id3 < id2, id3 > id2 + *p - 2,
+					    "id3=%d id2=%d", id3, id2);
+		}
+		KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0);
+		xe_guc_db_mgr_release_range(dbm, 0, dbm->count);
+	}
+}
+
+static void test_range_compact(struct kunit *test)
+{
+	const unsigned int *p = test->param_value;
+	struct xe_guc_db_mgr *dbm = test->priv;
+	unsigned int n;
+
+	KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, ~0), 0);
+	KUNIT_ASSERT_NE(test, *p, 0);
+	KUNIT_ASSERT_LE(test, *p, dbm->count);
+	if (dbm->count % *p)
+		kunit_skip(test, "must be divisible");
+
+	KUNIT_ASSERT_GE(test, xe_guc_db_mgr_reserve_range(dbm, *p, 0), 0);
+	for (n = 1; n < dbm->count / *p; n++)
+		KUNIT_ASSERT_GE(test, xe_guc_db_mgr_reserve_range(dbm, *p, 0), 0);
+	KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0);
+	xe_guc_db_mgr_release_range(dbm, 0, dbm->count);
+}
+
+static void test_range_spare(struct kunit *test)
+{
+	const unsigned int *p = test->param_value;
+	struct xe_guc_db_mgr *dbm = test->priv;
+	int id;
+
+	KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, ~0), 0);
+	KUNIT_ASSERT_LE(test, *p, dbm->count);
+
+	KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, *p, dbm->count), 0);
+	KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, *p, dbm->count - *p + 1), 0);
+	KUNIT_ASSERT_EQ(test, id = xe_guc_db_mgr_reserve_range(dbm, *p, dbm->count - *p), 0);
+	KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, 1, dbm->count - *p), 0);
+	xe_guc_db_mgr_release_range(dbm, id, *p);
+}
+
+static struct kunit_case guc_dbm_test_cases[] = {
+	KUNIT_CASE(test_empty),
+	KUNIT_CASE(test_default),
+	KUNIT_CASE_PARAM(test_size, guc_dbm_gen_params),
+	KUNIT_CASE_PARAM(test_reuse, guc_dbm_gen_params),
+	KUNIT_CASE_PARAM(test_range_overlap, guc_dbm_gen_params),
+	KUNIT_CASE_PARAM(test_range_compact, guc_dbm_gen_params),
+	KUNIT_CASE_PARAM(test_range_spare, guc_dbm_gen_params),
+	{}
+};
+
+static struct kunit_suite guc_dbm_suite = {
+	.name = "guc_dbm",
+	.test_cases = guc_dbm_test_cases,
+	.init = guc_dbm_test_init,
+};
+
+kunit_test_suites(&guc_dbm_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_guc_id_mgr_test.c b/drivers/gpu/drm/xe/tests/xe_guc_id_mgr_test.c
new file mode 100644
index 0000000000..ee30a1939e
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_guc_id_mgr_test.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+#include "xe_device.h"
+#include "xe_kunit_helpers.h"
+
+static int guc_id_mgr_test_init(struct kunit *test)
+{
+	struct xe_guc_id_mgr *idm;
+
+	xe_kunit_helper_xe_device_test_init(test);
+	idm = &xe_device_get_gt(test->priv, 0)->uc.guc.submission_state.idm;
+
+	mutex_init(idm_mutex(idm));
+	test->priv = idm;
+	return 0;
+}
+
+static void bad_init(struct kunit *test)
+{
+	struct xe_guc_id_mgr *idm = test->priv;
+
+	KUNIT_EXPECT_EQ(test, -EINVAL, xe_guc_id_mgr_init(idm, 0));
+	KUNIT_EXPECT_EQ(test, -ERANGE, xe_guc_id_mgr_init(idm, GUC_ID_MAX + 1));
+}
+
+static void no_init(struct kunit *test)
+{
+	struct xe_guc_id_mgr *idm = test->priv;
+
+	mutex_lock(idm_mutex(idm));
+	KUNIT_EXPECT_EQ(test, -ENODATA, xe_guc_id_mgr_reserve_locked(idm, 0));
+	mutex_unlock(idm_mutex(idm));
+
+	KUNIT_EXPECT_EQ(test, -ENODATA, xe_guc_id_mgr_reserve(idm, 1, 1));
+}
+
+static void init_fini(struct kunit *test)
+{
+	struct xe_guc_id_mgr *idm = test->priv;
+
+	KUNIT_ASSERT_EQ(test, 0, xe_guc_id_mgr_init(idm, -1));
+	KUNIT_EXPECT_NOT_NULL(test, idm->bitmap);
+	KUNIT_EXPECT_EQ(test, idm->total, GUC_ID_MAX);
+	__fini_idm(NULL, idm);
+	KUNIT_EXPECT_NULL(test, idm->bitmap);
+	KUNIT_EXPECT_EQ(test, idm->total, 0);
+}
+
+static void check_used(struct kunit *test)
+{
+	struct xe_guc_id_mgr *idm = test->priv;
+	unsigned int n;
+
+	KUNIT_ASSERT_EQ(test, 0, xe_guc_id_mgr_init(idm, 2));
+
+	mutex_lock(idm_mutex(idm));
+
+	for (n = 0; n < idm->total; n++) {
+		kunit_info(test, "n=%u", n);
+		KUNIT_EXPECT_EQ(test, idm->used, n);
+		KUNIT_EXPECT_GE(test, idm_reserve_chunk_locked(idm, 1, 0), 0);
+		KUNIT_EXPECT_EQ(test, idm->used, n + 1);
+	}
+	KUNIT_EXPECT_EQ(test, idm->used, idm->total);
+	idm_release_chunk_locked(idm, 0, idm->used);
+	KUNIT_EXPECT_EQ(test, idm->used, 0);
+
+	mutex_unlock(idm_mutex(idm));
+}
+
+static void check_quota(struct kunit *test)
+{
+	struct xe_guc_id_mgr *idm = test->priv;
+	unsigned int n;
+
+	KUNIT_ASSERT_EQ(test, 0, xe_guc_id_mgr_init(idm, 2));
+
+	mutex_lock(idm_mutex(idm));
+
+	for (n = 0; n < idm->total - 1; n++) {
+		kunit_info(test, "n=%u", n);
+		KUNIT_EXPECT_EQ(test, idm_reserve_chunk_locked(idm, 1, idm->total), -EDQUOT);
+		KUNIT_EXPECT_EQ(test, idm_reserve_chunk_locked(idm, 1, idm->total - n), -EDQUOT);
+		KUNIT_EXPECT_EQ(test, idm_reserve_chunk_locked(idm, idm->total - n, 1), -EDQUOT);
+		KUNIT_EXPECT_GE(test, idm_reserve_chunk_locked(idm, 1, 1), 0);
+	}
+	KUNIT_EXPECT_LE(test, 0, idm_reserve_chunk_locked(idm, 1, 0));
+	KUNIT_EXPECT_EQ(test, idm->used, idm->total);
+	idm_release_chunk_locked(idm, 0, idm->total);
+	KUNIT_EXPECT_EQ(test, idm->used, 0);
+
+	mutex_unlock(idm_mutex(idm));
+}
+
+static void check_all(struct kunit *test)
+{
+	struct xe_guc_id_mgr *idm = test->priv;
+	unsigned int n;
+
+	KUNIT_ASSERT_EQ(test, 0, xe_guc_id_mgr_init(idm, -1));
+
+	mutex_lock(idm_mutex(idm));
+
+	for (n = 0; n < idm->total; n++)
+		KUNIT_EXPECT_LE(test, 0, idm_reserve_chunk_locked(idm, 1, 0));
+	KUNIT_EXPECT_EQ(test, idm->used, idm->total);
+	for (n = 0; n < idm->total; n++)
+		idm_release_chunk_locked(idm, n, 1);
+
+	mutex_unlock(idm_mutex(idm));
+}
+
+static struct kunit_case guc_id_mgr_test_cases[] = {
+	KUNIT_CASE(bad_init),
+	KUNIT_CASE(no_init),
+	KUNIT_CASE(init_fini),
+	KUNIT_CASE(check_used),
+	KUNIT_CASE(check_quota),
+	KUNIT_CASE_SLOW(check_all),
+	{}
+};
+
+static struct kunit_suite guc_id_mgr_suite = {
+	.name = "guc_idm",
+	.test_cases = guc_id_mgr_test_cases,
+
+	.init = guc_id_mgr_test_init,
+	.exit = NULL,
+};
+
+kunit_test_suites(&guc_id_mgr_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_guc_relay_test.c b/drivers/gpu/drm/xe/tests/xe_guc_relay_test.c
new file mode 100644
index 0000000000..13701451b9
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_guc_relay_test.c
@@ -0,0 +1,522 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <kunit/static_stub.h>
+#include <kunit/test.h>
+#include <kunit/test-bug.h>
+
+#include "xe_device.h"
+#include "xe_kunit_helpers.h"
+#include "xe_pci_test.h"
+
+#define TEST_RID	1234
+#define TEST_VFID	5
+#define TEST_LEN	6
+#define TEST_ACTION	0xa
+#define TEST_DATA(n)	(0xd0 + (n))
+
+static int replacement_relay_get_totalvfs(struct xe_guc_relay *relay)
+{
+	return TEST_VFID;
+}
+
+static int relay_test_init(struct kunit *test)
+{
+	struct xe_pci_fake_data fake = {
+		.sriov_mode = XE_SRIOV_MODE_PF,
+		.platform = XE_TIGERLAKE, /* some random platform */
+		.subplatform = XE_SUBPLATFORM_NONE,
+	};
+	struct xe_guc_relay *relay;
+	struct xe_device *xe;
+
+	test->priv = &fake;
+	xe_kunit_helper_xe_device_test_init(test);
+
+	xe = test->priv;
+	KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0);
+
+	relay = &xe_device_get_gt(xe, 0)->uc.guc.relay;
+	kunit_activate_static_stub(test, relay_get_totalvfs,
+				   replacement_relay_get_totalvfs);
+
+	KUNIT_ASSERT_EQ(test, xe_guc_relay_init(relay), 0);
+	KUNIT_EXPECT_TRUE(test, relay_is_ready(relay));
+	relay->last_rid = TEST_RID - 1;
+
+	test->priv = relay;
+	return 0;
+}
+
+static const u32 TEST_MSG[TEST_LEN] = {
+	FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+	FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
+	FIELD_PREP_CONST(GUC_HXG_EVENT_MSG_0_ACTION, TEST_ACTION) |
+	FIELD_PREP_CONST(GUC_HXG_EVENT_MSG_0_DATA0, TEST_DATA(0)),
+	TEST_DATA(1), TEST_DATA(2), TEST_DATA(3), TEST_DATA(4),
+};
+
+static int replacement_xe_guc_ct_send_recv_always_fails(struct xe_guc_ct *ct,
+							const u32 *msg, u32 len,
+							u32 *response_buffer)
+{
+	struct kunit *test = kunit_get_current_test();
+
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ct);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, msg);
+	KUNIT_ASSERT_GE(test, len, GUC_HXG_MSG_MIN_LEN);
+
+	return -ECOMM;
+}
+
+static int replacement_xe_guc_ct_send_recv_expects_pf2guc_relay(struct xe_guc_ct *ct,
+								const u32 *msg, u32 len,
+								u32 *response_buffer)
+{
+	struct kunit *test = kunit_get_current_test();
+
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ct);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, msg);
+	KUNIT_ASSERT_GE(test, len, PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN);
+	KUNIT_ASSERT_EQ(test, len, PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN + TEST_LEN);
+	KUNIT_EXPECT_EQ(test, GUC_HXG_ORIGIN_HOST, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]));
+	KUNIT_EXPECT_EQ(test, GUC_HXG_TYPE_REQUEST, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]));
+	KUNIT_EXPECT_EQ(test, XE_GUC_ACTION_PF2GUC_RELAY_TO_VF,
+			FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]));
+	KUNIT_EXPECT_EQ(test, TEST_VFID,
+			FIELD_GET(PF2GUC_RELAY_TO_VF_REQUEST_MSG_1_VFID, msg[1]));
+	KUNIT_EXPECT_EQ(test, TEST_RID,
+			FIELD_GET(PF2GUC_RELAY_TO_VF_REQUEST_MSG_2_RELAY_ID, msg[2]));
+	KUNIT_EXPECT_MEMEQ(test, TEST_MSG, msg + PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN,
+			   sizeof(u32) * TEST_LEN);
+	return 0;
+}
+
+static const u32 test_guc2pf[GUC2PF_RELAY_FROM_VF_EVENT_MSG_MAX_LEN] = {
+	/* transport */
+	FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC) |
+	FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
+	FIELD_PREP_CONST(GUC_HXG_EVENT_MSG_0_ACTION, XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF),
+	FIELD_PREP_CONST(GUC2PF_RELAY_FROM_VF_EVENT_MSG_1_VFID, TEST_VFID),
+	FIELD_PREP_CONST(GUC2PF_RELAY_FROM_VF_EVENT_MSG_2_RELAY_ID, TEST_RID),
+	/* payload */
+	FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+	FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS),
+};
+
+static const u32 test_guc2vf[GUC2VF_RELAY_FROM_PF_EVENT_MSG_MAX_LEN] = {
+	/* transport */
+	FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC) |
+	FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
+	FIELD_PREP_CONST(GUC_HXG_EVENT_MSG_0_ACTION, XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF),
+	FIELD_PREP_CONST(GUC2VF_RELAY_FROM_PF_EVENT_MSG_1_RELAY_ID, TEST_RID),
+	/* payload */
+	FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+	FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS),
+};
+
+static void pf_rejects_guc2pf_too_short(struct kunit *test)
+{
+	const u32 len = GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN - 1;
+	struct xe_guc_relay *relay = test->priv;
+	const u32 *msg = test_guc2pf;
+
+	KUNIT_ASSERT_EQ(test, -EPROTO, xe_guc_relay_process_guc2pf(relay, msg, len));
+}
+
+static void pf_rejects_guc2pf_too_long(struct kunit *test)
+{
+	const u32 len = GUC2PF_RELAY_FROM_VF_EVENT_MSG_MAX_LEN + 1;
+	struct xe_guc_relay *relay = test->priv;
+	const u32 *msg = test_guc2pf;
+
+	KUNIT_ASSERT_EQ(test, -EMSGSIZE, xe_guc_relay_process_guc2pf(relay, msg, len));
+}
+
+static void pf_rejects_guc2pf_no_payload(struct kunit *test)
+{
+	const u32 len = GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN;
+	struct xe_guc_relay *relay = test->priv;
+	const u32 *msg = test_guc2pf;
+
+	KUNIT_ASSERT_EQ(test, -EPROTO, xe_guc_relay_process_guc2pf(relay, msg, len));
+}
+
+static void pf_fails_no_payload(struct kunit *test)
+{
+	struct xe_guc_relay *relay = test->priv;
+	const u32 msg = 0;
+
+	KUNIT_ASSERT_EQ(test, -EPROTO, relay_process_msg(relay, TEST_VFID, TEST_RID, &msg, 0));
+}
+
+static void pf_fails_bad_origin(struct kunit *test)
+{
+	struct xe_guc_relay *relay = test->priv;
+	static const u32 msg[] = {
+		FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC) |
+		FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS),
+	};
+	u32 len = ARRAY_SIZE(msg);
+
+	KUNIT_ASSERT_EQ(test, -EPROTO, relay_process_msg(relay, TEST_VFID, TEST_RID, msg, len));
+}
+
+static void pf_fails_bad_type(struct kunit *test)
+{
+	struct xe_guc_relay *relay = test->priv;
+	const u32 msg[] = {
+		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		FIELD_PREP(GUC_HXG_MSG_0_TYPE, 4), /* only 4 is undefined */
+	};
+	u32 len = ARRAY_SIZE(msg);
+
+	KUNIT_ASSERT_EQ(test, -EBADRQC, relay_process_msg(relay, TEST_VFID, TEST_RID, msg, len));
+}
+
+static void pf_txn_reports_error(struct kunit *test)
+{
+	struct xe_guc_relay *relay = test->priv;
+	struct relay_transaction *txn;
+
+	txn = __relay_get_transaction(relay, false, TEST_VFID, TEST_RID,
+				      TEST_MSG, TEST_LEN, NULL, 0);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, txn);
+
+	kunit_activate_static_stub(test, xe_guc_ct_send_recv,
+				   replacement_xe_guc_ct_send_recv_always_fails);
+	KUNIT_EXPECT_EQ(test, -ECOMM, relay_send_transaction(relay, txn));
+
+	relay_release_transaction(relay, txn);
+}
+
+static void pf_txn_sends_pf2guc(struct kunit *test)
+{
+	struct xe_guc_relay *relay = test->priv;
+	struct relay_transaction *txn;
+
+	txn = __relay_get_transaction(relay, false, TEST_VFID, TEST_RID,
+				      TEST_MSG, TEST_LEN, NULL, 0);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, txn);
+
+	kunit_activate_static_stub(test, xe_guc_ct_send_recv,
+				   replacement_xe_guc_ct_send_recv_expects_pf2guc_relay);
+	KUNIT_ASSERT_EQ(test, 0, relay_send_transaction(relay, txn));
+
+	relay_release_transaction(relay, txn);
+}
+
+static void pf_sends_pf2guc(struct kunit *test)
+{
+	struct xe_guc_relay *relay = test->priv;
+
+	kunit_activate_static_stub(test, xe_guc_ct_send_recv,
+				   replacement_xe_guc_ct_send_recv_expects_pf2guc_relay);
+	KUNIT_ASSERT_EQ(test, 0,
+			xe_guc_relay_send_to_vf(relay, TEST_VFID,
+						TEST_MSG, TEST_LEN, NULL, 0));
+}
+
+static int replacement_xe_guc_ct_send_recv_loopback_relay(struct xe_guc_ct *ct,
+							  const u32 *msg, u32 len,
+							  u32 *response_buffer)
+{
+	struct kunit *test = kunit_get_current_test();
+	struct xe_guc_relay *relay = test->priv;
+	u32 *reply = kunit_kzalloc(test, len * sizeof(u32), GFP_KERNEL);
+	int (*guc2relay)(struct xe_guc_relay *, const u32 *, u32);
+	u32 action;
+	int err;
+
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ct);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, msg);
+	KUNIT_ASSERT_GE(test, len, GUC_HXG_MSG_MIN_LEN);
+	KUNIT_ASSERT_EQ(test, GUC_HXG_TYPE_REQUEST,
+			FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]));
+	KUNIT_ASSERT_GE(test, len, GUC_HXG_REQUEST_MSG_MIN_LEN);
+	KUNIT_ASSERT_NOT_NULL(test, reply);
+
+	switch (FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0])) {
+	case XE_GUC_ACTION_PF2GUC_RELAY_TO_VF:
+		KUNIT_ASSERT_GE(test, len, PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN);
+		action = XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF;
+		guc2relay = xe_guc_relay_process_guc2pf;
+		break;
+	case XE_GUC_ACTION_VF2GUC_RELAY_TO_PF:
+		KUNIT_ASSERT_GE(test, len, VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN);
+		action = XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF;
+		guc2relay = xe_guc_relay_process_guc2vf;
+		break;
+	default:
+		KUNIT_FAIL(test, "bad RELAY action %#x", msg[0]);
+		return -EINVAL;
+	}
+
+	reply[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC) |
+		   FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
+		   FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION, action);
+	memcpy(reply + 1, msg + 1, sizeof(u32) * (len - 1));
+
+	err = guc2relay(relay, reply, len);
+	KUNIT_EXPECT_EQ(test, err, 0);
+
+	return err;
+}
+
+static void test_requires_relay_testloop(struct kunit *test)
+{
+	/*
+	 * The debug relay action GUC_RELAY_ACTION_VFXPF_TESTLOOP is available
+	 * only on builds with CONFIG_DRM_XE_DEBUG_SRIOV enabled.
+	 * See "kunit.py --kconfig_add" option if it's missing.
+	 */
+	if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV))
+		kunit_skip(test, "requires %s\n", __stringify(CONFIG_DRM_XE_DEBUG_SRIOV));
+}
+
+static void pf_loopback_nop(struct kunit *test)
+{
+	struct xe_guc_relay *relay = test->priv;
+	u32 request[] = {
+		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VFXPF_TESTLOOP) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, VFXPF_TESTLOOP_OPCODE_NOP),
+	};
+	u32 response[GUC_HXG_RESPONSE_MSG_MIN_LEN];
+	int ret;
+
+	test_requires_relay_testloop(test);
+
+	kunit_activate_static_stub(test, relay_kick_worker, relay_process_incoming_action);
+	kunit_activate_static_stub(test, xe_guc_ct_send_recv,
+				   replacement_xe_guc_ct_send_recv_loopback_relay);
+	ret = xe_guc_relay_send_to_vf(relay, TEST_VFID,
+				      request, ARRAY_SIZE(request),
+				      response, ARRAY_SIZE(response));
+	KUNIT_ASSERT_EQ(test, ret, GUC_HXG_RESPONSE_MSG_MIN_LEN);
+	KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, response[0]),
+			GUC_HXG_ORIGIN_HOST);
+	KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_MSG_0_TYPE, response[0]),
+			GUC_HXG_TYPE_RESPONSE_SUCCESS);
+	KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, response[0]), 0);
+}
+
+static void pf_loopback_echo(struct kunit *test)
+{
+	struct xe_guc_relay *relay = test->priv;
+	u32 request[] = {
+		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VFXPF_TESTLOOP) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, VFXPF_TESTLOOP_OPCODE_ECHO),
+		TEST_DATA(1), TEST_DATA(2), TEST_DATA(3), TEST_DATA(4),
+	};
+	u32 response[ARRAY_SIZE(request)];
+	unsigned int n;
+	int ret;
+
+	test_requires_relay_testloop(test);
+
+	kunit_activate_static_stub(test, relay_kick_worker, relay_process_incoming_action);
+	kunit_activate_static_stub(test, xe_guc_ct_send_recv,
+				   replacement_xe_guc_ct_send_recv_loopback_relay);
+	ret = xe_guc_relay_send_to_vf(relay, TEST_VFID,
+				      request, ARRAY_SIZE(request),
+				      response, ARRAY_SIZE(response));
+	KUNIT_ASSERT_EQ(test, ret, ARRAY_SIZE(response));
+	KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, response[0]),
+			GUC_HXG_ORIGIN_HOST);
+	KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_MSG_0_TYPE, response[0]),
+			GUC_HXG_TYPE_RESPONSE_SUCCESS);
+	KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, response[0]),
+			ARRAY_SIZE(response));
+	for (n = GUC_HXG_RESPONSE_MSG_MIN_LEN; n < ret; n++)
+		KUNIT_EXPECT_EQ(test, request[n], response[n]);
+}
+
+static void pf_loopback_fail(struct kunit *test)
+{
+	struct xe_guc_relay *relay = test->priv;
+	u32 request[] = {
+		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VFXPF_TESTLOOP) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, VFXPF_TESTLOOP_OPCODE_FAIL),
+	};
+	u32 response[GUC_HXG_RESPONSE_MSG_MIN_LEN];
+	int ret;
+
+	test_requires_relay_testloop(test);
+
+	kunit_activate_static_stub(test, relay_kick_worker, relay_process_incoming_action);
+	kunit_activate_static_stub(test, xe_guc_ct_send_recv,
+				   replacement_xe_guc_ct_send_recv_loopback_relay);
+	ret = xe_guc_relay_send_to_vf(relay, TEST_VFID,
+				      request, ARRAY_SIZE(request),
+				      response, ARRAY_SIZE(response));
+	KUNIT_ASSERT_EQ(test, ret, -EREMOTEIO);
+}
+
+static void pf_loopback_busy(struct kunit *test)
+{
+	struct xe_guc_relay *relay = test->priv;
+	u32 request[] = {
+		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VFXPF_TESTLOOP) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, VFXPF_TESTLOOP_OPCODE_BUSY),
+		TEST_DATA(0xb),
+	};
+	u32 response[GUC_HXG_RESPONSE_MSG_MIN_LEN];
+	int ret;
+
+	test_requires_relay_testloop(test);
+
+	kunit_activate_static_stub(test, relay_testonly_nop, relay_process_incoming_action);
+	kunit_activate_static_stub(test, relay_kick_worker, relay_process_incoming_action);
+	kunit_activate_static_stub(test, xe_guc_ct_send_recv,
+				   replacement_xe_guc_ct_send_recv_loopback_relay);
+	ret = xe_guc_relay_send_to_vf(relay, TEST_VFID,
+				      request, ARRAY_SIZE(request),
+				      response, ARRAY_SIZE(response));
+	KUNIT_ASSERT_EQ(test, ret, GUC_HXG_RESPONSE_MSG_MIN_LEN);
+}
+
+static void pf_loopback_retry(struct kunit *test)
+{
+	struct xe_guc_relay *relay = test->priv;
+	u32 request[] = {
+		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VFXPF_TESTLOOP) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, VFXPF_TESTLOOP_OPCODE_RETRY),
+		TEST_DATA(0xd), TEST_DATA(0xd),
+	};
+	u32 response[GUC_HXG_RESPONSE_MSG_MIN_LEN];
+	int ret;
+
+	test_requires_relay_testloop(test);
+
+	kunit_activate_static_stub(test, relay_kick_worker, relay_process_incoming_action);
+	kunit_activate_static_stub(test, xe_guc_ct_send_recv,
+				   replacement_xe_guc_ct_send_recv_loopback_relay);
+	ret = xe_guc_relay_send_to_vf(relay, TEST_VFID,
+				      request, ARRAY_SIZE(request),
+				      response, ARRAY_SIZE(response));
+	KUNIT_ASSERT_EQ(test, ret, GUC_HXG_RESPONSE_MSG_MIN_LEN);
+}
+
+static struct kunit_case pf_relay_test_cases[] = {
+	KUNIT_CASE(pf_rejects_guc2pf_too_short),
+	KUNIT_CASE(pf_rejects_guc2pf_too_long),
+	KUNIT_CASE(pf_rejects_guc2pf_no_payload),
+	KUNIT_CASE(pf_fails_no_payload),
+	KUNIT_CASE(pf_fails_bad_origin),
+	KUNIT_CASE(pf_fails_bad_type),
+	KUNIT_CASE(pf_txn_reports_error),
+	KUNIT_CASE(pf_txn_sends_pf2guc),
+	KUNIT_CASE(pf_sends_pf2guc),
+	KUNIT_CASE(pf_loopback_nop),
+	KUNIT_CASE(pf_loopback_echo),
+	KUNIT_CASE(pf_loopback_fail),
+	KUNIT_CASE_SLOW(pf_loopback_busy),
+	KUNIT_CASE_SLOW(pf_loopback_retry),
+	{}
+};
+
+static struct kunit_suite pf_relay_suite = {
+	.name = "pf_relay",
+	.test_cases = pf_relay_test_cases,
+	.init = relay_test_init,
+};
+
+static void vf_rejects_guc2vf_too_short(struct kunit *test)
+{
+	const u32 len = GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN - 1;
+	struct xe_guc_relay *relay = test->priv;
+	const u32 *msg = test_guc2vf;
+
+	KUNIT_ASSERT_EQ(test, -EPROTO, xe_guc_relay_process_guc2vf(relay, msg, len));
+}
+
+static void vf_rejects_guc2vf_too_long(struct kunit *test)
+{
+	const u32 len = GUC2VF_RELAY_FROM_PF_EVENT_MSG_MAX_LEN + 1;
+	struct xe_guc_relay *relay = test->priv;
+	const u32 *msg = test_guc2vf;
+
+	KUNIT_ASSERT_EQ(test, -EMSGSIZE, xe_guc_relay_process_guc2vf(relay, msg, len));
+}
+
+static void vf_rejects_guc2vf_no_payload(struct kunit *test)
+{
+	const u32 len = GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN;
+	struct xe_guc_relay *relay = test->priv;
+	const u32 *msg = test_guc2vf;
+
+	KUNIT_ASSERT_EQ(test, -EPROTO, xe_guc_relay_process_guc2vf(relay, msg, len));
+}
+
+static struct kunit_case vf_relay_test_cases[] = {
+	KUNIT_CASE(vf_rejects_guc2vf_too_short),
+	KUNIT_CASE(vf_rejects_guc2vf_too_long),
+	KUNIT_CASE(vf_rejects_guc2vf_no_payload),
+	{}
+};
+
+static struct kunit_suite vf_relay_suite = {
+	.name = "vf_relay",
+	.test_cases = vf_relay_test_cases,
+	.init = relay_test_init,
+};
+
+static void xe_drops_guc2pf_if_not_ready(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+	struct xe_guc_relay *relay = &xe_device_get_gt(xe, 0)->uc.guc.relay;
+	const u32 *msg = test_guc2pf;
+	u32 len = GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN + GUC_RELAY_MSG_MIN_LEN;
+
+	KUNIT_ASSERT_EQ(test, -ENODEV, xe_guc_relay_process_guc2pf(relay, msg, len));
+}
+
+static void xe_drops_guc2vf_if_not_ready(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+	struct xe_guc_relay *relay = &xe_device_get_gt(xe, 0)->uc.guc.relay;
+	const u32 *msg = test_guc2vf;
+	u32 len = GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN + GUC_RELAY_MSG_MIN_LEN;
+
+	KUNIT_ASSERT_EQ(test, -ENODEV, xe_guc_relay_process_guc2vf(relay, msg, len));
+}
+
+static void xe_rejects_send_if_not_ready(struct kunit *test)
+{
+	struct xe_device *xe = test->priv;
+	struct xe_guc_relay *relay = &xe_device_get_gt(xe, 0)->uc.guc.relay;
+	u32 msg[GUC_RELAY_MSG_MIN_LEN];
+	u32 len = ARRAY_SIZE(msg);
+
+	KUNIT_ASSERT_EQ(test, -ENODEV, xe_guc_relay_send_to_pf(relay, msg, len, NULL, 0));
+	KUNIT_ASSERT_EQ(test, -ENODEV, relay_send_to(relay, TEST_VFID, msg, len, NULL, 0));
+}
+
+static struct kunit_case no_relay_test_cases[] = {
+	KUNIT_CASE(xe_drops_guc2pf_if_not_ready),
+	KUNIT_CASE(xe_drops_guc2vf_if_not_ready),
+	KUNIT_CASE(xe_rejects_send_if_not_ready),
+	{}
+};
+
+static struct kunit_suite no_relay_suite = {
+	.name = "no_relay",
+	.test_cases = no_relay_test_cases,
+	.init = xe_kunit_helper_xe_device_test_init,
+};
+
+kunit_test_suites(&no_relay_suite,
+		  &pf_relay_suite,
+		  &vf_relay_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c
new file mode 100644
index 0000000000..fefe79b3b7
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <kunit/test.h>
+#include <kunit/static_stub.h>
+#include <kunit/visibility.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_kunit_helpers.h>
+
+#include "tests/xe_kunit_helpers.h"
+#include "tests/xe_pci_test.h"
+#include "xe_device_types.h"
+
+/**
+ * xe_kunit_helper_alloc_xe_device - Allocate a &xe_device for a KUnit test.
+ * @test: the &kunit where this &xe_device will be used
+ * @dev: The parent device object
+ *
+ * This function allocates xe_device using drm_kunit_helper_alloc_device().
+ * The xe_device allocation is managed by the test.
+ *
+ * @dev should be allocated using drm_kunit_helper_alloc_device().
+ *
+ * This function uses KUNIT_ASSERT to detect any allocation failures.
+ *
+ * Return: A pointer to the new &xe_device.
+ */
+struct xe_device *xe_kunit_helper_alloc_xe_device(struct kunit *test,
+						  struct device *dev)
+{
+	struct xe_device *xe;
+
+	xe = drm_kunit_helper_alloc_drm_device(test, dev,
+					       struct xe_device,
+					       drm, DRIVER_GEM);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe);
+	return xe;
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_kunit_helper_alloc_xe_device);
+
+static void kunit_action_restore_priv(void *priv)
+{
+	struct kunit *test = kunit_get_current_test();
+
+	test->priv = priv;
+}
+
+/**
+ * xe_kunit_helper_xe_device_test_init - Prepare a &xe_device for a KUnit test.
+ * @test: the &kunit where this fake &xe_device will be used
+ *
+ * This function allocates and initializes a fake &xe_device and stores its
+ * pointer as &kunit.priv to allow the test code to access it.
+ *
+ * This function can be directly used as custom implementation of
+ * &kunit_suite.init.
+ *
+ * It is possible to prepare specific variant of the fake &xe_device by passing
+ * in &kunit.priv pointer to the struct xe_pci_fake_data supplemented with
+ * desired parameters prior to calling this function.
+ *
+ * This function uses KUNIT_ASSERT to detect any failures.
+ *
+ * Return: Always 0.
+ */
+int xe_kunit_helper_xe_device_test_init(struct kunit *test)
+{
+	struct xe_device *xe;
+	struct device *dev;
+	int err;
+
+	dev = drm_kunit_helper_alloc_device(test);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+
+	xe = xe_kunit_helper_alloc_xe_device(test, dev);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe);
+
+	err = xe_pci_fake_device_init(xe);
+	KUNIT_ASSERT_EQ(test, err, 0);
+
+	err = kunit_add_action_or_reset(test, kunit_action_restore_priv, test->priv);
+	KUNIT_ASSERT_EQ(test, err, 0);
+
+	test->priv = xe;
+	return 0;
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_kunit_helper_xe_device_test_init);
diff --git a/drivers/gpu/drm/xe/tests/xe_kunit_helpers.h b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.h
new file mode 100644
index 0000000000..067a1babf0
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_KUNIT_HELPERS_H_
+#define _XE_KUNIT_HELPERS_H_
+
+struct device;
+struct kunit;
+struct xe_device;
+
+struct xe_device *xe_kunit_helper_alloc_xe_device(struct kunit *test,
+						  struct device *dev);
+int xe_kunit_helper_xe_device_test_init(struct kunit *test);
+
+#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
new file mode 100644
index 0000000000..eb1ea99a5a
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+#include <linux/module.h>
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe live kunit tests");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index c347e2c29f..977d5f4e44 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -10,6 +10,7 @@
 #include "tests/xe_pci_test.h"
 
 #include "xe_pci.h"
+#include "xe_pm.h"
 
 static bool sanity_fence_failed(struct xe_device *xe, struct dma_fence *fence,
 				const char *str, struct kunit *test)
@@ -112,7 +113,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
 						   bo->size,
 						   ttm_bo_type_kernel,
 						   region |
-						   XE_BO_NEEDS_CPU_ACCESS);
+						   XE_BO_FLAG_NEEDS_CPU_ACCESS);
 	if (IS_ERR(remote)) {
 		KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %pe\n",
 			   str, remote);
@@ -190,7 +191,7 @@ out_unlock:
 static void test_copy_sysmem(struct xe_migrate *m, struct xe_bo *bo,
 			     struct kunit *test)
 {
-	test_copy(m, bo, test, XE_BO_CREATE_SYSTEM_BIT);
+	test_copy(m, bo, test, XE_BO_FLAG_SYSTEM);
 }
 
 static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo,
@@ -202,9 +203,9 @@ static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo,
 		return;
 
 	if (bo->ttm.resource->mem_type == XE_PL_VRAM0)
-		region = XE_BO_CREATE_VRAM1_BIT;
+		region = XE_BO_FLAG_VRAM1;
 	else
-		region = XE_BO_CREATE_VRAM0_BIT;
+		region = XE_BO_FLAG_VRAM0;
 	test_copy(m, bo, test, region);
 }
 
@@ -280,8 +281,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 
 	big = xe_bo_create_pin_map(xe, tile, m->q->vm, SZ_4M,
 				   ttm_bo_type_kernel,
-				   XE_BO_CREATE_VRAM_IF_DGFX(tile) |
-				   XE_BO_CREATE_PINNED_BIT);
+				   XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+				   XE_BO_FLAG_PINNED);
 	if (IS_ERR(big)) {
 		KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big));
 		goto vunmap;
@@ -289,8 +290,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 
 	pt = xe_bo_create_pin_map(xe, tile, m->q->vm, XE_PAGE_SIZE,
 				  ttm_bo_type_kernel,
-				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
-				  XE_BO_CREATE_PINNED_BIT);
+				  XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+				  XE_BO_FLAG_PINNED);
 	if (IS_ERR(pt)) {
 		KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n",
 			   PTR_ERR(pt));
@@ -300,8 +301,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 	tiny = xe_bo_create_pin_map(xe, tile, m->q->vm,
 				    2 * SZ_4K,
 				    ttm_bo_type_kernel,
-				    XE_BO_CREATE_VRAM_IF_DGFX(tile) |
-				    XE_BO_CREATE_PINNED_BIT);
+				    XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+				    XE_BO_FLAG_PINNED);
 	if (IS_ERR(tiny)) {
 		KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n",
 			   PTR_ERR(pt));
@@ -423,17 +424,19 @@ static int migrate_test_run_device(struct xe_device *xe)
 	struct xe_tile *tile;
 	int id;
 
+	xe_pm_runtime_get(xe);
+
 	for_each_tile(tile, xe, id) {
 		struct xe_migrate *m = tile->migrate;
 
 		kunit_info(test, "Testing tile id %d.\n", id);
 		xe_vm_lock(m->q->vm, true);
-		xe_device_mem_access_get(xe);
 		xe_migrate_sanity_test(m, test);
-		xe_device_mem_access_put(xe);
 		xe_vm_unlock(m->q->vm);
 	}
 
+	xe_pm_runtime_put(xe);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
index cf0c173b94..eb0d896341 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c
@@ -18,8 +18,3 @@ static struct kunit_suite xe_migrate_test_suite = {
 };
 
 kunit_test_suite(xe_migrate_test_suite);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("xe_migrate kunit test");
-MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c
index 7dd34f94e8..1b8617075b 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs.c
+++ b/drivers/gpu/drm/xe/tests/xe_mocs.c
@@ -10,10 +10,11 @@
 #include "tests/xe_pci_test.h"
 #include "tests/xe_test.h"
 
-#include "xe_pci.h"
+#include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_mocs.h"
-#include "xe_device.h"
+#include "xe_pci.h"
+#include "xe_pm.h"
 
 struct live_mocs {
 	struct xe_mocs_info table;
@@ -28,6 +29,8 @@ static int live_mocs_init(struct live_mocs *arg, struct xe_gt *gt)
 
 	flags = get_mocs_settings(gt_to_xe(gt), &arg->table);
 
+	kunit_info(test, "gt %d", gt->info.id);
+	kunit_info(test, "gt type %d", gt->info.type);
 	kunit_info(test, "table size %d", arg->table.size);
 	kunit_info(test, "table uc_index %d", arg->table.uc_index);
 	kunit_info(test, "table n_entries %d", arg->table.n_entries);
@@ -38,69 +41,72 @@ static int live_mocs_init(struct live_mocs *arg, struct xe_gt *gt)
 static void read_l3cc_table(struct xe_gt *gt,
 			    const struct xe_mocs_info *info)
 {
+	struct kunit *test = xe_cur_kunit();
+	u32 l3cc, l3cc_expected;
 	unsigned int i;
-	u32 l3cc;
 	u32 reg_val;
 	u32 ret;
 
-	struct kunit *test = xe_cur_kunit();
-
-	xe_device_mem_access_get(gt_to_xe(gt));
 	ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
 	KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n");
-	mocs_dbg(&gt_to_xe(gt)->drm, "L3CC entries:%d\n", info->n_entries);
-	for (i = 0;
-	     i < (info->n_entries + 1) / 2 ?
-	     (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i),
-				  get_entry_l3cc(info, 2 * i + 1))), 1 : 0;
-	     i++) {
-		if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250)
-			reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i));
-		else
-			reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i));
-		mocs_dbg(&gt_to_xe(gt)->drm, "%d 0x%x 0x%x 0x%x\n", i,
-			 XELP_LNCFCMOCS(i).addr, reg_val, l3cc);
-		if (reg_val != l3cc)
-			KUNIT_FAIL(test, "l3cc reg 0x%x has incorrect val.\n",
-				   XELP_LNCFCMOCS(i).addr);
+
+	for (i = 0; i < info->n_entries; i++) {
+		if (!(i & 1)) {
+			if (regs_are_mcr(gt))
+				reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i >> 1));
+			else
+				reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i >> 1));
+
+			mocs_dbg(gt, "reg_val=0x%x\n", reg_val);
+		} else {
+			/* Just re-use value read on previous iteration */
+			reg_val >>= 16;
+		}
+
+		l3cc_expected = get_entry_l3cc(info, i);
+		l3cc = reg_val & 0xffff;
+
+		mocs_dbg(gt, "[%u] expected=0x%x actual=0x%x\n",
+			 i, l3cc_expected, l3cc);
+
+		KUNIT_EXPECT_EQ_MSG(test, l3cc_expected, l3cc,
+				    "l3cc idx=%u has incorrect val.\n", i);
 	}
 	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-	xe_device_mem_access_put(gt_to_xe(gt));
 }
 
 static void read_mocs_table(struct xe_gt *gt,
 			    const struct xe_mocs_info *info)
 {
-	struct xe_device *xe = gt_to_xe(gt);
-
+	struct kunit *test = xe_cur_kunit();
+	u32 mocs, mocs_expected;
 	unsigned int i;
-	u32 mocs;
 	u32 reg_val;
 	u32 ret;
 
-	struct kunit *test = xe_cur_kunit();
+	KUNIT_EXPECT_TRUE_MSG(test, info->unused_entries_index,
+			      "Unused entries index should have been defined\n");
 
-	xe_device_mem_access_get(gt_to_xe(gt));
 	ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
 	KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n");
-	mocs_dbg(&gt_to_xe(gt)->drm, "Global MOCS entries:%d\n", info->n_entries);
-	drm_WARN_ONCE(&xe->drm, !info->unused_entries_index,
-		      "Unused entries index should have been defined\n");
-	for (i = 0;
-	     i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0;
-	     i++) {
-		if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250)
+
+	for (i = 0; i < info->n_entries; i++) {
+		if (regs_are_mcr(gt))
 			reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i));
 		else
 			reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i));
-		mocs_dbg(&gt_to_xe(gt)->drm, "%d 0x%x 0x%x 0x%x\n", i,
-			 XELP_GLOBAL_MOCS(i).addr, reg_val, mocs);
-		if (reg_val != mocs)
-			KUNIT_FAIL(test, "mocs reg 0x%x has incorrect val.\n",
-				   XELP_GLOBAL_MOCS(i).addr);
+
+		mocs_expected = get_entry_control(info, i);
+		mocs = reg_val;
+
+		mocs_dbg(gt, "[%u] expected=0x%x actual=0x%x\n",
+			 i, mocs_expected, mocs);
+
+		KUNIT_EXPECT_EQ_MSG(test, mocs_expected, mocs,
+				    "mocs reg 0x%x has incorrect val.\n", i);
 	}
+
 	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
-	xe_device_mem_access_put(gt_to_xe(gt));
 }
 
 static int mocs_kernel_test_run_device(struct xe_device *xe)
@@ -113,6 +119,8 @@ static int mocs_kernel_test_run_device(struct xe_device *xe)
 	unsigned int flags;
 	int id;
 
+	xe_pm_runtime_get(xe);
+
 	for_each_gt(gt, xe, id) {
 		flags = live_mocs_init(&mocs, gt);
 		if (flags & HAS_GLOBAL_MOCS)
@@ -120,6 +128,9 @@ static int mocs_kernel_test_run_device(struct xe_device *xe)
 		if (flags & HAS_LNCF_MOCS)
 			read_l3cc_table(gt, &mocs.table);
 	}
+
+	xe_pm_runtime_put(xe);
+
 	return 0;
 }
 
@@ -128,3 +139,44 @@ void xe_live_mocs_kernel_kunit(struct kunit *test)
 	xe_call_for_each_device(mocs_kernel_test_run_device);
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_live_mocs_kernel_kunit);
+
+static int mocs_reset_test_run_device(struct xe_device *xe)
+{
+	/* Check the mocs setup is retained over GT reset */
+
+	struct live_mocs mocs;
+	struct xe_gt *gt;
+	unsigned int flags;
+	int id;
+	struct kunit *test = xe_cur_kunit();
+
+	xe_pm_runtime_get(xe);
+
+	for_each_gt(gt, xe, id) {
+		flags = live_mocs_init(&mocs, gt);
+		kunit_info(test, "mocs_reset_test before reset\n");
+		if (flags & HAS_GLOBAL_MOCS)
+			read_mocs_table(gt, &mocs.table);
+		if (flags & HAS_LNCF_MOCS)
+			read_l3cc_table(gt, &mocs.table);
+
+		xe_gt_reset_async(gt);
+		flush_work(&gt->reset.worker);
+
+		kunit_info(test, "mocs_reset_test after reset\n");
+		if (flags & HAS_GLOBAL_MOCS)
+			read_mocs_table(gt, &mocs.table);
+		if (flags & HAS_LNCF_MOCS)
+			read_l3cc_table(gt, &mocs.table);
+	}
+
+	xe_pm_runtime_put(xe);
+
+	return 0;
+}
+
+void xe_live_mocs_reset_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(mocs_reset_test_run_device);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_live_mocs_reset_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.c b/drivers/gpu/drm/xe/tests/xe_mocs_test.c
index 421b819fd4..6315886b65 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.c
@@ -9,6 +9,7 @@
 
 static struct kunit_case xe_mocs_tests[] = {
 	KUNIT_CASE(xe_live_mocs_kernel_kunit),
+	KUNIT_CASE(xe_live_mocs_reset_kunit),
 	{}
 };
 
@@ -18,8 +19,3 @@ static struct kunit_suite xe_mocs_test_suite = {
 };
 
 kunit_test_suite(xe_mocs_test_suite);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("xe_mocs kunit test");
-MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.h b/drivers/gpu/drm/xe/tests/xe_mocs_test.h
index 7faa3575e6..e7699d4954 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.h
@@ -9,5 +9,6 @@
 struct kunit;
 
 void xe_live_mocs_kernel_kunit(struct kunit *test);
+void xe_live_mocs_reset_kunit(struct kunit *test);
 
 #endif
diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c
index 602793644f..f62809ca8b 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci.c
@@ -156,6 +156,9 @@ int xe_pci_fake_device_init(struct xe_device *xe)
 		return -ENODEV;
 
 done:
+	xe->sriov.__mode = data && data->sriov_mode ?
+			   data->sriov_mode : XE_SRIOV_MODE_NONE;
+
 	kunit_activate_static_stub(test, read_gmdid, fake_read_gmdid);
 
 	xe_info_init_early(xe, desc, subplatform_desc);
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c
index 171e4180f1..a6705a5363 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c
@@ -64,8 +64,3 @@ static struct kunit_suite xe_pci_test_suite = {
 };
 
 kunit_test_suite(xe_pci_test_suite);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("xe_pci kunit test");
-MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h
index 811ffe5bd9..f40dcec839 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h
@@ -9,6 +9,7 @@
 #include <linux/types.h>
 
 #include "xe_platform_types.h"
+#include "xe_sriov_types.h"
 
 struct xe_device;
 struct xe_graphics_desc;
@@ -23,6 +24,7 @@ void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn);
 void xe_call_for_each_media_ip(xe_media_fn xe_fn);
 
 struct xe_pci_fake_data {
+	enum xe_sriov_mode sriov_mode;
 	enum xe_platform platform;
 	enum xe_subplatform subplatform;
 	u32 graphics_verx100;
diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
index 4a69728976..06759d7547 100644
--- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
@@ -15,6 +15,7 @@
 #include "regs/xe_reg_defs.h"
 #include "xe_device.h"
 #include "xe_device_types.h"
+#include "xe_kunit_helpers.h"
 #include "xe_pci_test.h"
 #include "xe_reg_sr.h"
 #include "xe_rtp.h"
@@ -276,9 +277,7 @@ static int xe_rtp_test_init(struct kunit *test)
 	dev = drm_kunit_helper_alloc_device(test);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
 
-	xe = drm_kunit_helper_alloc_drm_device(test, dev,
-					       struct xe_device,
-					       drm, DRIVER_GEM);
+	xe = xe_kunit_helper_alloc_xe_device(test, dev);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe);
 
 	/* Initialize an empty device */
@@ -312,8 +311,3 @@ static struct kunit_suite xe_rtp_test_suite = {
 };
 
 kunit_test_suite(xe_rtp_test_suite);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("xe_rtp kunit test");
-MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_test_mod.c b/drivers/gpu/drm/xe/tests/xe_test_mod.c
new file mode 100644
index 0000000000..875f3e6f96
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_test_mod.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+#include <linux/module.h>
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe kunit tests");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c
index b4715b78ef..9d0c715142 100644
--- a/drivers/gpu/drm/xe/tests/xe_wa_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c
@@ -9,6 +9,7 @@
 #include <kunit/test.h>
 
 #include "xe_device.h"
+#include "xe_kunit_helpers.h"
 #include "xe_pci_test.h"
 #include "xe_reg_sr.h"
 #include "xe_tuning.h"
@@ -65,17 +66,12 @@ static const struct platform_test_case cases[] = {
 	PLATFORM_CASE(ALDERLAKE_P, C0),
 	SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0),
 	SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0),
-	SUBPLATFORM_CASE(DG2, G10, A0),
-	SUBPLATFORM_CASE(DG2, G10, A1),
-	SUBPLATFORM_CASE(DG2, G10, B0),
 	SUBPLATFORM_CASE(DG2, G10, C0),
-	SUBPLATFORM_CASE(DG2, G11, A0),
-	SUBPLATFORM_CASE(DG2, G11, B0),
 	SUBPLATFORM_CASE(DG2, G11, B1),
-	SUBPLATFORM_CASE(DG2, G12, A0),
 	SUBPLATFORM_CASE(DG2, G12, A1),
 	GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0),
 	GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0),
+	GMDID_CASE(METEORLAKE, 1274, A0, 1300, A0),
 	GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0),
 	GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0),
 };
@@ -105,9 +101,7 @@ static int xe_wa_test_init(struct kunit *test)
 	dev = drm_kunit_helper_alloc_device(test);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
 
-	xe = drm_kunit_helper_alloc_drm_device(test, dev,
-					       struct xe_device,
-					       drm, DRIVER_GEM);
+	xe = xe_kunit_helper_alloc_xe_device(test, dev);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe);
 
 	test->priv = &data;
@@ -160,8 +154,3 @@ static struct kunit_suite xe_rtp_test_suite = {
 };
 
 kunit_test_suite(xe_rtp_test_suite);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("xe_wa kunit test");
-MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index 7c124475c4..541361caff 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -86,7 +86,8 @@ struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
 	};
 
 	xe_gt_assert(q->gt, second_idx <= bb->len);
-	xe_gt_assert(q->gt, q->vm->flags & XE_VM_FLAG_MIGRATION);
+	xe_gt_assert(q->gt, xe_sched_job_is_migration(q));
+	xe_gt_assert(q->gt, q->width == 1);
 
 	return __xe_bb_create_job(q, bb, addr);
 }
@@ -96,7 +97,8 @@ struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
 {
 	u64 addr = xe_sa_bo_gpu_addr(bb->bo);
 
-	xe_gt_assert(q->gt, !(q->vm && q->vm->flags & XE_VM_FLAG_MIGRATION));
+	xe_gt_assert(q->gt, !xe_sched_job_is_migration(q));
+	xe_gt_assert(q->gt, q->width == 1);
 	return __xe_bb_create_job(q, bb, &addr);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index eb2c44a328..b6f3a43d63 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -22,6 +22,7 @@
 #include "xe_gt.h"
 #include "xe_map.h"
 #include "xe_migrate.h"
+#include "xe_pm.h"
 #include "xe_preempt_fence.h"
 #include "xe_res_cursor.h"
 #include "xe_trace.h"
@@ -46,22 +47,26 @@ static const struct ttm_place sys_placement_flags = {
 static struct ttm_placement sys_placement = {
 	.num_placement = 1,
 	.placement = &sys_placement_flags,
-	.num_busy_placement = 1,
-	.busy_placement = &sys_placement_flags,
 };
 
-static const struct ttm_place tt_placement_flags = {
-	.fpfn = 0,
-	.lpfn = 0,
-	.mem_type = XE_PL_TT,
-	.flags = 0,
+static const struct ttm_place tt_placement_flags[] = {
+	{
+		.fpfn = 0,
+		.lpfn = 0,
+		.mem_type = XE_PL_TT,
+		.flags = TTM_PL_FLAG_DESIRED,
+	},
+	{
+		.fpfn = 0,
+		.lpfn = 0,
+		.mem_type = XE_PL_SYSTEM,
+		.flags = TTM_PL_FLAG_FALLBACK,
+	}
 };
 
 static struct ttm_placement tt_placement = {
-	.num_placement = 1,
-	.placement = &tt_placement_flags,
-	.num_busy_placement = 1,
-	.busy_placement = &sys_placement_flags,
+	.num_placement = 2,
+	.placement = tt_placement_flags,
 };
 
 bool mem_type_is_vram(u32 mem_type)
@@ -107,7 +112,7 @@ bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
 
 static bool xe_bo_is_user(struct xe_bo *bo)
 {
-	return bo->flags & XE_BO_CREATE_USER_BIT;
+	return bo->flags & XE_BO_FLAG_USER;
 }
 
 static struct xe_migrate *
@@ -133,7 +138,7 @@ static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res)
 static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
 			   u32 bo_flags, u32 *c)
 {
-	if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) {
+	if (bo_flags & XE_BO_FLAG_SYSTEM) {
 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
 
 		bo->placements[*c] = (struct ttm_place) {
@@ -160,12 +165,12 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
 	 * For eviction / restore on suspend / resume objects
 	 * pinned in VRAM must be contiguous
 	 */
-	if (bo_flags & (XE_BO_CREATE_PINNED_BIT |
-			XE_BO_CREATE_GGTT_BIT))
+	if (bo_flags & (XE_BO_FLAG_PINNED |
+			XE_BO_FLAG_GGTT))
 		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
 
 	if (io_size < vram->usable_size) {
-		if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) {
+		if (bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) {
 			place.fpfn = 0;
 			place.lpfn = io_size >> PAGE_SHIFT;
 		} else {
@@ -179,22 +184,22 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
 static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
 			 u32 bo_flags, u32 *c)
 {
-	if (bo_flags & XE_BO_CREATE_VRAM0_BIT)
+	if (bo_flags & XE_BO_FLAG_VRAM0)
 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
-	if (bo_flags & XE_BO_CREATE_VRAM1_BIT)
+	if (bo_flags & XE_BO_FLAG_VRAM1)
 		add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
 }
 
 static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
 			   u32 bo_flags, u32 *c)
 {
-	if (bo_flags & XE_BO_CREATE_STOLEN_BIT) {
+	if (bo_flags & XE_BO_FLAG_STOLEN) {
 		xe_assert(xe, *c < ARRAY_SIZE(bo->placements));
 
 		bo->placements[*c] = (struct ttm_place) {
 			.mem_type = XE_PL_STOLEN,
-			.flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
-					     XE_BO_CREATE_GGTT_BIT) ?
+			.flags = bo_flags & (XE_BO_FLAG_PINNED |
+					     XE_BO_FLAG_GGTT) ?
 				TTM_PL_FLAG_CONTIGUOUS : 0,
 		};
 		*c += 1;
@@ -216,8 +221,6 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
 	bo->placement = (struct ttm_placement) {
 		.num_placement = c,
 		.placement = bo->placements,
-		.num_busy_placement = c,
-		.busy_placement = bo->placements,
 	};
 
 	return 0;
@@ -237,7 +240,6 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo,
 		/* Don't handle scatter gather BOs */
 		if (tbo->type == ttm_bo_type_sg) {
 			placement->num_placement = 0;
-			placement->num_busy_placement = 0;
 			return;
 		}
 
@@ -315,7 +317,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
 	struct xe_device *xe = xe_bo_device(bo);
 	struct xe_ttm_tt *tt;
 	unsigned long extra_pages;
-	enum ttm_caching caching;
+	enum ttm_caching caching = ttm_cached;
 	int err;
 
 	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
@@ -329,26 +331,35 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
 		extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
 					   PAGE_SIZE);
 
-	switch (bo->cpu_caching) {
-	case DRM_XE_GEM_CPU_CACHING_WC:
-		caching = ttm_write_combined;
-		break;
-	default:
-		caching = ttm_cached;
-		break;
-	}
-
-	WARN_ON((bo->flags & XE_BO_CREATE_USER_BIT) && !bo->cpu_caching);
-
 	/*
-	 * Display scanout is always non-coherent with the CPU cache.
-	 *
-	 * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and
-	 * require a CPU:WC mapping.
+	 * DGFX system memory is always WB / ttm_cached, since
+	 * other caching modes are only supported on x86. DGFX
+	 * GPU system memory accesses are always coherent with the
+	 * CPU.
 	 */
-	if ((!bo->cpu_caching && bo->flags & XE_BO_SCANOUT_BIT) ||
-	    (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_PAGETABLE))
-		caching = ttm_write_combined;
+	if (!IS_DGFX(xe)) {
+		switch (bo->cpu_caching) {
+		case DRM_XE_GEM_CPU_CACHING_WC:
+			caching = ttm_write_combined;
+			break;
+		default:
+			caching = ttm_cached;
+			break;
+		}
+
+		WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
+
+		/*
+		 * Display scanout is always non-coherent with the CPU cache.
+		 *
+		 * For Xe_LPG and beyond, PPGTT PTE lookups are also
+		 * non-coherent and require a CPU:WC mapping.
+		 */
+		if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
+		    (xe->info.graphics_verx100 >= 1270 &&
+		     bo->flags & XE_BO_FLAG_PAGETABLE))
+			caching = ttm_write_combined;
+	}
 
 	err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
 	if (err) {
@@ -572,6 +583,8 @@ static int xe_bo_move_notify(struct xe_bo *bo,
 {
 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+	struct ttm_resource *old_mem = ttm_bo->resource;
+	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
 	int ret;
 
 	/*
@@ -591,6 +604,18 @@ static int xe_bo_move_notify(struct xe_bo *bo,
 	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
 		dma_buf_move_notify(ttm_bo->base.dma_buf);
 
+	/*
+	 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
+	 * so if we moved from VRAM make sure to unlink this from the userfault
+	 * tracking.
+	 */
+	if (mem_type_is_vram(old_mem_type)) {
+		mutex_lock(&xe->mem_access.vram_userfault.lock);
+		if (!list_empty(&bo->vram_userfault_link))
+			list_del_init(&bo->vram_userfault_link);
+		mutex_unlock(&xe->mem_access.vram_userfault.lock);
+	}
+
 	return 0;
 }
 
@@ -700,7 +725,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 
 	xe_assert(xe, migrate);
 	trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source);
-	xe_device_mem_access_get(xe);
+	xe_pm_runtime_get_noresume(xe);
 
 	if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) {
 		/*
@@ -724,7 +749,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 
 				if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) {
 					ret = -EINVAL;
-					xe_device_mem_access_put(xe);
+					xe_pm_runtime_put(xe);
 					goto out;
 				}
 
@@ -742,7 +767,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 						new_mem, handle_system_ccs);
 		if (IS_ERR(fence)) {
 			ret = PTR_ERR(fence);
-			xe_device_mem_access_put(xe);
+			xe_pm_runtime_put(xe);
 			goto out;
 		}
 		if (!move_lacks_source) {
@@ -767,7 +792,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 		dma_fence_put(fence);
 	}
 
-	xe_device_mem_access_put(xe);
+	xe_pm_runtime_put(xe);
 
 out:
 	return ret;
@@ -779,7 +804,6 @@ out:
  * @bo: The buffer object to move.
  *
  * On successful completion, the object memory will be moved to sytem memory.
- * This function blocks until the object has been fully moved.
  *
  * This is needed to for special handling of pinned VRAM object during
  * suspend-resume.
@@ -836,9 +860,6 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
 	if (ret)
 		goto err_res_free;
 
-	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
-			      false, MAX_SCHEDULE_TIMEOUT);
-
 	return 0;
 
 err_res_free:
@@ -851,7 +872,6 @@ err_res_free:
  * @bo: The buffer object to move.
  *
  * On successful completion, the object memory will be moved back to VRAM.
- * This function blocks until the object has been fully moved.
  *
  * This is needed to for special handling of pinned VRAM object during
  * suspend-resume.
@@ -893,9 +913,6 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
 	if (ret)
 		goto err_res_free;
 
-	dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
-			      false, MAX_SCHEDULE_TIMEOUT);
-
 	return 0;
 
 err_res_free:
@@ -1012,7 +1029,7 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
 	}
 }
 
-struct ttm_device_funcs xe_ttm_funcs = {
+const struct ttm_device_funcs xe_ttm_funcs = {
 	.ttm_tt_create = xe_ttm_tt_create,
 	.ttm_tt_populate = xe_ttm_tt_populate,
 	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
@@ -1048,6 +1065,11 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
 	if (bo->vm && xe_bo_is_user(bo))
 		xe_vm_put(bo->vm);
 
+	mutex_lock(&xe->mem_access.vram_userfault.lock);
+	if (!list_empty(&bo->vram_userfault_link))
+		list_del(&bo->vram_userfault_link);
+	mutex_unlock(&xe->mem_access.vram_userfault.lock);
+
 	kfree(bo);
 }
 
@@ -1088,16 +1110,20 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
 {
 	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
 	struct drm_device *ddev = tbo->base.dev;
+	struct xe_device *xe = to_xe_device(ddev);
+	struct xe_bo *bo = ttm_to_xe_bo(tbo);
+	bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
 	vm_fault_t ret;
 	int idx;
 
+	if (needs_rpm)
+		xe_pm_runtime_get(xe);
+
 	ret = ttm_bo_vm_reserve(tbo, vmf);
 	if (ret)
-		return ret;
+		goto out;
 
 	if (drm_dev_enter(ddev, &idx)) {
-		struct xe_bo *bo = ttm_to_xe_bo(tbo);
-
 		trace_xe_bo_cpu_fault(bo);
 
 		ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
@@ -1106,10 +1132,24 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
 	} else {
 		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
 	}
+
 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
-		return ret;
+		goto out;
+	/*
+	 * ttm_bo_vm_reserve() already has dma_resv_lock.
+	 */
+	if (ret == VM_FAULT_NOPAGE && mem_type_is_vram(tbo->resource->mem_type)) {
+		mutex_lock(&xe->mem_access.vram_userfault.lock);
+		if (list_empty(&bo->vram_userfault_link))
+			list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault.list);
+		mutex_unlock(&xe->mem_access.vram_userfault.lock);
+	}
 
 	dma_resv_unlock(tbo->base.resv);
+out:
+	if (needs_rpm)
+		xe_pm_runtime_put(xe);
+
 	return ret;
 }
 
@@ -1185,18 +1225,19 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (flags & (XE_BO_CREATE_VRAM_MASK | XE_BO_CREATE_STOLEN_BIT) &&
-	    !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) &&
-	    xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) {
+	if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) &&
+	    !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) &&
+	    ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ||
+	     (flags & XE_BO_NEEDS_64K))) {
 		aligned_size = ALIGN(size, SZ_64K);
 		if (type != ttm_bo_type_device)
 			size = ALIGN(size, SZ_64K);
-		flags |= XE_BO_INTERNAL_64K;
+		flags |= XE_BO_FLAG_INTERNAL_64K;
 		alignment = SZ_64K >> PAGE_SHIFT;
 
 	} else {
 		aligned_size = ALIGN(size, SZ_4K);
-		flags &= ~XE_BO_INTERNAL_64K;
+		flags &= ~XE_BO_FLAG_INTERNAL_64K;
 		alignment = SZ_4K >> PAGE_SHIFT;
 	}
 
@@ -1220,15 +1261,16 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 #ifdef CONFIG_PROC_FS
 	INIT_LIST_HEAD(&bo->client_link);
 #endif
+	INIT_LIST_HEAD(&bo->vram_userfault_link);
 
 	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
 
 	if (resv) {
-		ctx.allow_res_evict = !(flags & XE_BO_CREATE_NO_RESV_EVICT);
+		ctx.allow_res_evict = !(flags & XE_BO_FLAG_NO_RESV_EVICT);
 		ctx.resv = resv;
 	}
 
-	if (!(flags & XE_BO_FIXED_PLACEMENT_BIT)) {
+	if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) {
 		err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
 		if (WARN_ON(err)) {
 			xe_ttm_bo_destroy(&bo->ttm);
@@ -1238,7 +1280,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 
 	/* Defer populating type_sg bos */
 	placement = (type == ttm_bo_type_sg ||
-		     bo->flags & XE_BO_DEFER_BACKING) ? &sys_placement :
+		     bo->flags & XE_BO_FLAG_DEFER_BACKING) ? &sys_placement :
 		&bo->placement;
 	err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
 				   placement, alignment,
@@ -1293,21 +1335,21 @@ static int __xe_bo_fixed_placement(struct xe_device *xe,
 {
 	struct ttm_place *place = bo->placements;
 
-	if (flags & (XE_BO_CREATE_USER_BIT|XE_BO_CREATE_SYSTEM_BIT))
+	if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM))
 		return -EINVAL;
 
 	place->flags = TTM_PL_FLAG_CONTIGUOUS;
 	place->fpfn = start >> PAGE_SHIFT;
 	place->lpfn = end >> PAGE_SHIFT;
 
-	switch (flags & (XE_BO_CREATE_STOLEN_BIT | XE_BO_CREATE_VRAM_MASK)) {
-	case XE_BO_CREATE_VRAM0_BIT:
+	switch (flags & (XE_BO_FLAG_STOLEN | XE_BO_FLAG_VRAM_MASK)) {
+	case XE_BO_FLAG_VRAM0:
 		place->mem_type = XE_PL_VRAM0;
 		break;
-	case XE_BO_CREATE_VRAM1_BIT:
+	case XE_BO_FLAG_VRAM1:
 		place->mem_type = XE_PL_VRAM1;
 		break;
-	case XE_BO_CREATE_STOLEN_BIT:
+	case XE_BO_FLAG_STOLEN:
 		place->mem_type = XE_PL_STOLEN;
 		break;
 
@@ -1319,8 +1361,6 @@ static int __xe_bo_fixed_placement(struct xe_device *xe,
 	bo->placement = (struct ttm_placement) {
 		.num_placement = 1,
 		.placement = place,
-		.num_busy_placement = 1,
-		.busy_placement = place,
 	};
 
 	return 0;
@@ -1343,7 +1383,7 @@ __xe_bo_create_locked(struct xe_device *xe,
 		if (IS_ERR(bo))
 			return bo;
 
-		flags |= XE_BO_FIXED_PLACEMENT_BIT;
+		flags |= XE_BO_FLAG_FIXED_PLACEMENT;
 		err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
 		if (err) {
 			xe_bo_free(bo);
@@ -1353,7 +1393,7 @@ __xe_bo_create_locked(struct xe_device *xe,
 
 	bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
 				    vm && !xe_vm_in_fault_mode(vm) &&
-				    flags & XE_BO_CREATE_USER_BIT ?
+				    flags & XE_BO_FLAG_USER ?
 				    &vm->lru_bulk_move : NULL, size,
 				    cpu_caching, type, flags);
 	if (IS_ERR(bo))
@@ -1370,13 +1410,13 @@ __xe_bo_create_locked(struct xe_device *xe,
 		xe_vm_get(vm);
 	bo->vm = vm;
 
-	if (bo->flags & XE_BO_CREATE_GGTT_BIT) {
-		if (!tile && flags & XE_BO_CREATE_STOLEN_BIT)
+	if (bo->flags & XE_BO_FLAG_GGTT) {
+		if (!tile && flags & XE_BO_FLAG_STOLEN)
 			tile = xe_device_get_root_tile(xe);
 
 		xe_assert(xe, tile);
 
-		if (flags & XE_BO_FIXED_PLACEMENT_BIT) {
+		if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
 			err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo,
 						   start + bo->size, U64_MAX);
 		} else {
@@ -1419,7 +1459,7 @@ struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
 {
 	struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
 						 cpu_caching, type,
-						 flags | XE_BO_CREATE_USER_BIT);
+						 flags | XE_BO_FLAG_USER);
 	if (!IS_ERR(bo))
 		xe_bo_unlock_vm_held(bo);
 
@@ -1448,12 +1488,12 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile
 	u64 start = offset == ~0ull ? 0 : offset;
 	u64 end = offset == ~0ull ? offset : start + size;
 
-	if (flags & XE_BO_CREATE_STOLEN_BIT &&
+	if (flags & XE_BO_FLAG_STOLEN &&
 	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
-		flags |= XE_BO_CREATE_GGTT_BIT;
+		flags |= XE_BO_FLAG_GGTT;
 
 	bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
-				       flags | XE_BO_NEEDS_CPU_ACCESS);
+				       flags | XE_BO_FLAG_NEEDS_CPU_ACCESS);
 	if (IS_ERR(bo))
 		return bo;
 
@@ -1534,6 +1574,40 @@ struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_til
 	return bo;
 }
 
+/**
+ * xe_managed_bo_reinit_in_vram
+ * @xe: xe device
+ * @tile: Tile where the new buffer will be created
+ * @src: Managed buffer object allocated in system memory
+ *
+ * Replace a managed src buffer object allocated in system memory with a new
+ * one allocated in vram, copying the data between them.
+ * Buffer object in VRAM is not going to have the same GGTT address, the caller
+ * is responsible for making sure that any old references to it are updated.
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src)
+{
+	struct xe_bo *bo;
+	u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT;
+
+	dst_flags |= (*src)->flags & XE_BO_FLAG_GGTT_INVALIDATE;
+
+	xe_assert(xe, IS_DGFX(xe));
+	xe_assert(xe, !(*src)->vmap.is_iomem);
+
+	bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr,
+					    (*src)->size, dst_flags);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	drmm_release_action(&xe->drm, __xe_bo_unpin_map_no_vm, *src);
+	*src = bo;
+
+	return 0;
+}
+
 /*
  * XXX: This is in the VM bind data path, likely should calculate this once and
  * store, with a recalculation if the BO is moved.
@@ -1599,8 +1673,8 @@ int xe_bo_pin(struct xe_bo *bo)
 	xe_assert(xe, !xe_bo_is_user(bo));
 
 	/* Pinned object must be in GGTT or have pinned flag */
-	xe_assert(xe, bo->flags & (XE_BO_CREATE_PINNED_BIT |
-				   XE_BO_CREATE_GGTT_BIT));
+	xe_assert(xe, bo->flags & (XE_BO_FLAG_PINNED |
+				   XE_BO_FLAG_GGTT));
 
 	/*
 	 * No reason we can't support pinning imported dma-bufs we just don't
@@ -1621,7 +1695,7 @@ int xe_bo_pin(struct xe_bo *bo)
 	 * during suspend / resume (force restore to same physical address).
 	 */
 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
-	    bo->flags & XE_BO_INTERNAL_TEST)) {
+	    bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
 		struct ttm_place *place = &(bo->placements[0]);
 
 		if (mem_type_is_vram(place->mem_type)) {
@@ -1689,7 +1763,7 @@ void xe_bo_unpin(struct xe_bo *bo)
 	xe_assert(xe, xe_bo_is_pinned(bo));
 
 	if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) &&
-	    bo->flags & XE_BO_INTERNAL_TEST)) {
+	    bo->flags & XE_BO_FLAG_INTERNAL_TEST)) {
 		struct ttm_place *place = &(bo->placements[0]);
 
 		if (mem_type_is_vram(place->mem_type)) {
@@ -1792,7 +1866,7 @@ int xe_bo_vmap(struct xe_bo *bo)
 
 	xe_bo_assert_held(bo);
 
-	if (!(bo->flags & XE_BO_NEEDS_CPU_ACCESS))
+	if (!(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS))
 		return -EINVAL;
 
 	if (!iosys_map_is_null(&bo->vmap))
@@ -1874,29 +1948,29 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 
 	bo_flags = 0;
 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
-		bo_flags |= XE_BO_DEFER_BACKING;
+		bo_flags |= XE_BO_FLAG_DEFER_BACKING;
 
 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
-		bo_flags |= XE_BO_SCANOUT_BIT;
+		bo_flags |= XE_BO_FLAG_SCANOUT;
 
-	bo_flags |= args->placement << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1);
+	bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1);
 
 	if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) {
-		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_CREATE_VRAM_MASK)))
+		if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK)))
 			return -EINVAL;
 
-		bo_flags |= XE_BO_NEEDS_CPU_ACCESS;
+		bo_flags |= XE_BO_FLAG_NEEDS_CPU_ACCESS;
 	}
 
 	if (XE_IOCTL_DBG(xe, !args->cpu_caching ||
 			 args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_CREATE_VRAM_MASK &&
+	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_VRAM_MASK &&
 			 args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
 		return -EINVAL;
 
-	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_SCANOUT_BIT &&
+	if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_SCANOUT &&
 			 args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
 		return -EINVAL;
 
@@ -2078,9 +2152,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
 
 	xe_place_from_ttm_type(mem_type, &requested);
 	placement.num_placement = 1;
-	placement.num_busy_placement = 1;
 	placement.placement = &requested;
-	placement.busy_placement = &requested;
 
 	/*
 	 * Stolen needs to be handled like below VRAM handling if we ever need
@@ -2139,6 +2211,9 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
 {
 	struct xe_device *xe = xe_bo_device(bo);
 
+	if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe))
+		return false;
+
 	if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device)
 		return false;
 
@@ -2147,7 +2222,7 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
 	 * can't be used since there's no CCS storage associated with
 	 * non-VRAM addresses.
 	 */
-	if (IS_DGFX(xe) && (bo->flags & XE_BO_CREATE_SYSTEM_BIT))
+	if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
 		return false;
 
 	return true;
@@ -2216,9 +2291,9 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
 	bo = xe_bo_create_user(xe, NULL, NULL, args->size,
 			       DRM_XE_GEM_CPU_CACHING_WC,
 			       ttm_bo_type_device,
-			       XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
-			       XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT |
-			       XE_BO_NEEDS_CPU_ACCESS);
+			       XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
+			       XE_BO_FLAG_SCANOUT |
+			       XE_BO_FLAG_NEEDS_CPU_ACCESS);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
@@ -2230,6 +2305,16 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
 	return err;
 }
 
+void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
+{
+	struct ttm_buffer_object *tbo = &bo->ttm;
+	struct ttm_device *bdev = tbo->bdev;
+
+	drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
+
+	list_del_init(&bo->vram_userfault_link);
+}
+
 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
 #include "tests/xe_bo.c"
 #endif
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 8be42ac6cd..a885b14bf5 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -13,47 +13,34 @@
 #include "xe_vm_types.h"
 #include "xe_vm.h"
 
-/**
- * xe_vm_assert_held(vm) - Assert that the vm's reservation object is held.
- * @vm: The vm
- */
-#define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm))
-
-
-
 #define XE_DEFAULT_GTT_SIZE_MB          3072ULL /* 3GB by default */
 
-#define XE_BO_CREATE_USER_BIT		BIT(0)
+#define XE_BO_FLAG_USER		BIT(0)
 /* The bits below need to be contiguous, or things break */
-#define XE_BO_CREATE_SYSTEM_BIT		BIT(1)
-#define XE_BO_CREATE_VRAM0_BIT		BIT(2)
-#define XE_BO_CREATE_VRAM1_BIT		BIT(3)
-#define XE_BO_CREATE_VRAM_MASK		(XE_BO_CREATE_VRAM0_BIT | \
-					 XE_BO_CREATE_VRAM1_BIT)
+#define XE_BO_FLAG_SYSTEM		BIT(1)
+#define XE_BO_FLAG_VRAM0		BIT(2)
+#define XE_BO_FLAG_VRAM1		BIT(3)
+#define XE_BO_FLAG_VRAM_MASK		(XE_BO_FLAG_VRAM0 | XE_BO_FLAG_VRAM1)
 /* -- */
-#define XE_BO_CREATE_STOLEN_BIT		BIT(4)
-#define XE_BO_CREATE_VRAM_IF_DGFX(tile) \
-	(IS_DGFX(tile_to_xe(tile)) ? XE_BO_CREATE_VRAM0_BIT << (tile)->id : \
-	 XE_BO_CREATE_SYSTEM_BIT)
-#define XE_BO_CREATE_GGTT_BIT		BIT(5)
-#define XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT BIT(6)
-#define XE_BO_CREATE_PINNED_BIT		BIT(7)
-#define XE_BO_CREATE_NO_RESV_EVICT	BIT(8)
-#define XE_BO_DEFER_BACKING		BIT(9)
-#define XE_BO_SCANOUT_BIT		BIT(10)
-#define XE_BO_FIXED_PLACEMENT_BIT	BIT(11)
-#define XE_BO_PAGETABLE			BIT(12)
-#define XE_BO_NEEDS_CPU_ACCESS		BIT(13)
+#define XE_BO_FLAG_STOLEN		BIT(4)
+#define XE_BO_FLAG_VRAM_IF_DGFX(tile)	(IS_DGFX(tile_to_xe(tile)) ? \
+					 XE_BO_FLAG_VRAM0 << (tile)->id : \
+					 XE_BO_FLAG_SYSTEM)
+#define XE_BO_FLAG_GGTT			BIT(5)
+#define XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE BIT(6)
+#define XE_BO_FLAG_PINNED		BIT(7)
+#define XE_BO_FLAG_NO_RESV_EVICT	BIT(8)
+#define XE_BO_FLAG_DEFER_BACKING	BIT(9)
+#define XE_BO_FLAG_SCANOUT		BIT(10)
+#define XE_BO_FLAG_FIXED_PLACEMENT	BIT(11)
+#define XE_BO_FLAG_PAGETABLE		BIT(12)
+#define XE_BO_FLAG_NEEDS_CPU_ACCESS	BIT(13)
+#define XE_BO_FLAG_NEEDS_UC		BIT(14)
+#define XE_BO_NEEDS_64K			BIT(15)
+#define XE_BO_FLAG_GGTT_INVALIDATE	BIT(16)
 /* this one is trigger internally only */
-#define XE_BO_INTERNAL_TEST		BIT(30)
-#define XE_BO_INTERNAL_64K		BIT(31)
-
-#define XELPG_PPGTT_PTE_PAT3		BIT_ULL(62)
-#define XE2_PPGTT_PTE_PAT4		BIT_ULL(61)
-#define XE_PPGTT_PDE_PDPE_PAT2		BIT_ULL(12)
-#define XE_PPGTT_PTE_PAT2		BIT_ULL(7)
-#define XE_PPGTT_PTE_PAT1		BIT_ULL(4)
-#define XE_PPGTT_PTE_PAT0		BIT_ULL(3)
+#define XE_BO_FLAG_INTERNAL_TEST	BIT(30)
+#define XE_BO_FLAG_INTERNAL_64K		BIT(31)
 
 #define XE_PTE_SHIFT			12
 #define XE_PAGE_SIZE			(1 << XE_PTE_SHIFT)
@@ -67,20 +54,6 @@
 #define XE_64K_PTE_MASK			(XE_64K_PAGE_SIZE - 1)
 #define XE_64K_PDE_MASK			(XE_PDE_MASK >> 4)
 
-#define XE_PDE_PS_2M			BIT_ULL(7)
-#define XE_PDPE_PS_1G			BIT_ULL(7)
-#define XE_PDE_IPS_64K			BIT_ULL(11)
-
-#define XE_GGTT_PTE_DM			BIT_ULL(1)
-#define XE_USM_PPGTT_PTE_AE		BIT_ULL(10)
-#define XE_PPGTT_PTE_DM			BIT_ULL(11)
-#define XE_PDE_64K			BIT_ULL(6)
-#define XE_PTE_PS64			BIT_ULL(8)
-#define XE_PTE_NULL			BIT_ULL(9)
-
-#define XE_PAGE_PRESENT			BIT_ULL(0)
-#define XE_PAGE_RW			BIT_ULL(1)
-
 #define XE_PL_SYSTEM		TTM_PL_SYSTEM
 #define XE_PL_TT		TTM_PL_TT
 #define XE_PL_VRAM0		TTM_PL_VRAM
@@ -128,6 +101,7 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile
 					   size_t size, u32 flags);
 struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
 					     const void *data, size_t size, u32 flags);
+int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src);
 
 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
 			      u32 bo_flags);
@@ -242,13 +216,15 @@ int xe_bo_evict(struct xe_bo *bo, bool force_alloc);
 int xe_bo_evict_pinned(struct xe_bo *bo);
 int xe_bo_restore_pinned(struct xe_bo *bo);
 
-extern struct ttm_device_funcs xe_ttm_funcs;
+extern const struct ttm_device_funcs xe_ttm_funcs;
 extern const char *const xe_mem_type_to_name[];
 
 int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file);
 int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file);
+void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo);
+
 int xe_bo_dumb_create(struct drm_file *file_priv,
 		      struct drm_device *dev,
 		      struct drm_mode_create_dumb *args);
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index 7a264a9ca0..541b49007d 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -146,7 +146,7 @@ int xe_bo_restore_kernel(struct xe_device *xe)
 			return ret;
 		}
 
-		if (bo->flags & XE_BO_CREATE_GGTT_BIT) {
+		if (bo->flags & XE_BO_FLAG_GGTT) {
 			struct xe_tile *tile = bo->tile;
 
 			mutex_lock(&tile->mem.ggtt->lock);
@@ -220,7 +220,7 @@ int xe_bo_restore_user(struct xe_device *xe)
 	list_splice_tail(&still_in_list, &xe->pinned.external_vram);
 	spin_unlock(&xe->pinned.lock);
 
-	/* Wait for validate to complete */
+	/* Wait for restore to complete */
 	for_each_tile(tile, xe, id)
 		xe_tile_migrate_wait(tile);
 
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 81dca15315..10450f1fbb 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -66,9 +66,13 @@ struct xe_bo {
 
 	/**
 	 * @cpu_caching: CPU caching mode. Currently only used for userspace
-	 * objects.
+	 * objects. Exceptions are system memory on DGFX, which is always
+	 * WB.
 	 */
 	u16 cpu_caching;
+
+	/** @vram_userfault_link: Link into @mem_access.vram_userfault.list */
+		struct list_head vram_userfault_link;
 };
 
 #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base)
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index c56fd7d59f..0b7aebaae8 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -5,6 +5,7 @@
 
 #include "xe_debugfs.h"
 
+#include <linux/debugfs.h>
 #include <linux/string_helpers.h>
 
 #include <drm/drm_debugfs.h>
@@ -12,6 +13,8 @@
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_gt_debugfs.h"
+#include "xe_pm.h"
+#include "xe_sriov.h"
 #include "xe_step.h"
 
 #ifdef CONFIG_DRM_XE_DEBUG
@@ -37,6 +40,8 @@ static int info(struct seq_file *m, void *data)
 	struct xe_gt *gt;
 	u8 id;
 
+	xe_pm_runtime_get(xe);
+
 	drm_printf(&p, "graphics_verx100 %d\n", xe->info.graphics_verx100);
 	drm_printf(&p, "media_verx100 %d\n", xe->info.media_verx100);
 	drm_printf(&p, "stepping G:%s M:%s D:%s B:%s\n",
@@ -55,6 +60,7 @@ static int info(struct seq_file *m, void *data)
 	drm_printf(&p, "force_execlist %s\n", str_yes_no(xe->info.force_execlist));
 	drm_printf(&p, "has_flat_ccs %s\n", str_yes_no(xe->info.has_flat_ccs));
 	drm_printf(&p, "has_usm %s\n", str_yes_no(xe->info.has_usm));
+	drm_printf(&p, "skip_guc_pc %s\n", str_yes_no(xe->info.skip_guc_pc));
 	for_each_gt(gt, xe, id) {
 		drm_printf(&p, "gt%d force wake %d\n", id,
 			   xe_force_wake_ref(gt_to_fw(gt), XE_FW_GT));
@@ -62,11 +68,22 @@ static int info(struct seq_file *m, void *data)
 			   gt->info.engine_mask);
 	}
 
+	xe_pm_runtime_put(xe);
+	return 0;
+}
+
+static int sriov_info(struct seq_file *m, void *data)
+{
+	struct xe_device *xe = node_to_xe(m->private);
+	struct drm_printer p = drm_seq_file_printer(m);
+
+	xe_sriov_print_info(xe, &p);
 	return 0;
 }
 
 static const struct drm_info_list debugfs_list[] = {
 	{"info", info, 0},
+	{ .name = "sriov_info", .show = sriov_info, },
 };
 
 static int forcewake_open(struct inode *inode, struct file *file)
@@ -75,8 +92,7 @@ static int forcewake_open(struct inode *inode, struct file *file)
 	struct xe_gt *gt;
 	u8 id;
 
-	xe_device_mem_access_get(xe);
-
+	xe_pm_runtime_get(xe);
 	for_each_gt(gt, xe, id)
 		XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
 
@@ -91,8 +107,7 @@ static int forcewake_release(struct inode *inode, struct file *file)
 
 	for_each_gt(gt, xe, id)
 		XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
-
-	xe_device_mem_access_put(xe);
+	xe_pm_runtime_put(xe);
 
 	return 0;
 }
@@ -126,7 +141,7 @@ void xe_debugfs_register(struct xe_device *xe)
 		if (man) {
 			char name[16];
 
-			sprintf(name, "vram%d_mm", mem_type - XE_PL_VRAM0);
+			snprintf(name, sizeof(name), "vram%d_mm", mem_type - XE_PL_VRAM0);
 			ttm_resource_manager_create_debugfs(man, root, name);
 		}
 	}
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 68abc0b195..3d7980232b 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -9,13 +9,18 @@
 #include <linux/devcoredump.h>
 #include <generated/utsrelease.h>
 
+#include <drm/drm_managed.h>
+
 #include "xe_device.h"
 #include "xe_exec_queue.h"
 #include "xe_force_wake.h"
 #include "xe_gt.h"
+#include "xe_gt_printk.h"
 #include "xe_guc_ct.h"
 #include "xe_guc_submit.h"
 #include "xe_hw_engine.h"
+#include "xe_sched_job.h"
+#include "xe_vm.h"
 
 /**
  * DOC: Xe device coredump
@@ -58,26 +63,43 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q)
 	return &q->gt->uc.guc;
 }
 
+static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
+{
+	struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
+
+	/* keep going if fw fails as we still want to save the memory and SW data */
+	if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL))
+		xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
+	xe_vm_snapshot_capture_delayed(ss->vm);
+	xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
+	xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
+}
+
 static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
 				   size_t count, void *data, size_t datalen)
 {
 	struct xe_devcoredump *coredump = data;
+	struct xe_device *xe;
 	struct xe_devcoredump_snapshot *ss;
 	struct drm_printer p;
 	struct drm_print_iterator iter;
 	struct timespec64 ts;
 	int i;
 
-	/* Our device is gone already... */
-	if (!data || !coredump_to_xe(coredump))
+	if (!coredump)
 		return -ENODEV;
 
+	xe = coredump_to_xe(coredump);
+	ss = &coredump->snapshot;
+
+	/* Ensure delayed work is captured before continuing */
+	flush_work(&ss->work);
+
 	iter.data = buffer;
 	iter.offset = 0;
 	iter.start = offset;
 	iter.remain = count;
 
-	ss = &coredump->snapshot;
 	p = drm_coredump_printer(&iter);
 
 	drm_printf(&p, "**** Xe Device Coredump ****\n");
@@ -88,16 +110,22 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
 	drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
 	ts = ktime_to_timespec64(ss->boot_time);
 	drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
+	xe_device_snapshot_print(xe, &p);
 
 	drm_printf(&p, "\n**** GuC CT ****\n");
 	xe_guc_ct_snapshot_print(coredump->snapshot.ct, &p);
 	xe_guc_exec_queue_snapshot_print(coredump->snapshot.ge, &p);
 
+	drm_printf(&p, "\n**** Job ****\n");
+	xe_sched_job_snapshot_print(coredump->snapshot.job, &p);
+
 	drm_printf(&p, "\n**** HW Engines ****\n");
 	for (i = 0; i < XE_NUM_HW_ENGINES; i++)
 		if (coredump->snapshot.hwe[i])
 			xe_hw_engine_snapshot_print(coredump->snapshot.hwe[i],
 						    &p);
+	drm_printf(&p, "\n**** VM state ****\n");
+	xe_vm_snapshot_print(coredump->snapshot.vm, &p);
 
 	return count - iter.remain;
 }
@@ -111,21 +139,28 @@ static void xe_devcoredump_free(void *data)
 	if (!data || !coredump_to_xe(coredump))
 		return;
 
+	cancel_work_sync(&coredump->snapshot.work);
+
 	xe_guc_ct_snapshot_free(coredump->snapshot.ct);
 	xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge);
+	xe_sched_job_snapshot_free(coredump->snapshot.job);
 	for (i = 0; i < XE_NUM_HW_ENGINES; i++)
 		if (coredump->snapshot.hwe[i])
 			xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]);
+	xe_vm_snapshot_free(coredump->snapshot.vm);
 
+	/* To prevent stale data on next snapshot, clear everything */
+	memset(&coredump->snapshot, 0, sizeof(coredump->snapshot));
 	coredump->captured = false;
 	drm_info(&coredump_to_xe(coredump)->drm,
 		 "Xe device coredump has been deleted.\n");
 }
 
 static void devcoredump_snapshot(struct xe_devcoredump *coredump,
-				 struct xe_exec_queue *q)
+				 struct xe_sched_job *job)
 {
 	struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
+	struct xe_exec_queue *q = job->q;
 	struct xe_guc *guc = exec_queue_to_guc(q);
 	struct xe_hw_engine *hwe;
 	enum xe_hw_engine_id id;
@@ -137,6 +172,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
 	ss->snapshot_time = ktime_get_real();
 	ss->boot_time = ktime_get_boottime();
 
+	ss->gt = q->gt;
+	INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work);
+
 	cookie = dma_fence_begin_signalling();
 	for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
 		if (adj_logical_mask & BIT(i)) {
@@ -147,10 +185,14 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
 		}
 	}
 
-	xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
+	/* keep going if fw fails as we still want to save the memory and SW data */
+	if (xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL))
+		xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
 
 	coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
 	coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(q);
+	coredump->snapshot.job = xe_sched_job_snapshot_capture(job);
+	coredump->snapshot.vm = xe_vm_snapshot_capture(q->vm);
 
 	for_each_hw_engine(hwe, q->gt, id) {
 		if (hwe->class != q->hwe->class ||
@@ -161,21 +203,23 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
 		coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe);
 	}
 
+	queue_work(system_unbound_wq, &ss->work);
+
 	xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
 	dma_fence_end_signalling(cookie);
 }
 
 /**
  * xe_devcoredump - Take the required snapshots and initialize coredump device.
- * @q: The faulty xe_exec_queue, where the issue was detected.
+ * @job: The faulty xe_sched_job, where the issue was detected.
  *
  * This function should be called at the crash time within the serialized
  * gt_reset. It is skipped if we still have the core dump device available
  * with the information of the 'first' snapshot.
  */
-void xe_devcoredump(struct xe_exec_queue *q)
+void xe_devcoredump(struct xe_sched_job *job)
 {
-	struct xe_device *xe = gt_to_xe(q->gt);
+	struct xe_device *xe = gt_to_xe(job->q->gt);
 	struct xe_devcoredump *coredump = &xe->devcoredump;
 
 	if (coredump->captured) {
@@ -184,7 +228,7 @@ void xe_devcoredump(struct xe_exec_queue *q)
 	}
 
 	coredump->captured = true;
-	devcoredump_snapshot(coredump, q);
+	devcoredump_snapshot(coredump, job);
 
 	drm_info(&xe->drm, "Xe device coredump has been created\n");
 	drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
@@ -193,4 +237,14 @@ void xe_devcoredump(struct xe_exec_queue *q)
 	dev_coredumpm(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL,
 		      xe_devcoredump_read, xe_devcoredump_free);
 }
+
+static void xe_driver_devcoredump_fini(struct drm_device *drm, void *arg)
+{
+	dev_coredump_put(drm->dev);
+}
+
+int xe_devcoredump_init(struct xe_device *xe)
+{
+	return drmm_add_action_or_reset(&xe->drm, xe_driver_devcoredump_fini, xe);
+}
 #endif
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h
index 6ac218a5c1..e2fa65ce09 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump.h
@@ -7,14 +7,20 @@
 #define _XE_DEVCOREDUMP_H_
 
 struct xe_device;
-struct xe_exec_queue;
+struct xe_sched_job;
 
 #ifdef CONFIG_DEV_COREDUMP
-void xe_devcoredump(struct xe_exec_queue *q);
+void xe_devcoredump(struct xe_sched_job *job);
+int xe_devcoredump_init(struct xe_device *xe);
 #else
-static inline void xe_devcoredump(struct xe_exec_queue *q)
+static inline void xe_devcoredump(struct xe_sched_job *job)
 {
 }
+
+static inline int xe_devcoredump_init(struct xe_device *xe)
+{
+	return 0;
+}
 #endif
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
index 7fdad9c3d3..6f654b63c7 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
@@ -12,6 +12,7 @@
 #include "xe_hw_engine_types.h"
 
 struct xe_device;
+struct xe_gt;
 
 /**
  * struct xe_devcoredump_snapshot - Crash snapshot
@@ -26,13 +27,23 @@ struct xe_devcoredump_snapshot {
 	/** @boot_time:  Relative boot time so the uptime can be calculated. */
 	ktime_t boot_time;
 
+	/** @gt: Affected GT, used by forcewake for delayed capture */
+	struct xe_gt *gt;
+	/** @work: Workqueue for deferred capture outside of signaling context */
+	struct work_struct work;
+
 	/* GuC snapshots */
 	/** @ct: GuC CT snapshot */
 	struct xe_guc_ct_snapshot *ct;
 	/** @ge: Guc Engine snapshot */
 	struct xe_guc_submit_exec_queue_snapshot *ge;
+
 	/** @hwe: HW Engine snapshot array */
 	struct xe_hw_engine_snapshot *hwe[XE_NUM_HW_ENGINES];
+	/** @job: Snapshot of job state */
+	struct xe_sched_job_snapshot *job;
+	/** @vm: Snapshot of VM state */
+	struct xe_vm_snapshot *vm;
 };
 
 /**
@@ -44,8 +55,6 @@ struct xe_devcoredump_snapshot {
  * for reading the information.
  */
 struct xe_devcoredump {
-	/** @xe: Xe device. */
-	struct xe_device *xe;
 	/** @captured: The snapshot of the first hang has already been taken. */
 	bool captured;
 	/** @snapshot: Snapshot is captured at time of the first crash */
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 5176c27e4b..5ef9b50a20 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -9,44 +9,43 @@
 
 #include <drm/drm_aperture.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_client.h>
 #include <drm/drm_gem_ttm_helper.h>
 #include <drm/drm_ioctl.h>
 #include <drm/drm_managed.h>
 #include <drm/drm_print.h>
 #include <drm/xe_drm.h>
 
+#include "display/xe_display.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_regs.h"
 #include "xe_bo.h"
 #include "xe_debugfs.h"
-#include "xe_display.h"
+#include "xe_devcoredump.h"
 #include "xe_dma_buf.h"
 #include "xe_drm_client.h"
 #include "xe_drv.h"
-#include "xe_exec_queue.h"
 #include "xe_exec.h"
+#include "xe_exec_queue.h"
 #include "xe_ggtt.h"
+#include "xe_gsc_proxy.h"
 #include "xe_gt.h"
 #include "xe_gt_mcr.h"
+#include "xe_hwmon.h"
 #include "xe_irq.h"
+#include "xe_memirq.h"
 #include "xe_mmio.h"
 #include "xe_module.h"
 #include "xe_pat.h"
 #include "xe_pcode.h"
 #include "xe_pm.h"
 #include "xe_query.h"
+#include "xe_sriov.h"
 #include "xe_tile.h"
 #include "xe_ttm_stolen_mgr.h"
 #include "xe_ttm_sys_mgr.h"
 #include "xe_vm.h"
 #include "xe_wait_user_fence.h"
-#include "xe_hwmon.h"
-
-#ifdef CONFIG_LOCKDEP
-struct lockdep_map xe_device_mem_access_lockdep_map = {
-	.name = "xe_device_mem_access_lockdep_map"
-};
-#endif
 
 static int xe_file_open(struct drm_device *dev, struct drm_file *file)
 {
@@ -133,15 +132,48 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
 			  DRM_RENDER_ALLOW),
 };
 
+static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct drm_file *file_priv = file->private_data;
+	struct xe_device *xe = to_xe_device(file_priv->minor->dev);
+	long ret;
+
+	ret = xe_pm_runtime_get_ioctl(xe);
+	if (ret >= 0)
+		ret = drm_ioctl(file, cmd, arg);
+	xe_pm_runtime_put(xe);
+
+	return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct drm_file *file_priv = file->private_data;
+	struct xe_device *xe = to_xe_device(file_priv->minor->dev);
+	long ret;
+
+	ret = xe_pm_runtime_get_ioctl(xe);
+	if (ret >= 0)
+		ret = drm_compat_ioctl(file, cmd, arg);
+	xe_pm_runtime_put(xe);
+
+	return ret;
+}
+#else
+/* similarly to drm_compat_ioctl, let's it be assigned to .compat_ioct unconditionally */
+#define xe_drm_compat_ioctl NULL
+#endif
+
 static const struct file_operations xe_driver_fops = {
 	.owner = THIS_MODULE,
 	.open = drm_open,
 	.release = drm_release_noglobal,
-	.unlocked_ioctl = drm_ioctl,
+	.unlocked_ioctl = xe_drm_ioctl,
 	.mmap = drm_gem_mmap,
 	.poll = drm_poll,
 	.read = drm_read,
-	.compat_ioctl = drm_compat_ioctl,
+	.compat_ioctl = xe_drm_compat_ioctl,
 	.llseek = noop_llseek,
 #ifdef CONFIG_PROC_FS
 	.show_fdinfo = drm_show_fdinfo,
@@ -190,6 +222,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy)
 {
 	struct xe_device *xe = to_xe_device(dev);
 
+	if (xe->preempt_fence_wq)
+		destroy_workqueue(xe->preempt_fence_wq);
+
 	if (xe->ordered_wq)
 		destroy_workqueue(xe->ordered_wq);
 
@@ -255,9 +290,15 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
 	INIT_LIST_HEAD(&xe->pinned.external_vram);
 	INIT_LIST_HEAD(&xe->pinned.evicted);
 
+	xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0);
 	xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
 	xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
-	if (!xe->ordered_wq || !xe->unordered_wq) {
+	if (!xe->ordered_wq || !xe->unordered_wq ||
+	    !xe->preempt_fence_wq) {
+		/*
+		 * Cleanup done in xe_device_destroy via
+		 * drmm_add_action_or_reset register above
+		 */
 		drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
 		err = -ENOMEM;
 		goto err;
@@ -377,8 +418,70 @@ mask_err:
 	return err;
 }
 
-/*
- * Initialize MMIO resources that don't require any knowledge about tile count.
+static bool verify_lmem_ready(struct xe_gt *gt)
+{
+	u32 val = xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT;
+
+	return !!val;
+}
+
+static int wait_for_lmem_ready(struct xe_device *xe)
+{
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
+	unsigned long timeout, start;
+
+	if (!IS_DGFX(xe))
+		return 0;
+
+	if (IS_SRIOV_VF(xe))
+		return 0;
+
+	if (verify_lmem_ready(gt))
+		return 0;
+
+	drm_dbg(&xe->drm, "Waiting for lmem initialization\n");
+
+	start = jiffies;
+	timeout = start + msecs_to_jiffies(60 * 1000); /* 60 sec! */
+
+	do {
+		if (signal_pending(current))
+			return -EINTR;
+
+		/*
+		 * The boot firmware initializes local memory and
+		 * assesses its health. If memory training fails,
+		 * the punit will have been instructed to keep the GT powered
+		 * down.we won't be able to communicate with it
+		 *
+		 * If the status check is done before punit updates the register,
+		 * it can lead to the system being unusable.
+		 * use a timeout and defer the probe to prevent this.
+		 */
+		if (time_after(jiffies, timeout)) {
+			drm_dbg(&xe->drm, "lmem not initialized by firmware\n");
+			return -EPROBE_DEFER;
+		}
+
+		msleep(20);
+
+	} while (!verify_lmem_ready(gt));
+
+	drm_dbg(&xe->drm, "lmem ready after %ums",
+		jiffies_to_msecs(jiffies - start));
+
+	return 0;
+}
+
+/**
+ * xe_device_probe_early: Device early probe
+ * @xe: xe device instance
+ *
+ * Initialize MMIO resources that don't require any
+ * knowledge about tile count. Also initialize pcode and
+ * check vram initialization on root tile.
+ *
+ * Return: 0 on success, error code on failure
  */
 int xe_device_probe_early(struct xe_device *xe)
 {
@@ -388,7 +491,13 @@ int xe_device_probe_early(struct xe_device *xe)
 	if (err)
 		return err;
 
-	err = xe_mmio_root_tile_init(xe);
+	xe_sriov_probe_early(xe);
+
+	err = xe_pcode_probe_early(xe);
+	if (err)
+		return err;
+
+	err = wait_for_lmem_ready(xe);
 	if (err)
 		return err;
 
@@ -424,10 +533,15 @@ int xe_device_probe(struct xe_device *xe)
 	struct xe_tile *tile;
 	struct xe_gt *gt;
 	int err;
+	u8 last_gt;
 	u8 id;
 
 	xe_pat_init_early(xe);
 
+	err = xe_sriov_init(xe);
+	if (err)
+		return err;
+
 	xe->info.mem_region_mask = 1;
 	err = xe_display_init_nommio(xe);
 	if (err)
@@ -448,18 +562,29 @@ int xe_device_probe(struct xe_device *xe)
 		err = xe_ggtt_init_early(tile->mem.ggtt);
 		if (err)
 			return err;
+		if (IS_SRIOV_VF(xe)) {
+			err = xe_memirq_init(&tile->sriov.vf.memirq);
+			if (err)
+				return err;
+		}
 	}
 
-	err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe);
-	if (err)
-		return err;
-
 	for_each_gt(gt, xe, id) {
-		err = xe_pcode_probe(gt);
+		err = xe_gt_init_hwconfig(gt);
 		if (err)
 			return err;
 	}
 
+	err = xe_devcoredump_init(xe);
+	if (err)
+		return err;
+	err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe);
+	if (err)
+		return err;
+
+	for_each_gt(gt, xe, id)
+		xe_pcode_init(gt);
+
 	err = xe_display_init_noirq(xe);
 	if (err)
 		return err;
@@ -502,16 +627,18 @@ int xe_device_probe(struct xe_device *xe)
 		goto err_irq_shutdown;
 
 	for_each_gt(gt, xe, id) {
+		last_gt = id;
+
 		err = xe_gt_init(gt);
 		if (err)
-			goto err_irq_shutdown;
+			goto err_fini_gt;
 	}
 
 	xe_heci_gsc_init(xe);
 
 	err = xe_display_init(xe);
 	if (err)
-		goto err_irq_shutdown;
+		goto err_fini_gt;
 
 	err = drm_dev_register(&xe->drm, 0);
 	if (err)
@@ -523,15 +650,19 @@ int xe_device_probe(struct xe_device *xe)
 
 	xe_hwmon_register(xe);
 
-	err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
-	if (err)
-		return err;
-
-	return 0;
+	return drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
 
 err_fini_display:
 	xe_display_driver_remove(xe);
 
+err_fini_gt:
+	for_each_gt(gt, xe, id) {
+		if (id < last_gt)
+			xe_gt_remove(gt);
+		else
+			break;
+	}
+
 err_irq_shutdown:
 	xe_irq_shutdown(xe);
 err:
@@ -549,12 +680,18 @@ static void xe_device_remove_display(struct xe_device *xe)
 
 void xe_device_remove(struct xe_device *xe)
 {
+	struct xe_gt *gt;
+	u8 id;
+
 	xe_device_remove_display(xe);
 
 	xe_display_fini(xe);
 
 	xe_heci_gsc_fini(xe);
 
+	for_each_gt(gt, xe, id)
+		xe_gt_remove(gt);
+
 	xe_irq_shutdown(xe);
 }
 
@@ -577,85 +714,48 @@ u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
 		DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0;
 }
 
-bool xe_device_mem_access_ongoing(struct xe_device *xe)
-{
-	if (xe_pm_read_callback_task(xe) != NULL)
-		return true;
-
-	return atomic_read(&xe->mem_access.ref);
-}
-
+/**
+ * xe_device_assert_mem_access - Inspect the current runtime_pm state.
+ * @xe: xe device instance
+ *
+ * To be used before any kind of memory access. It will splat a debug warning
+ * if the device is currently sleeping. But it doesn't guarantee in any way
+ * that the device is going to remain awake. Xe PM runtime get and put
+ * functions might be added to the outer bound of the memory access, while
+ * this check is intended for inner usage to splat some warning if the worst
+ * case has just happened.
+ */
 void xe_device_assert_mem_access(struct xe_device *xe)
 {
-	XE_WARN_ON(!xe_device_mem_access_ongoing(xe));
+	xe_assert(xe, !xe_pm_runtime_suspended(xe));
 }
 
-bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe)
+void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p)
 {
-	bool active;
-
-	if (xe_pm_read_callback_task(xe) == current)
-		return true;
+	struct xe_gt *gt;
+	u8 id;
 
-	active = xe_pm_runtime_get_if_active(xe);
-	if (active) {
-		int ref = atomic_inc_return(&xe->mem_access.ref);
+	drm_printf(p, "PCI ID: 0x%04x\n", xe->info.devid);
+	drm_printf(p, "PCI revision: 0x%02x\n", xe->info.revid);
 
-		xe_assert(xe, ref != S32_MAX);
+	for_each_gt(gt, xe, id) {
+		drm_printf(p, "GT id: %u\n", id);
+		drm_printf(p, "\tType: %s\n",
+			   gt->info.type == XE_GT_TYPE_MAIN ? "main" : "media");
+		drm_printf(p, "\tIP ver: %u.%u.%u\n",
+			   REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid),
+			   REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid),
+			   REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid));
+		drm_printf(p, "\tCS reference clock: %u\n", gt->info.reference_clock);
 	}
-
-	return active;
 }
 
-void xe_device_mem_access_get(struct xe_device *xe)
+u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address)
 {
-	int ref;
-
-	/*
-	 * This looks racy, but should be fine since the pm_callback_task only
-	 * transitions from NULL -> current (and back to NULL again), during the
-	 * runtime_resume() or runtime_suspend() callbacks, for which there can
-	 * only be a single one running for our device. We only need to prevent
-	 * recursively calling the runtime_get or runtime_put from those
-	 * callbacks, as well as preventing triggering any access_ongoing
-	 * asserts.
-	 */
-	if (xe_pm_read_callback_task(xe) == current)
-		return;
-
-	/*
-	 * Since the resume here is synchronous it can be quite easy to deadlock
-	 * if we are not careful. Also in practice it might be quite timing
-	 * sensitive to ever see the 0 -> 1 transition with the callers locks
-	 * held, so deadlocks might exist but are hard for lockdep to ever see.
-	 * With this in mind, help lockdep learn about the potentially scary
-	 * stuff that can happen inside the runtime_resume callback by acquiring
-	 * a dummy lock (it doesn't protect anything and gets compiled out on
-	 * non-debug builds).  Lockdep then only needs to see the
-	 * mem_access_lockdep_map -> runtime_resume callback once, and then can
-	 * hopefully validate all the (callers_locks) -> mem_access_lockdep_map.
-	 * For example if the (callers_locks) are ever grabbed in the
-	 * runtime_resume callback, lockdep should give us a nice splat.
-	 */
-	lock_map_acquire(&xe_device_mem_access_lockdep_map);
-	lock_map_release(&xe_device_mem_access_lockdep_map);
-
-	xe_pm_runtime_get(xe);
-	ref = atomic_inc_return(&xe->mem_access.ref);
-
-	xe_assert(xe, ref != S32_MAX);
-
+	return sign_extend64(address, xe->info.va_bits - 1);
 }
 
-void xe_device_mem_access_put(struct xe_device *xe)
+u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address)
 {
-	int ref;
-
-	if (xe_pm_read_callback_task(xe) == current)
-		return;
-
-	ref = atomic_dec_return(&xe->mem_access.ref);
-	xe_pm_runtime_put(xe);
-
-	xe_assert(xe, ref >= 0);
+	return address & GENMASK_ULL(xe->info.va_bits - 1, 0);
 }
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index bf8efb44ed..36d4434ebc 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -16,10 +16,6 @@ struct xe_file;
 #include "xe_force_wake.h"
 #include "xe_macros.h"
 
-#ifdef CONFIG_LOCKDEP
-extern struct lockdep_map xe_device_mem_access_lockdep_map;
-#endif
-
 static inline struct xe_device *to_xe_device(const struct drm_device *dev)
 {
 	return container_of(dev, struct xe_device, drm);
@@ -137,12 +133,7 @@ static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt)
 	return &gt->mmio.fw;
 }
 
-void xe_device_mem_access_get(struct xe_device *xe);
-bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe);
-void xe_device_mem_access_put(struct xe_device *xe);
-
 void xe_device_assert_mem_access(struct xe_device *xe);
-bool xe_device_mem_access_ongoing(struct xe_device *xe);
 
 static inline bool xe_device_in_fault_mode(struct xe_device *xe)
 {
@@ -164,6 +155,16 @@ static inline bool xe_device_has_sriov(struct xe_device *xe)
 	return xe->info.has_sriov;
 }
 
+static inline bool xe_device_has_memirq(struct xe_device *xe)
+{
+	return GRAPHICS_VERx100(xe) >= 1250;
+}
+
 u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size);
 
+void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p);
+
+u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address);
+u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address);
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c
index 99113a5a2b..21677b8cd9 100644
--- a/drivers/gpu/drm/xe/xe_device_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_device_sysfs.c
@@ -35,7 +35,9 @@ vram_d3cold_threshold_show(struct device *dev,
 	if (!xe)
 		return -EINVAL;
 
+	xe_pm_runtime_get(xe);
 	ret = sysfs_emit(buf, "%d\n", xe->d3cold.vram_threshold);
+	xe_pm_runtime_put(xe);
 
 	return ret;
 }
@@ -58,7 +60,9 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr,
 
 	drm_dbg(&xe->drm, "vram_d3cold_threshold: %u\n", vram_d3cold_threshold);
 
+	xe_pm_runtime_get(xe);
 	ret = xe_pm_set_vram_threshold(xe, vram_d3cold_threshold);
+	xe_pm_runtime_put(xe);
 
 	return ret ?: count;
 }
@@ -72,18 +76,14 @@ static void xe_device_sysfs_fini(struct drm_device *drm, void *arg)
 	sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr);
 }
 
-void xe_device_sysfs_init(struct xe_device *xe)
+int xe_device_sysfs_init(struct xe_device *xe)
 {
 	struct device *dev = xe->drm.dev;
 	int ret;
 
 	ret = sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr);
-	if (ret) {
-		drm_warn(&xe->drm, "Failed to create sysfs file\n");
-		return;
-	}
-
-	ret = drmm_add_action_or_reset(&xe->drm, xe_device_sysfs_fini, xe);
 	if (ret)
-		drm_warn(&xe->drm, "Failed to add sysfs fini drm action\n");
+		return ret;
+
+	return drmm_add_action_or_reset(&xe->drm, xe_device_sysfs_fini, xe);
 }
diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.h b/drivers/gpu/drm/xe/xe_device_sysfs.h
index 38b240684b..f9e83d8bd2 100644
--- a/drivers/gpu/drm/xe/xe_device_sysfs.h
+++ b/drivers/gpu/drm/xe/xe_device_sysfs.h
@@ -8,6 +8,6 @@
 
 struct xe_device;
 
-void xe_device_sysfs_init(struct xe_device *xe);
+int xe_device_sysfs_init(struct xe_device *xe);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index e8491979a6..2e62450d86 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -16,6 +16,7 @@
 #include "xe_heci_gsc.h"
 #include "xe_gt_types.h"
 #include "xe_lmtt_types.h"
+#include "xe_memirq_types.h"
 #include "xe_platform_types.h"
 #include "xe_pt_types.h"
 #include "xe_sriov_types.h"
@@ -142,10 +143,10 @@ struct xe_tile {
 	 * * 8MB-16MB: global GTT
 	 */
 	struct {
-		/** @size: size of tile's MMIO space */
+		/** @mmio.size: size of tile's MMIO space */
 		size_t size;
 
-		/** @regs: pointer to tile's MMIO space (starting with registers) */
+		/** @mmio.regs: pointer to tile's MMIO space (starting with registers) */
 		void __iomem *regs;
 	} mmio;
 
@@ -155,31 +156,31 @@ struct xe_tile {
 	 * Each tile has its own additional 256MB (28-bit) MMIO-extension space.
 	 */
 	struct {
-		/** @size: size of tile's additional MMIO-extension space */
+		/** @mmio_ext.size: size of tile's additional MMIO-extension space */
 		size_t size;
 
-		/** @regs: pointer to tile's additional MMIO-extension space */
+		/** @mmio_ext.regs: pointer to tile's additional MMIO-extension space */
 		void __iomem *regs;
 	} mmio_ext;
 
 	/** @mem: memory management info for tile */
 	struct {
 		/**
-		 * @vram: VRAM info for tile.
+		 * @mem.vram: VRAM info for tile.
 		 *
 		 * Although VRAM is associated with a specific tile, it can
 		 * still be accessed by all tiles' GTs.
 		 */
 		struct xe_mem_region vram;
 
-		/** @vram_mgr: VRAM TTM manager */
+		/** @mem.vram_mgr: VRAM TTM manager */
 		struct xe_ttm_vram_mgr *vram_mgr;
 
-		/** @ggtt: Global graphics translation table */
+		/** @mem.ggtt: Global graphics translation table */
 		struct xe_ggtt *ggtt;
 
 		/**
-		 * @kernel_bb_pool: Pool from which batchbuffers are allocated.
+		 * @mem.kernel_bb_pool: Pool from which batchbuffers are allocated.
 		 *
 		 * Media GT shares a pool with its primary GT.
 		 */
@@ -192,6 +193,10 @@ struct xe_tile {
 			/** @sriov.pf.lmtt: Local Memory Translation Table. */
 			struct xe_lmtt lmtt;
 		} pf;
+		struct {
+			/** @sriov.vf.memirq: Memory Based Interrupts. */
+			struct xe_memirq memirq;
+		} vf;
 	} sriov;
 
 	/** @migrate: Migration helper for vram blits and clearing */
@@ -213,68 +218,68 @@ struct xe_device {
 
 	/** @info: device info */
 	struct intel_device_info {
-		/** @graphics_name: graphics IP name */
+		/** @info.graphics_name: graphics IP name */
 		const char *graphics_name;
-		/** @media_name: media IP name */
+		/** @info.media_name: media IP name */
 		const char *media_name;
-		/** @tile_mmio_ext_size: size of MMIO extension space, per-tile */
+		/** @info.tile_mmio_ext_size: size of MMIO extension space, per-tile */
 		u32 tile_mmio_ext_size;
-		/** @graphics_verx100: graphics IP version */
+		/** @info.graphics_verx100: graphics IP version */
 		u32 graphics_verx100;
-		/** @media_verx100: media IP version */
+		/** @info.media_verx100: media IP version */
 		u32 media_verx100;
-		/** @mem_region_mask: mask of valid memory regions */
+		/** @info.mem_region_mask: mask of valid memory regions */
 		u32 mem_region_mask;
-		/** @platform: XE platform enum */
+		/** @info.platform: XE platform enum */
 		enum xe_platform platform;
-		/** @subplatform: XE subplatform enum */
+		/** @info.subplatform: XE subplatform enum */
 		enum xe_subplatform subplatform;
-		/** @devid: device ID */
+		/** @info.devid: device ID */
 		u16 devid;
-		/** @revid: device revision */
+		/** @info.revid: device revision */
 		u8 revid;
-		/** @step: stepping information for each IP */
+		/** @info.step: stepping information for each IP */
 		struct xe_step_info step;
-		/** @dma_mask_size: DMA address bits */
+		/** @info.dma_mask_size: DMA address bits */
 		u8 dma_mask_size;
-		/** @vram_flags: Vram flags */
+		/** @info.vram_flags: Vram flags */
 		u8 vram_flags;
-		/** @tile_count: Number of tiles */
+		/** @info.tile_count: Number of tiles */
 		u8 tile_count;
-		/** @gt_count: Total number of GTs for entire device */
+		/** @info.gt_count: Total number of GTs for entire device */
 		u8 gt_count;
-		/** @vm_max_level: Max VM level */
+		/** @info.vm_max_level: Max VM level */
 		u8 vm_max_level;
-		/** @va_bits: Maximum bits of a virtual address */
+		/** @info.va_bits: Maximum bits of a virtual address */
 		u8 va_bits;
 
-		/** @is_dgfx: is discrete device */
+		/** @info.is_dgfx: is discrete device */
 		u8 is_dgfx:1;
-		/** @has_asid: Has address space ID */
+		/** @info.has_asid: Has address space ID */
 		u8 has_asid:1;
-		/** @force_execlist: Forced execlist submission */
+		/** @info.force_execlist: Forced execlist submission */
 		u8 force_execlist:1;
-		/** @has_flat_ccs: Whether flat CCS metadata is used */
+		/** @info.has_flat_ccs: Whether flat CCS metadata is used */
 		u8 has_flat_ccs:1;
-		/** @has_llc: Device has a shared CPU+GPU last level cache */
+		/** @info.has_llc: Device has a shared CPU+GPU last level cache */
 		u8 has_llc:1;
-		/** @has_mmio_ext: Device has extra MMIO address range */
+		/** @info.has_mmio_ext: Device has extra MMIO address range */
 		u8 has_mmio_ext:1;
-		/** @has_range_tlb_invalidation: Has range based TLB invalidations */
+		/** @info.has_range_tlb_invalidation: Has range based TLB invalidations */
 		u8 has_range_tlb_invalidation:1;
-		/** @has_sriov: Supports SR-IOV */
+		/** @info.has_sriov: Supports SR-IOV */
 		u8 has_sriov:1;
-		/** @has_usm: Device has unified shared memory support */
+		/** @info.has_usm: Device has unified shared memory support */
 		u8 has_usm:1;
-		/** @enable_display: display enabled */
+		/** @info.enable_display: display enabled */
 		u8 enable_display:1;
-		/** @skip_mtcfg: skip Multi-Tile configuration from MTCFG register */
+		/** @info.skip_mtcfg: skip Multi-Tile configuration from MTCFG register */
 		u8 skip_mtcfg:1;
-		/** @skip_pcode: skip access to PCODE uC */
+		/** @info.skip_pcode: skip access to PCODE uC */
 		u8 skip_pcode:1;
-		/** @has_heci_gscfi: device has heci gscfi */
+		/** @info.has_heci_gscfi: device has heci gscfi */
 		u8 has_heci_gscfi:1;
-		/** @skip_guc_pc: Skip GuC based PM feature init */
+		/** @info.skip_guc_pc: Skip GuC based PM feature init */
 		u8 skip_guc_pc:1;
 
 #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
@@ -286,10 +291,10 @@ struct xe_device {
 
 	/** @irq: device interrupt state */
 	struct {
-		/** @lock: lock for processing irq's on this device */
+		/** @irq.lock: lock for processing irq's on this device */
 		spinlock_t lock;
 
-		/** @enabled: interrupts enabled on this device */
+		/** @irq.enabled: interrupts enabled on this device */
 		bool enabled;
 	} irq;
 
@@ -298,17 +303,17 @@ struct xe_device {
 
 	/** @mmio: mmio info for device */
 	struct {
-		/** @size: size of MMIO space for device */
+		/** @mmio.size: size of MMIO space for device */
 		size_t size;
-		/** @regs: pointer to MMIO space for device */
+		/** @mmio.regs: pointer to MMIO space for device */
 		void __iomem *regs;
 	} mmio;
 
 	/** @mem: memory info for device */
 	struct {
-		/** @vram: VRAM info for device */
+		/** @mem.vram: VRAM info for device */
 		struct xe_mem_region vram;
-		/** @sys_mgr: system TTM manager */
+		/** @mem.sys_mgr: system TTM manager */
 		struct ttm_resource_manager sys_mgr;
 	} mem;
 
@@ -316,46 +321,55 @@ struct xe_device {
 	struct {
 		/** @sriov.__mode: SR-IOV mode (Don't access directly!) */
 		enum xe_sriov_mode __mode;
+
+		/** @sriov.pf: PF specific data */
+		struct xe_device_pf pf;
+
+		/** @sriov.wq: workqueue used by the virtualization workers */
+		struct workqueue_struct *wq;
 	} sriov;
 
 	/** @clients: drm clients info */
 	struct {
-		/** @lock: Protects drm clients info */
+		/** @clients.lock: Protects drm clients info */
 		spinlock_t lock;
 
-		/** @count: number of drm clients */
+		/** @clients.count: number of drm clients */
 		u64 count;
 	} clients;
 
 	/** @usm: unified memory state */
 	struct {
-		/** @asid: convert a ASID to VM */
+		/** @usm.asid: convert a ASID to VM */
 		struct xarray asid_to_vm;
-		/** @next_asid: next ASID, used to cyclical alloc asids */
+		/** @usm.next_asid: next ASID, used to cyclical alloc asids */
 		u32 next_asid;
-		/** @num_vm_in_fault_mode: number of VM in fault mode */
+		/** @usm.num_vm_in_fault_mode: number of VM in fault mode */
 		u32 num_vm_in_fault_mode;
-		/** @num_vm_in_non_fault_mode: number of VM in non-fault mode */
+		/** @usm.num_vm_in_non_fault_mode: number of VM in non-fault mode */
 		u32 num_vm_in_non_fault_mode;
-		/** @lock: protects UM state */
+		/** @usm.lock: protects UM state */
 		struct mutex lock;
 	} usm;
 
 	/** @pinned: pinned BO state */
 	struct {
-		/** @lock: protected pinned BO list state */
+		/** @pinned.lock: protected pinned BO list state */
 		spinlock_t lock;
-		/** @evicted: pinned kernel BO that are present */
+		/** @pinned.kernel_bo_present: pinned kernel BO that are present */
 		struct list_head kernel_bo_present;
-		/** @evicted: pinned BO that have been evicted */
+		/** @pinned.evicted: pinned BO that have been evicted */
 		struct list_head evicted;
-		/** @external_vram: pinned external BO in vram*/
+		/** @pinned.external_vram: pinned external BO in vram*/
 		struct list_head external_vram;
 	} pinned;
 
 	/** @ufence_wq: user fence wait queue */
 	wait_queue_head_t ufence_wq;
 
+	/** @preempt_fence_wq: used to serialize preempt fences */
+	struct workqueue_struct *preempt_fence_wq;
+
 	/** @ordered_wq: used to serialize compute mode resume */
 	struct workqueue_struct *ordered_wq;
 
@@ -370,36 +384,54 @@ struct xe_device {
 	 * triggering additional actions when they occur.
 	 */
 	struct {
-		/** @ref: ref count of memory accesses */
-		atomic_t ref;
+		/**
+		 * @mem_access.vram_userfault: Encapsulate vram_userfault
+		 * related stuff
+		 */
+		struct {
+			/**
+			 * @mem_access.vram_userfault.lock: Protects access to
+			 * @vram_usefault.list Using mutex instead of spinlock
+			 * as lock is applied to entire list operation which
+			 * may sleep
+			 */
+			struct mutex lock;
+
+			/**
+			 * @mem_access.vram_userfault.list: Keep list of userfaulted
+			 * vram bo, which require to release their mmap mappings
+			 * at runtime suspend path
+			 */
+			struct list_head list;
+		} vram_userfault;
 	} mem_access;
 
 	/**
 	 * @pat: Encapsulate PAT related stuff
 	 */
 	struct {
-		/** Internal operations to abstract platforms */
+		/** @pat.ops: Internal operations to abstract platforms */
 		const struct xe_pat_ops *ops;
-		/** PAT table to program in the HW */
+		/** @pat.table: PAT table to program in the HW */
 		const struct xe_pat_table_entry *table;
-		/** Number of PAT entries */
+		/** @pat.n_entries: Number of PAT entries */
 		int n_entries;
 		u32 idx[__XE_CACHE_LEVEL_COUNT];
 	} pat;
 
 	/** @d3cold: Encapsulate d3cold related stuff */
 	struct {
-		/** capable: Indicates if root port is d3cold capable */
+		/** @d3cold.capable: Indicates if root port is d3cold capable */
 		bool capable;
 
-		/** @allowed: Indicates if d3cold is a valid device state */
+		/** @d3cold.allowed: Indicates if d3cold is a valid device state */
 		bool allowed;
 
-		/** @power_lost: Indicates if card has really lost power. */
+		/** @d3cold.power_lost: Indicates if card has really lost power. */
 		bool power_lost;
 
 		/**
-		 * @vram_threshold:
+		 * @d3cold.vram_threshold:
 		 *
 		 * This represents the permissible threshold(in megabytes)
 		 * for vram save/restore. d3cold will be disallowed,
@@ -408,7 +440,7 @@ struct xe_device {
 		 * Default threshold value is 300mb.
 		 */
 		u32 vram_threshold;
-		/** @lock: protect vram_threshold */
+		/** @d3cold.lock: protect vram_threshold */
 		struct mutex lock;
 	} d3cold;
 
@@ -469,20 +501,9 @@ struct xe_device {
 	/* For pcode */
 	struct mutex sb_lock;
 
-	/* Should be in struct intel_display */
-	u32 skl_preferred_vco_freq, max_dotclk_freq, hti_state;
-	u8 snps_phy_failed_calibration;
-	struct drm_atomic_state *modeset_restore_state;
-	struct list_head global_obj_list;
-
-	union {
-		/* only to allow build, not used functionally */
-		u32 irq_mask;
-		u32 de_irq_mask[I915_MAX_PIPES];
-	};
-	u32 pipestat_irq_mask[I915_MAX_PIPES];
+	/* only to allow build, not used functionally */
+	u32 irq_mask;
 
-	bool display_irqs_enabled;
 	u32 enabled_irq_mask;
 
 	struct intel_uncore {
@@ -494,11 +515,7 @@ struct xe_device {
 		unsigned int hpll_freq;
 		unsigned int czclk_freq;
 		unsigned int fsb_freq, mem_freq, is_ddr3;
-		u8 vblank_enabled;
 	};
-	struct {
-		const char *dmc_firmware_path;
-	} params;
 
 	void *pxp;
 #endif
@@ -516,17 +533,17 @@ struct xe_file {
 
 	/** @vm: VM state for file */
 	struct {
-		/** @xe: xarray to store VMs */
+		/** @vm.xe: xarray to store VMs */
 		struct xarray xa;
-		/** @lock: protects file VM state */
+		/** @vm.lock: protects file VM state */
 		struct mutex lock;
 	} vm;
 
 	/** @exec_queue: Submission exec queue state for file */
 	struct {
-		/** @xe: xarray to store engines */
+		/** @exec_queue.xe: xarray to store engines */
 		struct xarray xa;
-		/** @lock: protects file engine state */
+		/** @exec_queue.lock: protects file engine state */
 		struct mutex lock;
 	} exec_queue;
 
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index da2627ed6a..68f309f5e9 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -16,6 +16,7 @@
 #include "tests/xe_test.h"
 #include "xe_bo.h"
 #include "xe_device.h"
+#include "xe_pm.h"
 #include "xe_ttm_vram_mgr.h"
 #include "xe_vm.h"
 
@@ -33,7 +34,7 @@ static int xe_dma_buf_attach(struct dma_buf *dmabuf,
 	if (!attach->peer2peer && !xe_bo_can_migrate(gem_to_xe_bo(obj), XE_PL_TT))
 		return -EOPNOTSUPP;
 
-	xe_device_mem_access_get(to_xe_device(obj->dev));
+	xe_pm_runtime_get(to_xe_device(obj->dev));
 	return 0;
 }
 
@@ -42,7 +43,7 @@ static void xe_dma_buf_detach(struct dma_buf *dmabuf,
 {
 	struct drm_gem_object *obj = attach->dmabuf->priv;
 
-	xe_device_mem_access_put(to_xe_device(obj->dev));
+	xe_pm_runtime_put(to_xe_device(obj->dev));
 }
 
 static int xe_dma_buf_pin(struct dma_buf_attachment *attach)
@@ -216,7 +217,7 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
 	dma_resv_lock(resv, NULL);
 	bo = ___xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
 				    0, /* Will require 1way or 2way for vm_bind */
-				    ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT);
+				    ttm_bo_type_sg, XE_BO_FLAG_SYSTEM);
 	if (IS_ERR(bo)) {
 		ret = PTR_ERR(bo);
 		goto error;
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index 6040e4d22b..08f0b7c959 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -78,7 +78,7 @@ void xe_drm_client_add_bo(struct xe_drm_client *client,
 
 	spin_lock(&client->bos_lock);
 	bo->client = xe_drm_client_get(client);
-	list_add_tail_rcu(&bo->client_link, &client->bos_list);
+	list_add_tail(&bo->client_link, &client->bos_list);
 	spin_unlock(&client->bos_lock);
 }
 
@@ -96,7 +96,7 @@ void xe_drm_client_remove_bo(struct xe_bo *bo)
 	struct xe_drm_client *client = bo->client;
 
 	spin_lock(&client->bos_lock);
-	list_del_rcu(&bo->client_link);
+	list_del(&bo->client_link);
 	spin_unlock(&client->bos_lock);
 
 	xe_drm_client_put(client);
@@ -113,7 +113,7 @@ static void bo_meminfo(struct xe_bo *bo,
 	else
 		mem_type = XE_PL_TT;
 
-	if (bo->ttm.base.handle_count > 1)
+	if (drm_gem_object_is_shared_for_memory_stats(&bo->ttm.base))
 		stats[mem_type].shared += sz;
 	else
 		stats[mem_type].private += sz;
@@ -154,8 +154,8 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
 
 	/* Internal objects. */
 	spin_lock(&client->bos_lock);
-	list_for_each_entry_rcu(bo, &client->bos_list, client_link) {
-		if (!bo || !kref_get_unless_zero(&bo->ttm.base.refcount))
+	list_for_each_entry(bo, &client->bos_list, client_link) {
+		if (!kref_get_unless_zero(&bo->ttm.base.refcount))
 			continue;
 		bo_meminfo(bo, stats);
 		xe_bo_put(bo);
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 2cd4ad2fcf..074344c739 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -94,9 +94,16 @@
  *	Unlock all
  */
 
+/*
+ * Add validation and rebinding to the drm_exec locking loop, since both can
+ * trigger eviction which may require sleeping dma_resv locks.
+ */
 static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec)
 {
-	return drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec);
+	struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm);
+
+	/* The fence slot added here is intended for the exec sched job. */
+	return xe_vm_validate_rebind(vm, &vm_exec->exec, 1);
 }
 
 int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
@@ -111,7 +118,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	u64 addresses[XE_HW_ENGINE_MAX_INSTANCE];
 	struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn};
 	struct drm_exec *exec = &vm_exec.exec;
-	u32 i, num_syncs = 0, num_ufence = 0;
+	u32 i, num_syncs, num_ufence = 0;
 	struct xe_sched_job *job;
 	struct xe_vm *vm;
 	bool write_locked, skip_retry = false;
@@ -149,15 +156,15 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 
 	vm = q->vm;
 
-	for (i = 0; i < args->num_syncs; i++) {
-		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++],
-					  &syncs_user[i], SYNC_PARSE_FLAG_EXEC |
+	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
+		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
+					  &syncs_user[num_syncs], SYNC_PARSE_FLAG_EXEC |
 					  (xe_vm_in_lr_mode(vm) ?
 					   SYNC_PARSE_FLAG_LR_MODE : 0));
 		if (err)
 			goto err_syncs;
 
-		if (xe_sync_is_ufence(&syncs[i]))
+		if (xe_sync_is_ufence(&syncs[num_syncs]))
 			num_ufence++;
 	}
 
@@ -209,7 +216,7 @@ retry:
 				goto err_unlock_list;
 			}
 			for (i = 0; i < num_syncs; i++)
-				xe_sync_entry_signal(&syncs[i], NULL, fence);
+				xe_sync_entry_signal(&syncs[i], fence);
 			xe_exec_queue_last_fence_set(q, vm, fence);
 			dma_fence_put(fence);
 		}
@@ -219,7 +226,6 @@ retry:
 	}
 
 	vm_exec.vm = &vm->gpuvm;
-	vm_exec.num_fences = 1 + vm->xe->info.tile_count;
 	vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT;
 	if (xe_vm_in_lr_mode(vm)) {
 		drm_exec_init(exec, vm_exec.flags, 0);
@@ -251,14 +257,6 @@ retry:
 		goto err_exec;
 	}
 
-	/*
-	 * Rebind any invalidated userptr or evicted BOs in the VM, non-compute
-	 * VM mode only.
-	 */
-	err = xe_vm_rebind(vm, false);
-	if (err)
-		goto err_put_job;
-
 	/* Wait behind rebinds */
 	if (!xe_vm_in_lr_mode(vm)) {
 		err = drm_sched_job_add_resv_dependencies(&job->drm,
@@ -296,9 +294,10 @@ retry:
 		drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, &job->drm.s_fence->finished,
 					 DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE);
 
-	for (i = 0; i < num_syncs; i++)
-		xe_sync_entry_signal(&syncs[i], job,
-				     &job->drm.s_fence->finished);
+	for (i = 0; i < num_syncs; i++) {
+		xe_sync_entry_signal(&syncs[i], &job->drm.s_fence->finished);
+		xe_sched_job_init_user_fence(job, &syncs[i]);
+	}
 
 	if (xe_exec_queue_is_lr(q))
 		q->ring_ops->emit_job(job);
@@ -322,15 +321,12 @@ err_put_job:
 err_exec:
 	drm_exec_fini(exec);
 err_unlock_list:
-	if (write_locked)
-		up_write(&vm->lock);
-	else
-		up_read(&vm->lock);
+	up_read(&vm->lock);
 	if (err == -EAGAIN && !skip_retry)
 		goto retry;
 err_syncs:
-	for (i = 0; i < num_syncs; i++)
-		xe_sync_entry_cleanup(&syncs[i]);
+	while (num_syncs--)
+		xe_sync_entry_cleanup(&syncs[num_syncs]);
 	kfree(syncs);
 err_exec_queue:
 	xe_exec_queue_put(q);
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 5093c56d60..395de93579 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -30,21 +30,30 @@ enum xe_exec_queue_sched_prop {
 	XE_EXEC_QUEUE_SCHED_PROP_MAX = 3,
 };
 
-static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
-						    struct xe_vm *vm,
-						    u32 logical_mask,
-						    u16 width, struct xe_hw_engine *hwe,
-						    u32 flags)
+static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
+				      u64 extensions, int ext_number);
+
+static void __xe_exec_queue_free(struct xe_exec_queue *q)
+{
+	if (q->vm)
+		xe_vm_put(q->vm);
+	kfree(q);
+}
+
+static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
+						   struct xe_vm *vm,
+						   u32 logical_mask,
+						   u16 width, struct xe_hw_engine *hwe,
+						   u32 flags, u64 extensions)
 {
 	struct xe_exec_queue *q;
 	struct xe_gt *gt = hwe->gt;
 	int err;
-	int i;
 
 	/* only kernel queues can be permanent */
 	XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL));
 
-	q = kzalloc(sizeof(*q) + sizeof(struct xe_lrc) * width, GFP_KERNEL);
+	q = kzalloc(struct_size(q, lrc, width), GFP_KERNEL);
 	if (!q)
 		return ERR_PTR(-ENOMEM);
 
@@ -52,8 +61,6 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
 	q->flags = flags;
 	q->hwe = hwe;
 	q->gt = gt;
-	if (vm)
-		q->vm = xe_vm_get(vm);
 	q->class = hwe->class;
 	q->width = width;
 	q->logical_mask = logical_mask;
@@ -74,17 +81,36 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
 	else
 		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;
 
+	if (vm)
+		q->vm = xe_vm_get(vm);
+
+	if (extensions) {
+		/*
+		 * may set q->usm, must come before xe_lrc_init(),
+		 * may overwrite q->sched_props, must come before q->ops->init()
+		 */
+		err = exec_queue_user_extensions(xe, q, extensions, 0);
+		if (err) {
+			__xe_exec_queue_free(q);
+			return ERR_PTR(err);
+		}
+	}
+
 	if (xe_exec_queue_is_parallel(q)) {
 		q->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
 		q->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO;
 	}
-	if (q->flags & EXEC_QUEUE_FLAG_VM) {
-		q->bind.fence_ctx = dma_fence_context_alloc(1);
-		q->bind.fence_seqno = XE_FENCE_INITIAL_SEQNO;
-	}
 
-	for (i = 0; i < width; ++i) {
-		err = xe_lrc_init(q->lrc + i, hwe, q, vm, SZ_16K);
+	return q;
+}
+
+static int __xe_exec_queue_init(struct xe_exec_queue *q)
+{
+	struct xe_device *xe = gt_to_xe(q->gt);
+	int i, err;
+
+	for (i = 0; i < q->width; ++i) {
+		err = xe_lrc_init(q->lrc + i, q->hwe, q, q->vm, SZ_16K);
 		if (err)
 			goto err_lrc;
 	}
@@ -101,35 +127,47 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
 	 * can perform GuC CT actions when needed. Caller is expected to have
 	 * already grabbed the rpm ref outside any sensitive locks.
 	 */
-	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !vm))
-		drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe));
+	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm))
+		xe_pm_runtime_get_noresume(xe);
 
-	return q;
+	return 0;
 
 err_lrc:
 	for (i = i - 1; i >= 0; --i)
 		xe_lrc_finish(q->lrc + i);
-	kfree(q);
-	return ERR_PTR(err);
+	return err;
 }
 
 struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
 					   u32 logical_mask, u16 width,
-					   struct xe_hw_engine *hwe, u32 flags)
+					   struct xe_hw_engine *hwe, u32 flags,
+					   u64 extensions)
 {
 	struct xe_exec_queue *q;
 	int err;
 
+	q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags,
+				  extensions);
+	if (IS_ERR(q))
+		return q;
+
 	if (vm) {
 		err = xe_vm_lock(vm, true);
 		if (err)
-			return ERR_PTR(err);
+			goto err_post_alloc;
 	}
-	q = __xe_exec_queue_create(xe, vm, logical_mask, width, hwe, flags);
+
+	err = __xe_exec_queue_init(q);
 	if (vm)
 		xe_vm_unlock(vm);
+	if (err)
+		goto err_post_alloc;
 
 	return q;
+
+err_post_alloc:
+	__xe_exec_queue_free(q);
+	return ERR_PTR(err);
 }
 
 struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
@@ -154,7 +192,7 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe
 	if (!logical_mask)
 		return ERR_PTR(-ENODEV);
 
-	return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags);
+	return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, 0);
 }
 
 void xe_exec_queue_destroy(struct kref *ref)
@@ -179,33 +217,30 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
 	for (i = 0; i < q->width; ++i)
 		xe_lrc_finish(q->lrc + i);
 	if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm))
-		xe_device_mem_access_put(gt_to_xe(q->gt));
-	if (q->vm)
-		xe_vm_put(q->vm);
-
-	kfree(q);
+		xe_pm_runtime_put(gt_to_xe(q->gt));
+	__xe_exec_queue_free(q);
 }
 
 void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance)
 {
 	switch (q->class) {
 	case XE_ENGINE_CLASS_RENDER:
-		sprintf(q->name, "rcs%d", instance);
+		snprintf(q->name, sizeof(q->name), "rcs%d", instance);
 		break;
 	case XE_ENGINE_CLASS_VIDEO_DECODE:
-		sprintf(q->name, "vcs%d", instance);
+		snprintf(q->name, sizeof(q->name), "vcs%d", instance);
 		break;
 	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
-		sprintf(q->name, "vecs%d", instance);
+		snprintf(q->name, sizeof(q->name), "vecs%d", instance);
 		break;
 	case XE_ENGINE_CLASS_COPY:
-		sprintf(q->name, "bcs%d", instance);
+		snprintf(q->name, sizeof(q->name), "bcs%d", instance);
 		break;
 	case XE_ENGINE_CLASS_COMPUTE:
-		sprintf(q->name, "ccs%d", instance);
+		snprintf(q->name, sizeof(q->name), "ccs%d", instance);
 		break;
 	case XE_ENGINE_CLASS_OTHER:
-		sprintf(q->name, "gsccs%d", instance);
+		snprintf(q->name, sizeof(q->name), "gsccs%d", instance);
 		break;
 	default:
 		XE_WARN_ON(q->class);
@@ -233,7 +268,7 @@ xe_exec_queue_device_get_max_priority(struct xe_device *xe)
 }
 
 static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q,
-				   u64 value, bool create)
+				   u64 value)
 {
 	if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH))
 		return -EINVAL;
@@ -241,7 +276,8 @@ static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q
 	if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe)))
 		return -EPERM;
 
-	return q->ops->set_priority(q, value);
+	q->sched_props.priority = value;
+	return 0;
 }
 
 static bool xe_exec_queue_enforce_schedule_limit(void)
@@ -297,7 +333,7 @@ xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass,
 }
 
 static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q,
-				    u64 value, bool create)
+				    u64 value)
 {
 	u32 min = 0, max = 0;
 
@@ -308,12 +344,13 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *
 	    !xe_hw_engine_timeout_in_range(value, min, max))
 		return -EINVAL;
 
-	return q->ops->set_timeslice(q, value);
+	q->sched_props.timeslice_us = value;
+	return 0;
 }
 
 typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
 					     struct xe_exec_queue *q,
-					     u64 value, bool create);
+					     u64 value);
 
 static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
 	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
@@ -322,8 +359,7 @@ static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
 
 static int exec_queue_user_ext_set_property(struct xe_device *xe,
 					    struct xe_exec_queue *q,
-					    u64 extension,
-					    bool create)
+					    u64 extension)
 {
 	u64 __user *address = u64_to_user_ptr(extension);
 	struct drm_xe_ext_set_property ext;
@@ -345,21 +381,20 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe,
 	if (!exec_queue_set_property_funcs[idx])
 		return -EINVAL;
 
-	return exec_queue_set_property_funcs[idx](xe, q, ext.value,  create);
+	return exec_queue_set_property_funcs[idx](xe, q, ext.value);
 }
 
 typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe,
 					       struct xe_exec_queue *q,
-					       u64 extension,
-					       bool create);
+					       u64 extension);
 
-static const xe_exec_queue_set_property_fn exec_queue_user_extension_funcs[] = {
+static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = {
 	[DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property,
 };
 
 #define MAX_USER_EXTENSIONS	16
 static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
-				      u64 extensions, int ext_number, bool create)
+				      u64 extensions, int ext_number)
 {
 	u64 __user *address = u64_to_user_ptr(extensions);
 	struct drm_xe_user_extension ext;
@@ -380,13 +415,13 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue
 
 	idx = array_index_nospec(ext.name,
 				 ARRAY_SIZE(exec_queue_user_extension_funcs));
-	err = exec_queue_user_extension_funcs[idx](xe, q, extensions, create);
+	err = exec_queue_user_extension_funcs[idx](xe, q, extensions);
 	if (XE_IOCTL_DBG(xe, err))
 		return err;
 
 	if (ext.next_extension)
 		return exec_queue_user_extensions(xe, q, ext.next_extension,
-					      ++ext_number, create);
+						  ++ext_number);
 
 	return 0;
 }
@@ -537,6 +572,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
 		for_each_gt(gt, xe, id) {
 			struct xe_exec_queue *new;
+			u32 flags;
 
 			if (xe_gt_is_media_type(gt))
 				continue;
@@ -553,18 +589,16 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 				return -EINVAL;
 
 			/* The migration vm doesn't hold rpm ref */
-			xe_device_mem_access_get(xe);
+			xe_pm_runtime_get_noresume(xe);
+
+			flags = EXEC_QUEUE_FLAG_VM | (id ? EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0);
 
 			migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate);
 			new = xe_exec_queue_create(xe, migrate_vm, logical_mask,
-						   args->width, hwe,
-						   EXEC_QUEUE_FLAG_PERSISTENT |
-						   EXEC_QUEUE_FLAG_VM |
-						   (id ?
-						    EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD :
-						    0));
+						   args->width, hwe, flags,
+						   args->extensions);
 
-			xe_device_mem_access_put(xe); /* now held by engine */
+			xe_pm_runtime_put(xe); /* now held by engine */
 
 			xe_vm_put(migrate_vm);
 			if (IS_ERR(new)) {
@@ -608,7 +642,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 		}
 
 		q = xe_exec_queue_create(xe, vm, logical_mask,
-					 args->width, hwe, 0);
+					 args->width, hwe, 0,
+					 args->extensions);
 		up_read(&vm->lock);
 		xe_vm_put(vm);
 		if (IS_ERR(q))
@@ -624,12 +659,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 		}
 	}
 
-	if (args->extensions) {
-		err = exec_queue_user_extensions(xe, q, args->extensions, 0, true);
-		if (XE_IOCTL_DBG(xe, err))
-			goto kill_exec_queue;
-	}
-
 	mutex_lock(&xef->exec_queue.lock);
 	err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
 	mutex_unlock(&xef->exec_queue.lock);
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
index d959cc4a1a..02ce8d2046 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue.h
@@ -16,7 +16,8 @@ struct xe_file;
 
 struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
 					   u32 logical_mask, u16 width,
-					   struct xe_hw_engine *hw_engine, u32 flags);
+					   struct xe_hw_engine *hw_engine, u32 flags,
+					   u64 extensions);
 struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
 						 struct xe_vm *vm,
 						 enum xe_engine_class class, u32 flags);
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 22e1dffd06..ee78d497d8 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -76,14 +76,12 @@ struct xe_exec_queue {
 #define EXEC_QUEUE_FLAG_KERNEL			BIT(1)
 /* kernel engine only destroyed at driver unload */
 #define EXEC_QUEUE_FLAG_PERMANENT		BIT(2)
-/* queue keeps running pending jobs after destroy ioctl */
-#define EXEC_QUEUE_FLAG_PERSISTENT		BIT(3)
 /* for VM jobs. Caller needs to hold rpm ref when creating queue with this flag */
-#define EXEC_QUEUE_FLAG_VM			BIT(4)
+#define EXEC_QUEUE_FLAG_VM			BIT(3)
 /* child of VM queue for multi-tile VM jobs */
-#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD	BIT(5)
+#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD	BIT(4)
 /* kernel exec_queue only, set priority to highest level */
-#define EXEC_QUEUE_FLAG_HIGH_PRIORITY		BIT(6)
+#define EXEC_QUEUE_FLAG_HIGH_PRIORITY		BIT(5)
 
 	/**
 	 * @flags: flags for this exec queue, should statically setup aside from ban
@@ -105,50 +103,39 @@ struct xe_exec_queue {
 		struct xe_guc_exec_queue *guc;
 	};
 
-	union {
-		/**
-		 * @parallel: parallel submission state
-		 */
-		struct {
-			/** @composite_fence_ctx: context composite fence */
-			u64 composite_fence_ctx;
-			/** @composite_fence_seqno: seqno for composite fence */
-			u32 composite_fence_seqno;
-		} parallel;
-		/**
-		 * @bind: bind submission state
-		 */
-		struct {
-			/** @fence_ctx: context bind fence */
-			u64 fence_ctx;
-			/** @fence_seqno: seqno for bind fence */
-			u32 fence_seqno;
-		} bind;
-	};
+	/**
+	 * @parallel: parallel submission state
+	 */
+	struct {
+		/** @parallel.composite_fence_ctx: context composite fence */
+		u64 composite_fence_ctx;
+		/** @parallel.composite_fence_seqno: seqno for composite fence */
+		u32 composite_fence_seqno;
+	} parallel;
 
 	/** @sched_props: scheduling properties */
 	struct {
-		/** @timeslice_us: timeslice period in micro-seconds */
+		/** @sched_props.timeslice_us: timeslice period in micro-seconds */
 		u32 timeslice_us;
-		/** @preempt_timeout_us: preemption timeout in micro-seconds */
+		/** @sched_props.preempt_timeout_us: preemption timeout in micro-seconds */
 		u32 preempt_timeout_us;
-		/** @job_timeout_ms: job timeout in milliseconds */
+		/** @sched_props.job_timeout_ms: job timeout in milliseconds */
 		u32 job_timeout_ms;
-		/** @priority: priority of this exec queue */
+		/** @sched_props.priority: priority of this exec queue */
 		enum xe_exec_queue_priority priority;
 	} sched_props;
 
 	/** @compute: compute exec queue state */
 	struct {
-		/** @pfence: preemption fence */
+		/** @compute.pfence: preemption fence */
 		struct dma_fence *pfence;
-		/** @context: preemption fence context */
+		/** @compute.context: preemption fence context */
 		u64 context;
-		/** @seqno: preemption fence seqno */
+		/** @compute.seqno: preemption fence seqno */
 		u32 seqno;
-		/** @link: link into VM's list of exec queues */
+		/** @compute.link: link into VM's list of exec queues */
 		struct list_head link;
-		/** @lock: preemption fences lock */
+		/** @compute.lock: preemption fences lock */
 		spinlock_t lock;
 	} compute;
 
@@ -185,8 +172,6 @@ struct xe_exec_queue_ops {
 	int (*set_timeslice)(struct xe_exec_queue *q, u32 timeslice_us);
 	/** @set_preempt_timeout: Set preemption timeout for exec queue */
 	int (*set_preempt_timeout)(struct xe_exec_queue *q, u32 preempt_timeout_us);
-	/** @set_job_timeout: Set job timeout for exec queue */
-	int (*set_job_timeout)(struct xe_exec_queue *q, u32 job_timeout_ms);
 	/**
 	 * @suspend: Suspend exec queue from executing, allowed to be called
 	 * multiple times in a row before resume with the caveat that
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index acb4d9f38f..dece278593 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -416,13 +416,6 @@ static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
 	return 0;
 }
 
-static int execlist_exec_queue_set_job_timeout(struct xe_exec_queue *q,
-					       u32 job_timeout_ms)
-{
-	/* NIY */
-	return 0;
-}
-
 static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
 {
 	/* NIY */
@@ -453,7 +446,6 @@ static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
 	.set_priority = execlist_exec_queue_set_priority,
 	.set_timeslice = execlist_exec_queue_set_timeslice,
 	.set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
-	.set_job_timeout = execlist_exec_queue_set_job_timeout,
 	.suspend = execlist_exec_queue_suspend,
 	.suspend_wait = execlist_exec_queue_suspend_wait,
 	.resume = execlist_exec_queue_resume,
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 3efd2d066b..0d541f55b4 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -5,26 +5,26 @@
 
 #include "xe_ggtt.h"
 
+#include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/sizes.h>
 
 #include <drm/drm_managed.h>
 #include <drm/i915_drm.h>
 
 #include "regs/xe_gt_regs.h"
+#include "regs/xe_gtt_defs.h"
+#include "regs/xe_regs.h"
+#include "xe_assert.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_gt.h"
+#include "xe_gt_printk.h"
 #include "xe_gt_tlb_invalidation.h"
 #include "xe_map.h"
-#include "xe_mmio.h"
+#include "xe_pm.h"
+#include "xe_sriov.h"
 #include "xe_wopcm.h"
 
-#define XELPG_GGTT_PTE_PAT0	BIT_ULL(52)
-#define XELPG_GGTT_PTE_PAT1	BIT_ULL(53)
-
-/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */
-#define GUC_GGTT_TOP	0xFEE00000
-
 static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
 				   u16 pat_index)
 {
@@ -141,7 +141,11 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 	unsigned int gsm_size;
 
-	gsm_size = probe_gsm_size(pdev);
+	if (IS_SRIOV_VF(xe))
+		gsm_size = SZ_8M; /* GGTT is expected to be 4GiB */
+	else
+		gsm_size = probe_gsm_size(pdev);
+
 	if (gsm_size == 0) {
 		drm_err(&xe->drm, "Hardware reported no preallocated GSM\n");
 		return -ENOMEM;
@@ -192,20 +196,20 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
 	return drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt);
 }
 
+static void xe_ggtt_invalidate(struct xe_ggtt *ggtt);
+
 static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt)
 {
 	struct drm_mm_node *hole;
 	u64 start, end;
 
 	/* Display may have allocated inside ggtt, so be careful with clearing here */
-	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
 	mutex_lock(&ggtt->lock);
 	drm_mm_for_each_hole(hole, &ggtt->mm, start, end)
 		xe_ggtt_clear(ggtt, start, end - start);
 
 	xe_ggtt_invalidate(ggtt);
 	mutex_unlock(&ggtt->lock);
-	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
 }
 
 int xe_ggtt_init(struct xe_ggtt *ggtt)
@@ -219,11 +223,11 @@ int xe_ggtt_init(struct xe_ggtt *ggtt)
 	 * scratch entires, rather keep the scratch page in system memory on
 	 * platforms where 64K pages are needed for VRAM.
 	 */
-	flags = XE_BO_CREATE_PINNED_BIT;
+	flags = XE_BO_FLAG_PINNED;
 	if (ggtt->flags & XE_GGTT_FLAGS_64K)
-		flags |= XE_BO_CREATE_SYSTEM_BIT;
+		flags |= XE_BO_FLAG_SYSTEM;
 	else
-		flags |= XE_BO_CREATE_VRAM_IF_DGFX(ggtt->tile);
+		flags |= XE_BO_FLAG_VRAM_IF_DGFX(ggtt->tile);
 
 	ggtt->scratch = xe_managed_bo_create_pin_map(xe, ggtt->tile, XE_PAGE_SIZE, flags);
 	if (IS_ERR(ggtt->scratch)) {
@@ -241,51 +245,19 @@ err:
 	return err;
 }
 
-#define GUC_TLB_INV_CR				XE_REG(0xcee8)
-#define   GUC_TLB_INV_CR_INVALIDATE		REG_BIT(0)
-#define PVC_GUC_TLB_INV_DESC0			XE_REG(0xcf7c)
-#define   PVC_GUC_TLB_INV_DESC0_VALID		REG_BIT(0)
-#define PVC_GUC_TLB_INV_DESC1			XE_REG(0xcf80)
-#define   PVC_GUC_TLB_INV_DESC1_INVALIDATE	REG_BIT(6)
-
 static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
 {
+	int err;
+
 	if (!gt)
 		return;
 
-	/*
-	 * Invalidation can happen when there's no in-flight work keeping the
-	 * GT awake.  We need to explicitly grab forcewake to ensure the GT
-	 * and GuC are accessible.
-	 */
-	xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
-
-	/* TODO: vfunc for GuC vs. non-GuC */
-
-	if (gt->uc.guc.submission_state.enabled) {
-		int seqno;
-
-		seqno = xe_gt_tlb_invalidation_guc(gt);
-		xe_gt_assert(gt, seqno > 0);
-		if (seqno > 0)
-			xe_gt_tlb_invalidation_wait(gt, seqno);
-	} else if (xe_device_uc_enabled(gt_to_xe(gt))) {
-		struct xe_device *xe = gt_to_xe(gt);
-
-		if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
-			xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1,
-					PVC_GUC_TLB_INV_DESC1_INVALIDATE);
-			xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0,
-					PVC_GUC_TLB_INV_DESC0_VALID);
-		} else
-			xe_mmio_write32(gt, GUC_TLB_INV_CR,
-					GUC_TLB_INV_CR_INVALIDATE);
-	}
-
-	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	err = xe_gt_tlb_invalidation_ggtt(gt);
+	if (err)
+		drm_warn(&gt_to_xe(gt)->drm, "xe_gt_tlb_invalidation_ggtt error=%d", err);
 }
 
-void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
+static void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
 {
 	/* Each GT in a tile has its own TLB to cache GGTT lookups */
 	ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt);
@@ -312,6 +284,74 @@ void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix)
 	}
 }
 
+static void xe_ggtt_dump_node(struct xe_ggtt *ggtt,
+			      const struct drm_mm_node *node, const char *description)
+{
+	char buf[10];
+
+	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+		string_get_size(node->size, 1, STRING_UNITS_2, buf, sizeof(buf));
+		xe_gt_dbg(ggtt->tile->primary_gt, "GGTT %#llx-%#llx (%s) %s\n",
+			  node->start, node->start + node->size, buf, description);
+	}
+}
+
+/**
+ * xe_ggtt_balloon - prevent allocation of specified GGTT addresses
+ * @ggtt: the &xe_ggtt where we want to make reservation
+ * @start: the starting GGTT address of the reserved region
+ * @end: then end GGTT address of the reserved region
+ * @node: the &drm_mm_node to hold reserved GGTT node
+ *
+ * Use xe_ggtt_deballoon() to release a reserved GGTT node.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 end, struct drm_mm_node *node)
+{
+	int err;
+
+	xe_tile_assert(ggtt->tile, start < end);
+	xe_tile_assert(ggtt->tile, IS_ALIGNED(start, XE_PAGE_SIZE));
+	xe_tile_assert(ggtt->tile, IS_ALIGNED(end, XE_PAGE_SIZE));
+	xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(node));
+
+	node->color = 0;
+	node->start = start;
+	node->size = end - start;
+
+	mutex_lock(&ggtt->lock);
+	err = drm_mm_reserve_node(&ggtt->mm, node);
+	mutex_unlock(&ggtt->lock);
+
+	if (xe_gt_WARN(ggtt->tile->primary_gt, err,
+		       "Failed to balloon GGTT %#llx-%#llx (%pe)\n",
+		       node->start, node->start + node->size, ERR_PTR(err)))
+		return err;
+
+	xe_ggtt_dump_node(ggtt, node, "balloon");
+	return 0;
+}
+
+/**
+ * xe_ggtt_deballoon - release a reserved GGTT region
+ * @ggtt: the &xe_ggtt where reserved node belongs
+ * @node: the &drm_mm_node with reserved GGTT region
+ *
+ * See xe_ggtt_balloon() for details.
+ */
+void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node)
+{
+	if (!drm_mm_node_allocated(node))
+		return;
+
+	xe_ggtt_dump_node(ggtt, node, "deballoon");
+
+	mutex_lock(&ggtt->lock);
+	drm_mm_remove_node(node);
+	mutex_unlock(&ggtt->lock);
+}
+
 int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node,
 				       u32 size, u32 align, u32 mm_flags)
 {
@@ -334,7 +374,8 @@ int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
 
 void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 {
-	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB];
+	u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB;
+	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode];
 	u64 start = bo->ggtt_node.start;
 	u64 offset, pte;
 
@@ -342,8 +383,6 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 		pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index);
 		xe_ggtt_set_pte(ggtt, start + offset, pte);
 	}
-
-	xe_ggtt_invalidate(ggtt);
 }
 
 static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
@@ -365,14 +404,17 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
 	if (err)
 		return err;
 
-	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
+	xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile));
 	mutex_lock(&ggtt->lock);
 	err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size,
 					  alignment, 0, start, end, 0);
 	if (!err)
 		xe_ggtt_map_bo(ggtt, bo);
 	mutex_unlock(&ggtt->lock);
-	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
+
+	if (!err && bo->flags & XE_BO_FLAG_GGTT_INVALIDATE)
+		xe_ggtt_invalidate(ggtt);
+	xe_pm_runtime_put(tile_to_xe(ggtt->tile));
 
 	return err;
 }
@@ -388,19 +430,21 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 	return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX);
 }
 
-void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node)
+void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
+			 bool invalidate)
 {
-	xe_device_mem_access_get(tile_to_xe(ggtt->tile));
-	mutex_lock(&ggtt->lock);
+	xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile));
 
+	mutex_lock(&ggtt->lock);
 	xe_ggtt_clear(ggtt, node->start, node->size);
 	drm_mm_remove_node(node);
 	node->size = 0;
+	mutex_unlock(&ggtt->lock);
 
-	xe_ggtt_invalidate(ggtt);
+	if (invalidate)
+		xe_ggtt_invalidate(ggtt);
 
-	mutex_unlock(&ggtt->lock);
-	xe_device_mem_access_put(tile_to_xe(ggtt->tile));
+	xe_pm_runtime_put(tile_to_xe(ggtt->tile));
 }
 
 void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
@@ -411,8 +455,53 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 	/* This BO is not currently in the GGTT */
 	xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size);
 
-	xe_ggtt_remove_node(ggtt, &bo->ggtt_node);
+	xe_ggtt_remove_node(ggtt, &bo->ggtt_node,
+			    bo->flags & XE_BO_FLAG_GGTT_INVALIDATE);
+}
+
+#ifdef CONFIG_PCI_IOV
+static u64 xe_encode_vfid_pte(u16 vfid)
+{
+	return FIELD_PREP(GGTT_PTE_VFID, vfid) | XE_PAGE_PRESENT;
+}
+
+static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid)
+{
+	u64 start = node->start;
+	u64 size = node->size;
+	u64 end = start + size - 1;
+	u64 pte = xe_encode_vfid_pte(vfid);
+
+	lockdep_assert_held(&ggtt->lock);
+
+	if (!drm_mm_node_allocated(node))
+		return;
+
+	while (start < end) {
+		xe_ggtt_set_pte(ggtt, start, pte);
+		start += XE_PAGE_SIZE;
+	}
+
+	xe_ggtt_invalidate(ggtt);
+}
+
+/**
+ * xe_ggtt_assign - assign a GGTT region to the VF
+ * @ggtt: the &xe_ggtt where the node belongs
+ * @node: the &drm_mm_node to update
+ * @vfid: the VF identifier
+ *
+ * This function is used by the PF driver to assign a GGTT region to the VF.
+ * In addition to PTE's VFID bits 11:2 also PRESENT bit 0 is set as on some
+ * platforms VFs can't modify that either.
+ */
+void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid)
+{
+	mutex_lock(&ggtt->lock);
+	xe_ggtt_assign_locked(ggtt, node, vfid);
+	mutex_unlock(&ggtt->lock);
 }
+#endif
 
 int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p)
 {
diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h
index a09c166dff..4a41a17623 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.h
+++ b/drivers/gpu/drm/xe/xe_ggtt.h
@@ -11,17 +11,20 @@
 struct drm_printer;
 
 void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte);
-void xe_ggtt_invalidate(struct xe_ggtt *ggtt);
 int xe_ggtt_init_early(struct xe_ggtt *ggtt);
 int xe_ggtt_init(struct xe_ggtt *ggtt);
 void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix);
 
+int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 size, struct drm_mm_node *node);
+void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node);
+
 int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
 				u32 size, u32 align);
 int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt,
 				       struct drm_mm_node *node,
 				       u32 size, u32 align, u32 mm_flags);
-void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node);
+void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
+			 bool invalidate);
 void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
 int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
 int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
@@ -30,4 +33,8 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
 
 int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p);
 
+#ifdef CONFIG_PCI_IOV
+void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid);
+#endif
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index a8a895cf4b..60202b9036 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -7,23 +7,28 @@
 
 #include <drm/drm_managed.h>
 
+#include <generated/xe_wa_oob.h>
+
 #include "abi/gsc_mkhi_commands_abi.h"
-#include "generated/xe_wa_oob.h"
 #include "xe_bb.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_exec_queue.h"
+#include "xe_gsc_proxy.h"
 #include "xe_gsc_submit.h"
 #include "xe_gt.h"
+#include "xe_gt_mcr.h"
 #include "xe_gt_printk.h"
 #include "xe_huc.h"
 #include "xe_map.h"
 #include "xe_mmio.h"
+#include "xe_pm.h"
 #include "xe_sched_job.h"
 #include "xe_uc_fw.h"
 #include "xe_wa.h"
 #include "instructions/xe_gsc_commands.h"
 #include "regs/xe_gsc_regs.h"
+#include "regs/xe_gt_regs.h"
 
 static struct xe_gt *
 gsc_to_gt(struct xe_gsc *gsc)
@@ -125,8 +130,8 @@ static int query_compatibility_version(struct xe_gsc *gsc)
 
 	bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2,
 				  ttm_bo_type_kernel,
-				  XE_BO_CREATE_SYSTEM_BIT |
-				  XE_BO_CREATE_GGTT_BIT);
+				  XE_BO_FLAG_SYSTEM |
+				  XE_BO_FLAG_GGTT);
 	if (IS_ERR(bo)) {
 		xe_gt_err(gt, "failed to allocate bo for GSC version query\n");
 		return PTR_ERR(bo);
@@ -242,8 +247,90 @@ static int gsc_upload(struct xe_gsc *gsc)
 	if (err)
 		return err;
 
+	return 0;
+}
+
+static int gsc_upload_and_init(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_tile *tile = gt_to_tile(gt);
+	int ret;
+
+	if (XE_WA(gt, 14018094691)) {
+		ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);
+
+		/*
+		 * If the forcewake fails we want to keep going, because the worst
+		 * case outcome in failing to apply the WA is that PXP won't work,
+		 * which is not fatal. We still throw a warning so the issue is
+		 * seen if it happens.
+		 */
+		xe_gt_WARN_ON(tile->primary_gt, ret);
+
+		xe_gt_mcr_multicast_write(tile->primary_gt,
+					  EU_SYSTOLIC_LIC_THROTTLE_CTL_WITH_LOCK,
+					  EU_SYSTOLIC_LIC_THROTTLE_CTL_LOCK_BIT);
+	}
+
+	ret = gsc_upload(gsc);
+
+	if (XE_WA(gt, 14018094691))
+		xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);
+
+	if (ret)
+		return ret;
+
+	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
 	xe_gt_dbg(gt, "GSC FW async load completed\n");
 
+	/* HuC auth failure is not fatal */
+	if (xe_huc_is_authenticated(&gt->uc.huc, XE_HUC_AUTH_VIA_GUC))
+		xe_huc_auth(&gt->uc.huc, XE_HUC_AUTH_VIA_GSC);
+
+	ret = xe_gsc_proxy_start(gsc);
+	if (ret)
+		return ret;
+
+	xe_gt_dbg(gt, "GSC proxy init completed\n");
+
+	return 0;
+}
+
+static int gsc_er_complete(struct xe_gt *gt)
+{
+	u32 er_status;
+
+	if (!gsc_fw_is_loaded(gt))
+		return 0;
+
+	/*
+	 * Starting on Xe2, the GSCCS engine reset is a 2-step process. When the
+	 * driver or the GuC hit the GDRST register, the CS is immediately reset
+	 * and a success is reported, but the GSC shim keeps resetting in the
+	 * background. While the shim reset is ongoing, the CS is able to accept
+	 * new context submission, but any commands that require the shim will
+	 * be stalled until the reset is completed. This means that we can keep
+	 * submitting to the GSCCS as long as we make sure that the preemption
+	 * timeout is big enough to cover any delay introduced by the reset.
+	 * When the shim reset completes, a specific CS interrupt is triggered,
+	 * in response to which we need to check the GSCI_TIMER_STATUS register
+	 * to see if the reset was successful or not.
+	 * Note that the GSCI_TIMER_STATUS register is not power save/restored,
+	 * so it gets reset on MC6 entry. However, a reset failure stops MC6,
+	 * so in that scenario we're always guaranteed to find the correct
+	 * value.
+	 */
+	er_status = xe_mmio_read32(gt, GSCI_TIMER_STATUS) & GSCI_TIMER_STATUS_VALUE;
+
+	if (er_status == GSCI_TIMER_STATUS_TIMER_EXPIRED) {
+		/*
+		 * XXX: we should trigger an FLR here, but we don't have support
+		 * for that yet.
+		 */
+		xe_gt_err(gt, "GSC ER timed out!\n");
+		return -EIO;
+	}
+
 	return 0;
 }
 
@@ -252,26 +339,54 @@ static void gsc_work(struct work_struct *work)
 	struct xe_gsc *gsc = container_of(work, typeof(*gsc), work);
 	struct xe_gt *gt = gsc_to_gt(gsc);
 	struct xe_device *xe = gt_to_xe(gt);
+	u32 actions;
 	int ret;
 
-	xe_device_mem_access_get(xe);
-	xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
+	spin_lock_irq(&gsc->lock);
+	actions = gsc->work_actions;
+	gsc->work_actions = 0;
+	spin_unlock_irq(&gsc->lock);
 
-	ret = gsc_upload(gsc);
-	if (ret && ret != -EEXIST) {
-		xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
-		goto out;
+	xe_pm_runtime_get(xe);
+	xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC));
+
+	if (actions & GSC_ACTION_ER_COMPLETE) {
+		ret = gsc_er_complete(gt);
+		if (ret)
+			goto out;
 	}
 
-	xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
+	if (actions & GSC_ACTION_FW_LOAD) {
+		ret = gsc_upload_and_init(gsc);
+		if (ret && ret != -EEXIST)
+			xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
+		else
+			xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_RUNNING);
+	}
 
-	/* HuC auth failure is not fatal */
-	if (xe_huc_is_authenticated(&gt->uc.huc, XE_HUC_AUTH_VIA_GUC))
-		xe_huc_auth(&gt->uc.huc, XE_HUC_AUTH_VIA_GSC);
+	if (actions & GSC_ACTION_SW_PROXY)
+		xe_gsc_proxy_request_handler(gsc);
 
 out:
 	xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
-	xe_device_mem_access_put(xe);
+	xe_pm_runtime_put(xe);
+}
+
+void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec)
+{
+	struct xe_gt *gt = hwe->gt;
+	struct xe_gsc *gsc = &gt->uc.gsc;
+
+	if (unlikely(!intr_vec))
+		return;
+
+	if (intr_vec & GSC_ER_COMPLETE) {
+		spin_lock(&gsc->lock);
+		gsc->work_actions |= GSC_ACTION_ER_COMPLETE;
+		spin_unlock(&gsc->lock);
+
+		queue_work(gsc->wq, &gsc->work);
+	}
 }
 
 int xe_gsc_init(struct xe_gsc *gsc)
@@ -282,6 +397,7 @@ int xe_gsc_init(struct xe_gsc *gsc)
 
 	gsc->fw.type = XE_UC_FW_TYPE_GSC;
 	INIT_WORK(&gsc->work, gsc_work);
+	spin_lock_init(&gsc->lock);
 
 	/* The GSC uC is only available on the media GT */
 	if (tile->media_gt && (gt != tile->media_gt)) {
@@ -302,6 +418,10 @@ int xe_gsc_init(struct xe_gsc *gsc)
 	else if (ret)
 		goto out;
 
+	ret = xe_gsc_proxy_init(gsc);
+	if (ret && ret != -ENODEV)
+		goto out;
+
 	return 0;
 
 out:
@@ -348,15 +468,15 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
 
 	bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M,
 				  ttm_bo_type_kernel,
-				  XE_BO_CREATE_STOLEN_BIT |
-				  XE_BO_CREATE_GGTT_BIT);
+				  XE_BO_FLAG_STOLEN |
+				  XE_BO_FLAG_GGTT);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
 	q = xe_exec_queue_create(xe, NULL,
 				 BIT(hwe->logical_instance), 1, hwe,
 				 EXEC_QUEUE_FLAG_KERNEL |
-				 EXEC_QUEUE_FLAG_PERMANENT);
+				 EXEC_QUEUE_FLAG_PERMANENT, 0);
 	if (IS_ERR(q)) {
 		xe_gt_err(gt, "Failed to create queue for GSC submission\n");
 		err = PTR_ERR(q);
@@ -401,6 +521,10 @@ void xe_gsc_load_start(struct xe_gsc *gsc)
 		return;
 	}
 
+	spin_lock_irq(&gsc->lock);
+	gsc->work_actions |= GSC_ACTION_FW_LOAD;
+	spin_unlock_irq(&gsc->lock);
+
 	queue_work(gsc->wq, &gsc->work);
 }
 
@@ -410,6 +534,15 @@ void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc)
 		flush_work(&gsc->work);
 }
 
+/**
+ * xe_gsc_remove() - Clean up the GSC structures before driver removal
+ * @gsc: the GSC uC
+ */
+void xe_gsc_remove(struct xe_gsc *gsc)
+{
+	xe_gsc_proxy_remove(gsc);
+}
+
 /*
  * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a
  * GSC engine reset by writing a notification bit in the GS1 register and then
diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h
index bc1ef7f31e..dd16e9b8b8 100644
--- a/drivers/gpu/drm/xe/xe_gsc.h
+++ b/drivers/gpu/drm/xe/xe_gsc.h
@@ -9,11 +9,14 @@
 #include "xe_gsc_types.h"
 
 struct xe_gt;
+struct xe_hw_engine;
 
 int xe_gsc_init(struct xe_gsc *gsc);
 int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc);
 void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc);
 void xe_gsc_load_start(struct xe_gsc *gsc);
+void xe_gsc_remove(struct xe_gsc *gsc);
+void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec);
 
 void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep);
 
diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c
new file mode 100644
index 0000000000..1b908d238b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c
@@ -0,0 +1,532 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_gsc_proxy.h"
+
+#include <linux/component.h>
+#include <linux/delay.h>
+
+#include <drm/drm_managed.h>
+#include <drm/i915_component.h>
+#include <drm/i915_gsc_proxy_mei_interface.h>
+
+#include "abi/gsc_proxy_commands_abi.h"
+#include "regs/xe_gsc_regs.h"
+#include "xe_bo.h"
+#include "xe_gsc.h"
+#include "xe_gsc_submit.h"
+#include "xe_gt.h"
+#include "xe_gt_printk.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_pm.h"
+
+/*
+ * GSC proxy:
+ * The GSC uC needs to communicate with the CSME to perform certain operations.
+ * Since the GSC can't perform this communication directly on platforms where it
+ * is integrated in GT, the graphics driver needs to transfer the messages from
+ * GSC to CSME and back. The proxy flow must be manually started after the GSC
+ * is loaded to signal to GSC that we're ready to handle its messages and allow
+ * it to query its init data from CSME; GSC will then trigger an HECI2 interrupt
+ * if it needs to send messages to CSME again.
+ * The proxy flow is as follow:
+ * 1 - Xe submits a request to GSC asking for the message to CSME
+ * 2 - GSC replies with the proxy header + payload for CSME
+ * 3 - Xe sends the reply from GSC as-is to CSME via the mei proxy component
+ * 4 - CSME replies with the proxy header + payload for GSC
+ * 5 - Xe submits a request to GSC with the reply from CSME
+ * 6 - GSC replies either with a new header + payload (same as step 2, so we
+ *     restart from there) or with an end message.
+ */
+
+/*
+ * The component should load quite quickly in most cases, but it could take
+ * a bit. Using a very big timeout just to cover the worst case scenario
+ */
+#define GSC_PROXY_INIT_TIMEOUT_MS 20000
+
+/* shorthand define for code compactness */
+#define PROXY_HDR_SIZE (sizeof(struct xe_gsc_proxy_header))
+
+/* the protocol supports up to 32K in each direction */
+#define GSC_PROXY_BUFFER_SIZE SZ_32K
+#define GSC_PROXY_CHANNEL_SIZE (GSC_PROXY_BUFFER_SIZE * 2)
+
+static struct xe_gt *
+gsc_to_gt(struct xe_gsc *gsc)
+{
+	return container_of(gsc, struct xe_gt, uc.gsc);
+}
+
+static inline struct xe_device *kdev_to_xe(struct device *kdev)
+{
+	return dev_get_drvdata(kdev);
+}
+
+bool xe_gsc_proxy_init_done(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	u32 fwsts1 = xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE));
+
+	return REG_FIELD_GET(HECI1_FWSTS1_CURRENT_STATE, fwsts1) ==
+	       HECI1_FWSTS1_PROXY_STATE_NORMAL;
+}
+
+static void __gsc_proxy_irq_rmw(struct xe_gsc *gsc, u32 clr, u32 set)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+
+	/* make sure we never accidentally write the RST bit */
+	clr |= HECI_H_CSR_RST;
+
+	xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE), clr, set);
+}
+
+static void gsc_proxy_irq_clear(struct xe_gsc *gsc)
+{
+	/* The status bit is cleared by writing to it */
+	__gsc_proxy_irq_rmw(gsc, 0, HECI_H_CSR_IS);
+}
+
+static void gsc_proxy_irq_toggle(struct xe_gsc *gsc, bool enabled)
+{
+	u32 set = enabled ? HECI_H_CSR_IE : 0;
+	u32 clr = enabled ? 0 : HECI_H_CSR_IE;
+
+	__gsc_proxy_irq_rmw(gsc, clr, set);
+}
+
+static int proxy_send_to_csme(struct xe_gsc *gsc, u32 size)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct i915_gsc_proxy_component *comp = gsc->proxy.component;
+	int ret;
+
+	ret = comp->ops->send(comp->mei_dev, gsc->proxy.to_csme, size);
+	if (ret < 0) {
+		xe_gt_err(gt, "Failed to send CSME proxy message\n");
+		return ret;
+	}
+
+	ret = comp->ops->recv(comp->mei_dev, gsc->proxy.from_csme, GSC_PROXY_BUFFER_SIZE);
+	if (ret < 0) {
+		xe_gt_err(gt, "Failed to receive CSME proxy message\n");
+		return ret;
+	}
+
+	return ret;
+}
+
+static int proxy_send_to_gsc(struct xe_gsc *gsc, u32 size)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	u64 addr_in = xe_bo_ggtt_addr(gsc->proxy.bo);
+	u64 addr_out = addr_in + GSC_PROXY_BUFFER_SIZE;
+	int err;
+
+	/* the message must contain at least the gsc and proxy headers */
+	if (size > GSC_PROXY_BUFFER_SIZE) {
+		xe_gt_err(gt, "Invalid GSC proxy message size: %u\n", size);
+		return -EINVAL;
+	}
+
+	err = xe_gsc_pkt_submit_kernel(gsc, addr_in, size,
+				       addr_out, GSC_PROXY_BUFFER_SIZE);
+	if (err) {
+		xe_gt_err(gt, "Failed to submit gsc proxy rq (%pe)\n", ERR_PTR(err));
+		return err;
+	}
+
+	return 0;
+}
+
+static int validate_proxy_header(struct xe_gsc_proxy_header *header,
+				 u32 source, u32 dest, u32 max_size)
+{
+	u32 type = FIELD_GET(GSC_PROXY_TYPE, header->hdr);
+	u32 length = FIELD_GET(GSC_PROXY_PAYLOAD_LENGTH, header->hdr);
+
+	if (header->destination != dest || header->source != source)
+		return -ENOEXEC;
+
+	if (length + PROXY_HDR_SIZE > max_size)
+		return -E2BIG;
+
+	switch (type) {
+	case GSC_PROXY_MSG_TYPE_PROXY_PAYLOAD:
+		if (length > 0)
+			break;
+		fallthrough;
+	case GSC_PROXY_MSG_TYPE_PROXY_INVALID:
+		return -EIO;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+#define proxy_header_wr(xe_, map_, offset_, field_, val_) \
+	xe_map_wr_field(xe_, map_, offset_, struct xe_gsc_proxy_header, field_, val_)
+
+#define proxy_header_rd(xe_, map_, offset_, field_) \
+	xe_map_rd_field(xe_, map_, offset_, struct xe_gsc_proxy_header, field_)
+
+static u32 emit_proxy_header(struct xe_device *xe, struct iosys_map *map, u32 offset)
+{
+	xe_map_memset(xe, map, offset, 0, PROXY_HDR_SIZE);
+
+	proxy_header_wr(xe, map, offset, hdr,
+			FIELD_PREP(GSC_PROXY_TYPE, GSC_PROXY_MSG_TYPE_PROXY_QUERY) |
+			FIELD_PREP(GSC_PROXY_PAYLOAD_LENGTH, 0));
+
+	proxy_header_wr(xe, map, offset, source, GSC_PROXY_ADDRESSING_KMD);
+	proxy_header_wr(xe, map, offset, destination, GSC_PROXY_ADDRESSING_GSC);
+	proxy_header_wr(xe, map, offset, status, 0);
+
+	return offset + PROXY_HDR_SIZE;
+}
+
+static int proxy_query(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_gsc_proxy_header *to_csme_hdr = gsc->proxy.to_csme;
+	void *to_csme_payload = gsc->proxy.to_csme + PROXY_HDR_SIZE;
+	u32 wr_offset;
+	u32 reply_offset;
+	u32 size;
+	int ret;
+
+	wr_offset = xe_gsc_emit_header(xe, &gsc->proxy.to_gsc, 0,
+				       HECI_MEADDRESS_PROXY, 0, PROXY_HDR_SIZE);
+	wr_offset = emit_proxy_header(xe, &gsc->proxy.to_gsc, wr_offset);
+
+	size = wr_offset;
+
+	while (1) {
+		/*
+		 * Poison the GSC response header space to make sure we don't
+		 * read a stale reply.
+		 */
+		xe_gsc_poison_header(xe, &gsc->proxy.from_gsc, 0);
+
+		/* send proxy message to GSC */
+		ret = proxy_send_to_gsc(gsc, size);
+		if (ret)
+			goto proxy_error;
+
+		/* check the reply from GSC */
+		ret = xe_gsc_read_out_header(xe, &gsc->proxy.from_gsc, 0,
+					     PROXY_HDR_SIZE, &reply_offset);
+		if (ret) {
+			xe_gt_err(gt, "Invalid gsc header in proxy reply (%pe)\n",
+				  ERR_PTR(ret));
+			goto proxy_error;
+		}
+
+		/* copy the proxy header reply from GSC */
+		xe_map_memcpy_from(xe, to_csme_hdr, &gsc->proxy.from_gsc,
+				   reply_offset, PROXY_HDR_SIZE);
+
+		/* stop if this was the last message */
+		if (FIELD_GET(GSC_PROXY_TYPE, to_csme_hdr->hdr) == GSC_PROXY_MSG_TYPE_PROXY_END)
+			break;
+
+		/* make sure the GSC-to-CSME proxy header is sane */
+		ret = validate_proxy_header(to_csme_hdr,
+					    GSC_PROXY_ADDRESSING_GSC,
+					    GSC_PROXY_ADDRESSING_CSME,
+					    GSC_PROXY_BUFFER_SIZE - reply_offset);
+		if (ret) {
+			xe_gt_err(gt, "invalid GSC to CSME proxy header! (%pe)\n",
+				  ERR_PTR(ret));
+			goto proxy_error;
+		}
+
+		/* copy the rest of the message */
+		size = FIELD_GET(GSC_PROXY_PAYLOAD_LENGTH, to_csme_hdr->hdr);
+		xe_map_memcpy_from(xe, to_csme_payload, &gsc->proxy.from_gsc,
+				   reply_offset + PROXY_HDR_SIZE, size);
+
+		/* send the GSC message to the CSME */
+		ret = proxy_send_to_csme(gsc, size + PROXY_HDR_SIZE);
+		if (ret < 0)
+			goto proxy_error;
+
+		/* reply size from CSME, including the proxy header */
+		size = ret;
+		if (size < PROXY_HDR_SIZE) {
+			xe_gt_err(gt, "CSME to GSC proxy msg too small: 0x%x\n", size);
+			ret = -EPROTO;
+			goto proxy_error;
+		}
+
+		/* make sure the CSME-to-GSC proxy header is sane */
+		ret = validate_proxy_header(gsc->proxy.from_csme,
+					    GSC_PROXY_ADDRESSING_CSME,
+					    GSC_PROXY_ADDRESSING_GSC,
+					    GSC_PROXY_BUFFER_SIZE - reply_offset);
+		if (ret) {
+			xe_gt_err(gt, "invalid CSME to GSC proxy header! %d\n", ret);
+			goto proxy_error;
+		}
+
+		/* Emit a new header for sending the reply to the GSC */
+		wr_offset = xe_gsc_emit_header(xe, &gsc->proxy.to_gsc, 0,
+					       HECI_MEADDRESS_PROXY, 0, size);
+
+		/* copy the CSME reply and update the total msg size to include the GSC header */
+		xe_map_memcpy_to(xe, &gsc->proxy.to_gsc, wr_offset, gsc->proxy.from_csme, size);
+
+		size += wr_offset;
+	}
+
+proxy_error:
+	return ret < 0 ? ret : 0;
+}
+
+int xe_gsc_proxy_request_handler(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	int slept;
+	int err;
+
+	if (!gsc->proxy.component_added)
+		return -ENODEV;
+
+	/* when GSC is loaded, we can queue this before the component is bound */
+	for (slept = 0; slept < GSC_PROXY_INIT_TIMEOUT_MS; slept += 100) {
+		if (gsc->proxy.component)
+			break;
+
+		msleep(100);
+	}
+
+	mutex_lock(&gsc->proxy.mutex);
+	if (!gsc->proxy.component) {
+		xe_gt_err(gt, "GSC proxy component not bound!\n");
+		err = -EIO;
+	} else {
+		/*
+		 * clear the pending interrupt and allow new proxy requests to
+		 * be generated while we handle the current one
+		 */
+		gsc_proxy_irq_clear(gsc);
+		err = proxy_query(gsc);
+	}
+	mutex_unlock(&gsc->proxy.mutex);
+	return err;
+}
+
+void xe_gsc_proxy_irq_handler(struct xe_gsc *gsc, u32 iir)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+
+	if (unlikely(!iir))
+		return;
+
+	if (!gsc->proxy.component) {
+		xe_gt_err(gt, "GSC proxy irq received without the component being bound!\n");
+		return;
+	}
+
+	spin_lock(&gsc->lock);
+	gsc->work_actions |= GSC_ACTION_SW_PROXY;
+	spin_unlock(&gsc->lock);
+
+	queue_work(gsc->wq, &gsc->work);
+}
+
+static int xe_gsc_proxy_component_bind(struct device *xe_kdev,
+				       struct device *mei_kdev, void *data)
+{
+	struct xe_device *xe = kdev_to_xe(xe_kdev);
+	struct xe_gt *gt = xe->tiles[0].media_gt;
+	struct xe_gsc *gsc = &gt->uc.gsc;
+
+	mutex_lock(&gsc->proxy.mutex);
+	gsc->proxy.component = data;
+	gsc->proxy.component->mei_dev = mei_kdev;
+	mutex_unlock(&gsc->proxy.mutex);
+
+	return 0;
+}
+
+static void xe_gsc_proxy_component_unbind(struct device *xe_kdev,
+					  struct device *mei_kdev, void *data)
+{
+	struct xe_device *xe = kdev_to_xe(xe_kdev);
+	struct xe_gt *gt = xe->tiles[0].media_gt;
+	struct xe_gsc *gsc = &gt->uc.gsc;
+
+	xe_gsc_wait_for_worker_completion(gsc);
+
+	mutex_lock(&gsc->proxy.mutex);
+	gsc->proxy.component = NULL;
+	mutex_unlock(&gsc->proxy.mutex);
+}
+
+static const struct component_ops xe_gsc_proxy_component_ops = {
+	.bind   = xe_gsc_proxy_component_bind,
+	.unbind = xe_gsc_proxy_component_unbind,
+};
+
+static void proxy_channel_free(struct drm_device *drm, void *arg)
+{
+	struct xe_gsc *gsc = arg;
+
+	if (!gsc->proxy.bo)
+		return;
+
+	if (gsc->proxy.to_csme) {
+		kfree(gsc->proxy.to_csme);
+		gsc->proxy.to_csme = NULL;
+		gsc->proxy.from_csme = NULL;
+	}
+
+	if (gsc->proxy.bo) {
+		iosys_map_clear(&gsc->proxy.to_gsc);
+		iosys_map_clear(&gsc->proxy.from_gsc);
+		xe_bo_unpin_map_no_vm(gsc->proxy.bo);
+		gsc->proxy.bo = NULL;
+	}
+}
+
+static int proxy_channel_alloc(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_bo *bo;
+	void *csme;
+
+	csme = kzalloc(GSC_PROXY_CHANNEL_SIZE, GFP_KERNEL);
+	if (!csme)
+		return -ENOMEM;
+
+	bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_PROXY_CHANNEL_SIZE,
+				  ttm_bo_type_kernel,
+				  XE_BO_FLAG_SYSTEM |
+				  XE_BO_FLAG_GGTT);
+	if (IS_ERR(bo)) {
+		kfree(csme);
+		return PTR_ERR(bo);
+	}
+
+	gsc->proxy.bo = bo;
+	gsc->proxy.to_gsc = IOSYS_MAP_INIT_OFFSET(&bo->vmap, 0);
+	gsc->proxy.from_gsc = IOSYS_MAP_INIT_OFFSET(&bo->vmap, GSC_PROXY_BUFFER_SIZE);
+	gsc->proxy.to_csme = csme;
+	gsc->proxy.from_csme = csme + GSC_PROXY_BUFFER_SIZE;
+
+	return drmm_add_action_or_reset(&xe->drm, proxy_channel_free, gsc);
+}
+
+/**
+ * xe_gsc_proxy_init() - init objects and MEI component required by GSC proxy
+ * @gsc: the GSC uC
+ *
+ * Return: 0 if the initialization was successful, a negative errno otherwise.
+ */
+int xe_gsc_proxy_init(struct xe_gsc *gsc)
+{
+	int err;
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = tile_to_xe(tile);
+
+	mutex_init(&gsc->proxy.mutex);
+
+	if (!IS_ENABLED(CONFIG_INTEL_MEI_GSC_PROXY)) {
+		xe_gt_info(gt, "can't init GSC proxy due to missing mei component\n");
+		return -ENODEV;
+	}
+
+	/* no multi-tile devices with this feature yet */
+	if (tile->id > 0) {
+		xe_gt_err(gt, "unexpected GSC proxy init on tile %u\n", tile->id);
+		return -EINVAL;
+	}
+
+	err = proxy_channel_alloc(gsc);
+	if (err)
+		return err;
+
+	err = component_add_typed(xe->drm.dev, &xe_gsc_proxy_component_ops,
+				  I915_COMPONENT_GSC_PROXY);
+	if (err < 0) {
+		xe_gt_err(gt, "Failed to add GSC_PROXY component (%pe)\n", ERR_PTR(err));
+		return err;
+	}
+
+	gsc->proxy.component_added = true;
+
+	/* the component must be removed before unload, so can't use drmm for cleanup */
+
+	return 0;
+}
+
+/**
+ * xe_gsc_proxy_remove() - remove the GSC proxy MEI component
+ * @gsc: the GSC uC
+ */
+void xe_gsc_proxy_remove(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_device *xe = gt_to_xe(gt);
+	int err = 0;
+
+	if (!gsc->proxy.component_added)
+		return;
+
+	/* disable HECI2 IRQs */
+	xe_pm_runtime_get(xe);
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
+	if (err)
+		xe_gt_err(gt, "failed to get forcewake to disable GSC interrupts\n");
+
+	/* try do disable irq even if forcewake failed */
+	gsc_proxy_irq_toggle(gsc, false);
+
+	if (!err)
+		xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
+	xe_pm_runtime_put(xe);
+
+	xe_gsc_wait_for_worker_completion(gsc);
+
+	component_del(xe->drm.dev, &xe_gsc_proxy_component_ops);
+	gsc->proxy.component_added = false;
+}
+
+/**
+ * xe_gsc_proxy_start() - start the proxy by submitting the first request
+ * @gsc: the GSC uC
+ *
+ * Return: 0 if the proxy are now enabled, a negative errno otherwise.
+ */
+int xe_gsc_proxy_start(struct xe_gsc *gsc)
+{
+	int err;
+
+	/* enable the proxy interrupt in the GSC shim layer */
+	gsc_proxy_irq_toggle(gsc, true);
+
+	/*
+	 * The handling of the first proxy request must be manually triggered to
+	 * notify the GSC that we're ready to support the proxy flow.
+	 */
+	err = xe_gsc_proxy_request_handler(gsc);
+	if (err)
+		return err;
+
+	if (!xe_gsc_proxy_init_done(gsc)) {
+		xe_gt_err(gsc_to_gt(gsc), "GSC FW reports proxy init not completed\n");
+		return -EIO;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.h b/drivers/gpu/drm/xe/xe_gsc_proxy.h
new file mode 100644
index 0000000000..c511ade6b8
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GSC_PROXY_H_
+#define _XE_GSC_PROXY_H_
+
+#include <linux/types.h>
+
+struct xe_gsc;
+
+int xe_gsc_proxy_init(struct xe_gsc *gsc);
+bool xe_gsc_proxy_init_done(struct xe_gsc *gsc);
+void xe_gsc_proxy_remove(struct xe_gsc *gsc);
+int xe_gsc_proxy_start(struct xe_gsc *gsc);
+
+int xe_gsc_proxy_request_handler(struct xe_gsc *gsc);
+void xe_gsc_proxy_irq_handler(struct xe_gsc *gsc, u32 iir);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.c b/drivers/gpu/drm/xe/xe_gsc_submit.c
index 8c5381e591..d34d032488 100644
--- a/drivers/gpu/drm/xe/xe_gsc_submit.c
+++ b/drivers/gpu/drm/xe/xe_gsc_submit.c
@@ -5,6 +5,8 @@
 
 #include "xe_gsc_submit.h"
 
+#include <linux/poison.h>
+
 #include "abi/gsc_command_header_abi.h"
 #include "xe_bb.h"
 #include "xe_exec_queue.h"
@@ -39,6 +41,21 @@ gsc_to_gt(struct xe_gsc *gsc)
 }
 
 /**
+ * xe_gsc_create_host_session_id - Creates a random 64 bit host_session id with
+ * bits 56-63 masked.
+ *
+ * Returns: random host_session_id which can be used to send messages to gsc cs
+ */
+u64 xe_gsc_create_host_session_id(void)
+{
+	u64 host_session_id;
+
+	get_random_bytes(&host_session_id, sizeof(u64));
+	host_session_id &= ~HOST_SESSION_CLIENT_MASK;
+	return host_session_id;
+}
+
+/**
  * xe_gsc_emit_header - write the MTL GSC header in memory
  * @xe: the Xe device
  * @map: the iosys map to write to
@@ -69,6 +86,17 @@ u32 xe_gsc_emit_header(struct xe_device *xe, struct iosys_map *map, u32 offset,
 };
 
 /**
+ * xe_gsc_poison_header - poison the MTL GSC header in memory
+ * @xe: the Xe device
+ * @map: the iosys map to write to
+ * @offset: offset from the start of the map at which the header resides
+ */
+void xe_gsc_poison_header(struct xe_device *xe, struct iosys_map *map, u32 offset)
+{
+	xe_map_memset(xe, map, offset, POISON_FREE, GSC_HDR_SIZE);
+};
+
+/**
  * xe_gsc_check_and_update_pending - check the pending bit and update the input
  * header with the retry handle from the output header
  * @xe: the Xe device
@@ -112,11 +140,18 @@ int xe_gsc_read_out_header(struct xe_device *xe,
 {
 	u32 marker = mtl_gsc_header_rd(xe, map, offset, validity_marker);
 	u32 size = mtl_gsc_header_rd(xe, map, offset, message_size);
+	u32 status = mtl_gsc_header_rd(xe, map, offset, status);
 	u32 payload_size = size - GSC_HDR_SIZE;
 
 	if (marker != GSC_HECI_VALIDITY_MARKER)
 		return -EPROTO;
 
+	if (status != 0) {
+		drm_err(&xe->drm, "GSC header readout indicates error: %d\n",
+			status);
+		return -EINVAL;
+	}
+
 	if (size < GSC_HDR_SIZE || payload_size < min_payload_size)
 		return -ENODATA;
 
diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.h b/drivers/gpu/drm/xe/xe_gsc_submit.h
index 0801da5d44..1416b5745a 100644
--- a/drivers/gpu/drm/xe/xe_gsc_submit.h
+++ b/drivers/gpu/drm/xe/xe_gsc_submit.h
@@ -14,6 +14,7 @@ struct xe_gsc;
 
 u32 xe_gsc_emit_header(struct xe_device *xe, struct iosys_map *map, u32 offset,
 		       u8 heci_client_id, u64 host_session_id, u32 payload_size);
+void xe_gsc_poison_header(struct xe_device *xe, struct iosys_map *map, u32 offset);
 
 bool xe_gsc_check_and_update_pending(struct xe_device *xe,
 				     struct iosys_map *in, u32 offset_in,
@@ -27,4 +28,5 @@ int xe_gsc_read_out_header(struct xe_device *xe,
 int xe_gsc_pkt_submit_kernel(struct xe_gsc *gsc, u64 addr_in, u32 size_in,
 			     u64 addr_out, u32 size_out);
 
+u64 xe_gsc_create_host_session_id(void);
 #endif
diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h
index 57fefd66a7..5926de2021 100644
--- a/drivers/gpu/drm/xe/xe_gsc_types.h
+++ b/drivers/gpu/drm/xe/xe_gsc_types.h
@@ -6,12 +6,17 @@
 #ifndef _XE_GSC_TYPES_H_
 #define _XE_GSC_TYPES_H_
 
+#include <linux/iosys-map.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
 #include <linux/workqueue.h>
 
 #include "xe_uc_fw_types.h"
 
 struct xe_bo;
 struct xe_exec_queue;
+struct i915_gsc_proxy_component;
 
 /**
  * struct xe_gsc - GSC
@@ -34,6 +39,35 @@ struct xe_gsc {
 
 	/** @work: delayed load and proxy handling work */
 	struct work_struct work;
+
+	/** @lock: protects access to the work_actions mask */
+	spinlock_t lock;
+
+	/** @work_actions: mask of actions to be performed in the work */
+	u32 work_actions;
+#define GSC_ACTION_FW_LOAD BIT(0)
+#define GSC_ACTION_SW_PROXY BIT(1)
+#define GSC_ACTION_ER_COMPLETE BIT(2)
+
+	/** @proxy: sub-structure containing the SW proxy-related variables */
+	struct {
+		/** @proxy.component: struct for communication with mei component */
+		struct i915_gsc_proxy_component *component;
+		/** @proxy.mutex: protects the component binding and usage */
+		struct mutex mutex;
+		/** @proxy.component_added: whether the component has been added */
+		bool component_added;
+		/** @proxy.bo: object to store message to and from the GSC */
+		struct xe_bo *bo;
+		/** @proxy.to_gsc: map of the memory used to send messages to the GSC */
+		struct iosys_map to_gsc;
+		/** @proxy.from_gsc: map of the memory used to recv messages from the GSC */
+		struct iosys_map from_gsc;
+		/** @proxy.to_csme: pointer to the memory used to send messages to CSME */
+		void *to_csme;
+		/** @proxy.from_csme: pointer to the memory used to recv messages from CSME */
+		void *from_csme;
+	} proxy;
 };
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index d23ee1397c..491d0413de 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -29,6 +29,7 @@
 #include "xe_gt_mcr.h"
 #include "xe_gt_pagefault.h"
 #include "xe_gt_printk.h"
+#include "xe_gt_sriov_pf.h"
 #include "xe_gt_sysfs.h"
 #include "xe_gt_tlb_invalidation.h"
 #include "xe_gt_topology.h"
@@ -43,6 +44,7 @@
 #include "xe_migrate.h"
 #include "xe_mmio.h"
 #include "xe_pat.h"
+#include "xe_pm.h"
 #include "xe_mocs.h"
 #include "xe_reg_sr.h"
 #include "xe_ring_ops.h"
@@ -78,6 +80,19 @@ void xe_gt_sanitize(struct xe_gt *gt)
 	gt->uc.guc.submission_state.enabled = false;
 }
 
+/**
+ * xe_gt_remove() - Clean up the GT structures before driver removal
+ * @gt: the GT object
+ *
+ * This function should only act on objects/structures that must be cleaned
+ * before the driver removal callback is complete and therefore can't be
+ * deferred to a drmm action.
+ */
+void xe_gt_remove(struct xe_gt *gt)
+{
+	xe_uc_remove(&gt->uc);
+}
+
 static void gt_fini(struct drm_device *drm, void *arg)
 {
 	struct xe_gt *gt = arg;
@@ -235,7 +250,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
 			return -ENOMEM;
 
 		q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1,
-					 hwe, EXEC_QUEUE_FLAG_KERNEL);
+					 hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
 		if (IS_ERR(q)) {
 			err = PTR_ERR(q);
 			xe_gt_err(gt, "hwe %s: xe_exec_queue_create failed (%pe)\n",
@@ -252,7 +267,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
 		}
 
 		nop_q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance),
-					     1, hwe, EXEC_QUEUE_FLAG_KERNEL);
+					     1, hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
 		if (IS_ERR(nop_q)) {
 			err = PTR_ERR(nop_q);
 			xe_gt_err(gt, "hwe %s: nop xe_exec_queue_create failed (%pe)\n",
@@ -297,13 +312,16 @@ int xe_gt_init_early(struct xe_gt *gt)
 {
 	int err;
 
+	if (IS_SRIOV_PF(gt_to_xe(gt))) {
+		err = xe_gt_sriov_pf_init_early(gt);
+		if (err)
+			return err;
+	}
+
 	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
 	if (err)
 		return err;
 
-	xe_gt_topology_init(gt);
-	xe_gt_mcr_init(gt);
-
 	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
 	if (err)
 		return err;
@@ -327,7 +345,7 @@ static void dump_pat_on_error(struct xe_gt *gt)
 	char prefix[32];
 
 	snprintf(prefix, sizeof(prefix), "[GT%u Error]", gt->info.id);
-	p = drm_debug_printer(prefix);
+	p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, prefix);
 
 	xe_pat_dump(gt, &p);
 }
@@ -336,13 +354,10 @@ static int gt_fw_domain_init(struct xe_gt *gt)
 {
 	int err, i;
 
-	xe_device_mem_access_get(gt_to_xe(gt));
 	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
 	if (err)
 		goto err_hw_fence_irq;
 
-	xe_pat_init(gt);
-
 	if (!xe_gt_is_media_type(gt)) {
 		err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt);
 		if (err)
@@ -351,22 +366,10 @@ static int gt_fw_domain_init(struct xe_gt *gt)
 			xe_lmtt_init(&gt_to_tile(gt)->sriov.pf.lmtt);
 	}
 
-	err = xe_uc_init(&gt->uc);
-	if (err)
-		goto err_force_wake;
-
-	/* Raise GT freq to speed up HuC/GuC load */
-	xe_guc_pc_init_early(&gt->uc.guc.pc);
-
-	err = xe_uc_init_hwconfig(&gt->uc);
+	err = xe_gt_idle_sysfs_init(&gt->gtidle);
 	if (err)
 		goto err_force_wake;
 
-	xe_gt_idle_sysfs_init(&gt->gtidle);
-
-	/* XXX: Fake that we pull the engine mask from hwconfig blob */
-	gt->info.engine_mask = gt->info.__engine_mask;
-
 	/* Enable per hw engine IRQs */
 	xe_irq_enable_hwe(gt);
 
@@ -379,18 +382,21 @@ static int gt_fw_domain_init(struct xe_gt *gt)
 
 	err = xe_hw_engine_class_sysfs_init(gt);
 	if (err)
-		drm_warn(&gt_to_xe(gt)->drm,
-			 "failed to register engines sysfs directory, err: %d\n",
-			 err);
+		goto err_force_wake;
 
 	/* Initialize CCS mode sysfs after early initialization of HW engines */
 	err = xe_gt_ccs_mode_sysfs_init(gt);
 	if (err)
 		goto err_force_wake;
 
+	/*
+	 * Stash hardware-reported version.  Since this register does not exist
+	 * on pre-MTL platforms, reading it there will (correctly) return 0.
+	 */
+	gt->info.gmdid = xe_mmio_read32(gt, GMD_ID);
+
 	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
 	XE_WARN_ON(err);
-	xe_device_mem_access_put(gt_to_xe(gt));
 
 	return 0;
 
@@ -400,7 +406,6 @@ err_force_wake:
 err_hw_fence_irq:
 	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
 		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
-	xe_device_mem_access_put(gt_to_xe(gt));
 
 	return err;
 }
@@ -409,7 +414,6 @@ static int all_fw_domain_init(struct xe_gt *gt)
 {
 	int err, i;
 
-	xe_device_mem_access_get(gt_to_xe(gt));
 	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	if (err)
 		goto err_hw_fence_irq;
@@ -430,10 +434,6 @@ static int all_fw_domain_init(struct xe_gt *gt)
 	if (err)
 		goto err_force_wake;
 
-	err = xe_uc_init_post_hwconfig(&gt->uc);
-	if (err)
-		goto err_force_wake;
-
 	if (!xe_gt_is_media_type(gt)) {
 		/*
 		 * USM has its only SA pool to non-block behind user operations
@@ -460,6 +460,10 @@ static int all_fw_domain_init(struct xe_gt *gt)
 		}
 	}
 
+	err = xe_uc_init_post_hwconfig(&gt->uc);
+	if (err)
+		goto err_force_wake;
+
 	err = xe_uc_init_hw(&gt->uc);
 	if (err)
 		goto err_force_wake;
@@ -475,7 +479,6 @@ static int all_fw_domain_init(struct xe_gt *gt)
 
 	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	XE_WARN_ON(err);
-	xe_device_mem_access_put(gt_to_xe(gt));
 
 	return 0;
 
@@ -484,11 +487,43 @@ err_force_wake:
 err_hw_fence_irq:
 	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
 		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
-	xe_device_mem_access_put(gt_to_xe(gt));
 
 	return err;
 }
 
+/*
+ * Initialize enough GT to be able to load GuC in order to obtain hwconfig and
+ * enable CTB communication.
+ */
+int xe_gt_init_hwconfig(struct xe_gt *gt)
+{
+	int err;
+
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	if (err)
+		goto out;
+
+	xe_gt_topology_init(gt);
+	xe_gt_mcr_init(gt);
+	xe_pat_init(gt);
+
+	err = xe_uc_init(&gt->uc);
+	if (err)
+		goto out_fw;
+
+	err = xe_uc_init_hwconfig(&gt->uc);
+	if (err)
+		goto out_fw;
+
+	/* XXX: Fake that we pull the engine mask from hwconfig blob */
+	gt->info.engine_mask = gt->info.__engine_mask;
+
+out_fw:
+	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+out:
+	return err;
+}
+
 int xe_gt_init(struct xe_gt *gt)
 {
 	int err;
@@ -511,13 +546,17 @@ int xe_gt_init(struct xe_gt *gt)
 
 	xe_mocs_init_early(gt);
 
-	xe_gt_sysfs_init(gt);
+	err = xe_gt_sysfs_init(gt);
+	if (err)
+		return err;
 
 	err = gt_fw_domain_init(gt);
 	if (err)
 		return err;
 
-	xe_gt_freq_init(gt);
+	err = xe_gt_freq_init(gt);
+	if (err)
+		return err;
 
 	xe_force_wake_init_engines(gt, gt_to_fw(gt));
 
@@ -525,11 +564,7 @@ int xe_gt_init(struct xe_gt *gt)
 	if (err)
 		return err;
 
-	err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_fini, gt);
-	if (err)
-		return err;
-
-	return 0;
+	return drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_fini, gt);
 }
 
 static int do_gt_reset(struct xe_gt *gt)
@@ -609,9 +644,9 @@ static int gt_reset(struct xe_gt *gt)
 		goto err_fail;
 	}
 
+	xe_pm_runtime_get(gt_to_xe(gt));
 	xe_gt_sanitize(gt);
 
-	xe_device_mem_access_get(gt_to_xe(gt));
 	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	if (err)
 		goto err_msg;
@@ -624,19 +659,19 @@ static int gt_reset(struct xe_gt *gt)
 	if (err)
 		goto err_out;
 
+	xe_gt_tlb_invalidation_reset(gt);
+
 	err = do_gt_reset(gt);
 	if (err)
 		goto err_out;
 
-	xe_gt_tlb_invalidation_reset(gt);
-
 	err = do_gt_restart(gt);
 	if (err)
 		goto err_out;
 
 	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
-	xe_device_mem_access_put(gt_to_xe(gt));
 	XE_WARN_ON(err);
+	xe_pm_runtime_put(gt_to_xe(gt));
 
 	xe_gt_info(gt, "reset done\n");
 
@@ -646,7 +681,7 @@ err_out:
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
 err_msg:
 	XE_WARN_ON(xe_uc_start(&gt->uc));
-	xe_device_mem_access_put(gt_to_xe(gt));
+	xe_pm_runtime_put(gt_to_xe(gt));
 err_fail:
 	xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
 
@@ -676,22 +711,20 @@ void xe_gt_reset_async(struct xe_gt *gt)
 
 void xe_gt_suspend_prepare(struct xe_gt *gt)
 {
-	xe_device_mem_access_get(gt_to_xe(gt));
 	XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
 
 	xe_uc_stop_prepare(&gt->uc);
 
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
-	xe_device_mem_access_put(gt_to_xe(gt));
 }
 
 int xe_gt_suspend(struct xe_gt *gt)
 {
 	int err;
 
+	xe_gt_dbg(gt, "suspending\n");
 	xe_gt_sanitize(gt);
 
-	xe_device_mem_access_get(gt_to_xe(gt));
 	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	if (err)
 		goto err_msg;
@@ -701,15 +734,13 @@ int xe_gt_suspend(struct xe_gt *gt)
 		goto err_force_wake;
 
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
-	xe_device_mem_access_put(gt_to_xe(gt));
-	xe_gt_info(gt, "suspended\n");
+	xe_gt_dbg(gt, "suspended\n");
 
 	return 0;
 
 err_force_wake:
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
 err_msg:
-	xe_device_mem_access_put(gt_to_xe(gt));
 	xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err));
 
 	return err;
@@ -719,7 +750,7 @@ int xe_gt_resume(struct xe_gt *gt)
 {
 	int err;
 
-	xe_device_mem_access_get(gt_to_xe(gt));
+	xe_gt_dbg(gt, "resuming\n");
 	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	if (err)
 		goto err_msg;
@@ -729,15 +760,13 @@ int xe_gt_resume(struct xe_gt *gt)
 		goto err_force_wake;
 
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
-	xe_device_mem_access_put(gt_to_xe(gt));
-	xe_gt_info(gt, "resumed\n");
+	xe_gt_dbg(gt, "resumed\n");
 
 	return 0;
 
 err_force_wake:
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
 err_msg:
-	xe_device_mem_access_put(gt_to_xe(gt));
 	xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err));
 
 	return err;
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 4486e083f5..ed6ea8057e 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -33,6 +33,7 @@ static inline bool xe_fault_inject_gt_reset(void)
 #endif
 
 struct xe_gt *xe_gt_alloc(struct xe_tile *tile);
+int xe_gt_init_hwconfig(struct xe_gt *gt);
 int xe_gt_init_early(struct xe_gt *gt);
 int xe_gt_init(struct xe_gt *gt);
 int xe_gt_record_default_lrcs(struct xe_gt *gt);
@@ -41,6 +42,7 @@ int xe_gt_suspend(struct xe_gt *gt);
 int xe_gt_resume(struct xe_gt *gt);
 void xe_gt_reset_async(struct xe_gt *gt);
 void xe_gt_sanitize(struct xe_gt *gt);
+void xe_gt_remove(struct xe_gt *gt);
 
 /**
  * xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
index 937054e31d..c7bca20f6b 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.c
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -78,8 +78,3 @@ int xe_gt_clock_init(struct xe_gt *gt)
 	gt->info.reference_clock = freq;
 	return 0;
 }
-
-u64 xe_gt_clock_cycles_to_ns(const struct xe_gt *gt, u64 count)
-{
-	return DIV_ROUND_CLOSEST_ULL(count * NSEC_PER_SEC, gt->info.reference_clock);
-}
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.h b/drivers/gpu/drm/xe/xe_gt_clock.h
index aa162722f8..44fa0371b9 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.h
+++ b/drivers/gpu/drm/xe/xe_gt_clock.h
@@ -11,5 +11,5 @@
 struct xe_gt;
 
 int xe_gt_clock_init(struct xe_gt *gt);
-u64 xe_gt_clock_cycles_to_ns(const struct xe_gt *gt, u64 count);
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index c4b67cf09f..8cf0b2625e 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -5,6 +5,8 @@
 
 #include "xe_gt_debugfs.h"
 
+#include <linux/debugfs.h>
+
 #include <drm/drm_debugfs.h>
 #include <drm/drm_managed.h>
 
@@ -18,193 +20,246 @@
 #include "xe_lrc.h"
 #include "xe_macros.h"
 #include "xe_pat.h"
+#include "xe_pm.h"
 #include "xe_reg_sr.h"
 #include "xe_reg_whitelist.h"
 #include "xe_uc_debugfs.h"
 #include "xe_wa.h"
 
-static struct xe_gt *node_to_gt(struct drm_info_node *node)
+/**
+ * xe_gt_debugfs_simple_show - A show callback for struct drm_info_list
+ * @m: the &seq_file
+ * @data: data used by the drm debugfs helpers
+ *
+ * This callback can be used in struct drm_info_list to describe debugfs
+ * files that are &xe_gt specific.
+ *
+ * It is assumed that those debugfs files will be created on directory entry
+ * which struct dentry d_inode->i_private points to &xe_gt.
+ *
+ * This function assumes that &m->private will be set to the &struct
+ * drm_info_node corresponding to the instance of the info on a given &struct
+ * drm_minor (see struct drm_info_list.show for details).
+ *
+ * This function also assumes that struct drm_info_list.data will point to the
+ * function code that will actually print a file content::
+ *
+ *   int (*print)(struct xe_gt *, struct drm_printer *)
+ *
+ * Example::
+ *
+ *    int foo(struct xe_gt *gt, struct drm_printer *p)
+ *    {
+ *        drm_printf(p, "GT%u\n", gt->info.id);
+ *        return 0;
+ *    }
+ *
+ *    static const struct drm_info_list bar[] = {
+ *        { name = "foo", .show = xe_gt_debugfs_simple_show, .data = foo },
+ *    };
+ *
+ *    dir = debugfs_create_dir("gt", parent);
+ *    dir->d_inode->i_private = gt;
+ *    drm_debugfs_create_files(bar, ARRAY_SIZE(bar), dir, minor);
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_debugfs_simple_show(struct seq_file *m, void *data)
 {
-	return node->info_ent->data;
+	struct drm_printer p = drm_seq_file_printer(m);
+	struct drm_info_node *node = m->private;
+	struct dentry *parent = node->dent->d_parent;
+	struct xe_gt *gt = parent->d_inode->i_private;
+	int (*print)(struct xe_gt *, struct drm_printer *) = node->info_ent->data;
+
+	if (WARN_ON(!print))
+		return -EINVAL;
+
+	return print(gt, &p);
 }
 
-static int hw_engines(struct seq_file *m, void *data)
+static int hw_engines(struct xe_gt *gt, struct drm_printer *p)
 {
-	struct xe_gt *gt = node_to_gt(m->private);
 	struct xe_device *xe = gt_to_xe(gt);
-	struct drm_printer p = drm_seq_file_printer(m);
 	struct xe_hw_engine *hwe;
 	enum xe_hw_engine_id id;
 	int err;
 
-	xe_device_mem_access_get(xe);
+	xe_pm_runtime_get(xe);
 	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	if (err) {
-		xe_device_mem_access_put(xe);
+		xe_pm_runtime_put(xe);
 		return err;
 	}
 
 	for_each_hw_engine(hwe, gt, id)
-		xe_hw_engine_print(hwe, &p);
+		xe_hw_engine_print(hwe, p);
 
 	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
-	xe_device_mem_access_put(xe);
+	xe_pm_runtime_put(xe);
 	if (err)
 		return err;
 
 	return 0;
 }
 
-static int force_reset(struct seq_file *m, void *data)
+static int force_reset(struct xe_gt *gt, struct drm_printer *p)
 {
-	struct xe_gt *gt = node_to_gt(m->private);
-
+	xe_pm_runtime_get(gt_to_xe(gt));
 	xe_gt_reset_async(gt);
+	xe_pm_runtime_put(gt_to_xe(gt));
 
 	return 0;
 }
 
-static int sa_info(struct seq_file *m, void *data)
+static int sa_info(struct xe_gt *gt, struct drm_printer *p)
 {
-	struct xe_tile *tile = gt_to_tile(node_to_gt(m->private));
-	struct drm_printer p = drm_seq_file_printer(m);
+	struct xe_tile *tile = gt_to_tile(gt);
 
-	drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, &p,
+	xe_pm_runtime_get(gt_to_xe(gt));
+	drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, p,
 				     tile->mem.kernel_bb_pool->gpu_addr);
+	xe_pm_runtime_put(gt_to_xe(gt));
 
 	return 0;
 }
 
-static int topology(struct seq_file *m, void *data)
+static int topology(struct xe_gt *gt, struct drm_printer *p)
 {
-	struct xe_gt *gt = node_to_gt(m->private);
-	struct drm_printer p = drm_seq_file_printer(m);
-
-	xe_gt_topology_dump(gt, &p);
+	xe_pm_runtime_get(gt_to_xe(gt));
+	xe_gt_topology_dump(gt, p);
+	xe_pm_runtime_put(gt_to_xe(gt));
 
 	return 0;
 }
 
-static int steering(struct seq_file *m, void *data)
+static int steering(struct xe_gt *gt, struct drm_printer *p)
 {
-	struct xe_gt *gt = node_to_gt(m->private);
-	struct drm_printer p = drm_seq_file_printer(m);
-
-	xe_gt_mcr_steering_dump(gt, &p);
+	xe_pm_runtime_get(gt_to_xe(gt));
+	xe_gt_mcr_steering_dump(gt, p);
+	xe_pm_runtime_put(gt_to_xe(gt));
 
 	return 0;
 }
 
-static int ggtt(struct seq_file *m, void *data)
+static int ggtt(struct xe_gt *gt, struct drm_printer *p)
 {
-	struct xe_gt *gt = node_to_gt(m->private);
-	struct drm_printer p = drm_seq_file_printer(m);
+	int ret;
+
+	xe_pm_runtime_get(gt_to_xe(gt));
+	ret = xe_ggtt_dump(gt_to_tile(gt)->mem.ggtt, p);
+	xe_pm_runtime_put(gt_to_xe(gt));
 
-	return xe_ggtt_dump(gt_to_tile(gt)->mem.ggtt, &p);
+	return ret;
 }
 
-static int register_save_restore(struct seq_file *m, void *data)
+static int register_save_restore(struct xe_gt *gt, struct drm_printer *p)
 {
-	struct xe_gt *gt = node_to_gt(m->private);
-	struct drm_printer p = drm_seq_file_printer(m);
 	struct xe_hw_engine *hwe;
 	enum xe_hw_engine_id id;
 
-	xe_reg_sr_dump(&gt->reg_sr, &p);
-	drm_printf(&p, "\n");
+	xe_pm_runtime_get(gt_to_xe(gt));
 
-	drm_printf(&p, "Engine\n");
+	xe_reg_sr_dump(&gt->reg_sr, p);
+	drm_printf(p, "\n");
+
+	drm_printf(p, "Engine\n");
 	for_each_hw_engine(hwe, gt, id)
-		xe_reg_sr_dump(&hwe->reg_sr, &p);
-	drm_printf(&p, "\n");
+		xe_reg_sr_dump(&hwe->reg_sr, p);
+	drm_printf(p, "\n");
 
-	drm_printf(&p, "LRC\n");
+	drm_printf(p, "LRC\n");
 	for_each_hw_engine(hwe, gt, id)
-		xe_reg_sr_dump(&hwe->reg_lrc, &p);
-	drm_printf(&p, "\n");
+		xe_reg_sr_dump(&hwe->reg_lrc, p);
+	drm_printf(p, "\n");
 
-	drm_printf(&p, "Whitelist\n");
+	drm_printf(p, "Whitelist\n");
 	for_each_hw_engine(hwe, gt, id)
-		xe_reg_whitelist_dump(&hwe->reg_whitelist, &p);
+		xe_reg_whitelist_dump(&hwe->reg_whitelist, p);
+
+	xe_pm_runtime_put(gt_to_xe(gt));
 
 	return 0;
 }
 
-static int workarounds(struct seq_file *m, void *data)
+static int workarounds(struct xe_gt *gt, struct drm_printer *p)
 {
-	struct xe_gt *gt = node_to_gt(m->private);
-	struct drm_printer p = drm_seq_file_printer(m);
-
-	xe_wa_dump(gt, &p);
+	xe_pm_runtime_get(gt_to_xe(gt));
+	xe_wa_dump(gt, p);
+	xe_pm_runtime_put(gt_to_xe(gt));
 
 	return 0;
 }
 
-static int pat(struct seq_file *m, void *data)
+static int pat(struct xe_gt *gt, struct drm_printer *p)
 {
-	struct xe_gt *gt = node_to_gt(m->private);
-	struct drm_printer p = drm_seq_file_printer(m);
-
-	xe_pat_dump(gt, &p);
+	xe_pm_runtime_get(gt_to_xe(gt));
+	xe_pat_dump(gt, p);
+	xe_pm_runtime_put(gt_to_xe(gt));
 
 	return 0;
 }
 
-static int rcs_default_lrc(struct seq_file *m, void *data)
+static int rcs_default_lrc(struct xe_gt *gt, struct drm_printer *p)
 {
-	struct drm_printer p = drm_seq_file_printer(m);
+	xe_pm_runtime_get(gt_to_xe(gt));
+	xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_RENDER);
+	xe_pm_runtime_put(gt_to_xe(gt));
 
-	xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_RENDER);
 	return 0;
 }
 
-static int ccs_default_lrc(struct seq_file *m, void *data)
+static int ccs_default_lrc(struct xe_gt *gt, struct drm_printer *p)
 {
-	struct drm_printer p = drm_seq_file_printer(m);
+	xe_pm_runtime_get(gt_to_xe(gt));
+	xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_COMPUTE);
+	xe_pm_runtime_put(gt_to_xe(gt));
 
-	xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COMPUTE);
 	return 0;
 }
 
-static int bcs_default_lrc(struct seq_file *m, void *data)
+static int bcs_default_lrc(struct xe_gt *gt, struct drm_printer *p)
 {
-	struct drm_printer p = drm_seq_file_printer(m);
+	xe_pm_runtime_get(gt_to_xe(gt));
+	xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_COPY);
+	xe_pm_runtime_put(gt_to_xe(gt));
 
-	xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COPY);
 	return 0;
 }
 
-static int vcs_default_lrc(struct seq_file *m, void *data)
+static int vcs_default_lrc(struct xe_gt *gt, struct drm_printer *p)
 {
-	struct drm_printer p = drm_seq_file_printer(m);
+	xe_pm_runtime_get(gt_to_xe(gt));
+	xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_VIDEO_DECODE);
+	xe_pm_runtime_put(gt_to_xe(gt));
 
-	xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_DECODE);
 	return 0;
 }
 
-static int vecs_default_lrc(struct seq_file *m, void *data)
+static int vecs_default_lrc(struct xe_gt *gt, struct drm_printer *p)
 {
-	struct drm_printer p = drm_seq_file_printer(m);
+	xe_pm_runtime_get(gt_to_xe(gt));
+	xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_VIDEO_ENHANCE);
+	xe_pm_runtime_put(gt_to_xe(gt));
 
-	xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_ENHANCE);
 	return 0;
 }
 
 static const struct drm_info_list debugfs_list[] = {
-	{"hw_engines", hw_engines, 0},
-	{"force_reset", force_reset, 0},
-	{"sa_info", sa_info, 0},
-	{"topology", topology, 0},
-	{"steering", steering, 0},
-	{"ggtt", ggtt, 0},
-	{"register-save-restore", register_save_restore, 0},
-	{"workarounds", workarounds, 0},
-	{"pat", pat, 0},
-	{"default_lrc_rcs", rcs_default_lrc},
-	{"default_lrc_ccs", ccs_default_lrc},
-	{"default_lrc_bcs", bcs_default_lrc},
-	{"default_lrc_vcs", vcs_default_lrc},
-	{"default_lrc_vecs", vecs_default_lrc},
+	{"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines},
+	{"force_reset", .show = xe_gt_debugfs_simple_show, .data = force_reset},
+	{"sa_info", .show = xe_gt_debugfs_simple_show, .data = sa_info},
+	{"topology", .show = xe_gt_debugfs_simple_show, .data = topology},
+	{"steering", .show = xe_gt_debugfs_simple_show, .data = steering},
+	{"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt},
+	{"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore},
+	{"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds},
+	{"pat", .show = xe_gt_debugfs_simple_show, .data = pat},
+	{"default_lrc_rcs", .show = xe_gt_debugfs_simple_show, .data = rcs_default_lrc},
+	{"default_lrc_ccs", .show = xe_gt_debugfs_simple_show, .data = ccs_default_lrc},
+	{"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc},
+	{"default_lrc_vcs", .show = xe_gt_debugfs_simple_show, .data = vcs_default_lrc},
+	{"default_lrc_vecs", .show = xe_gt_debugfs_simple_show, .data = vecs_default_lrc},
 };
 
 void xe_gt_debugfs_register(struct xe_gt *gt)
@@ -212,13 +267,11 @@ void xe_gt_debugfs_register(struct xe_gt *gt)
 	struct xe_device *xe = gt_to_xe(gt);
 	struct drm_minor *minor = gt_to_xe(gt)->drm.primary;
 	struct dentry *root;
-	struct drm_info_list *local;
 	char name[8];
-	int i;
 
 	xe_gt_assert(gt, minor->debugfs_root);
 
-	sprintf(name, "gt%d", gt->info.id);
+	snprintf(name, sizeof(name), "gt%d", gt->info.id);
 	root = debugfs_create_dir(name, minor->debugfs_root);
 	if (IS_ERR(root)) {
 		drm_warn(&xe->drm, "Create GT directory failed");
@@ -226,22 +279,13 @@ void xe_gt_debugfs_register(struct xe_gt *gt)
 	}
 
 	/*
-	 * Allocate local copy as we need to pass in the GT to the debugfs
-	 * entry and drm_debugfs_create_files just references the drm_info_list
-	 * passed in (e.g. can't define this on the stack).
+	 * Store the xe_gt pointer as private data of the gt/ directory node
+	 * so other GT specific attributes under that directory may refer to
+	 * it by looking at its parent node private data.
 	 */
-#define DEBUGFS_SIZE	(ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list))
-	local = drmm_kmalloc(&xe->drm, DEBUGFS_SIZE, GFP_KERNEL);
-	if (!local)
-		return;
-
-	memcpy(local, debugfs_list, DEBUGFS_SIZE);
-#undef DEBUGFS_SIZE
-
-	for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i)
-		local[i].data = gt;
+	root->d_inode->i_private = gt;
 
-	drm_debugfs_create_files(local,
+	drm_debugfs_create_files(debugfs_list,
 				 ARRAY_SIZE(debugfs_list),
 				 root, minor);
 
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.h b/drivers/gpu/drm/xe/xe_gt_debugfs.h
index 5a329f118a..05a6cc93c7 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.h
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.h
@@ -6,8 +6,10 @@
 #ifndef _XE_GT_DEBUGFS_H_
 #define _XE_GT_DEBUGFS_H_
 
+struct seq_file;
 struct xe_gt;
 
 void xe_gt_debugfs_register(struct xe_gt *gt);
+int xe_gt_debugfs_simple_show(struct seq_file *m, void *data);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
index e5b0f4ecdb..855de40e40 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.c
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -15,6 +15,7 @@
 #include "xe_gt_sysfs.h"
 #include "xe_gt_throttle_sysfs.h"
 #include "xe_guc_pc.h"
+#include "xe_pm.h"
 
 /**
  * DOC: Xe GT Frequency Management
@@ -49,12 +50,23 @@ dev_to_pc(struct device *dev)
 	return &kobj_to_gt(dev->kobj.parent)->uc.guc.pc;
 }
 
+static struct xe_device *
+dev_to_xe(struct device *dev)
+{
+	return gt_to_xe(kobj_to_gt(dev->kobj.parent));
+}
+
 static ssize_t act_freq_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
 	struct xe_guc_pc *pc = dev_to_pc(dev);
+	u32 freq;
 
-	return sysfs_emit(buf, "%d\n", xe_guc_pc_get_act_freq(pc));
+	xe_pm_runtime_get(dev_to_xe(dev));
+	freq = xe_guc_pc_get_act_freq(pc);
+	xe_pm_runtime_put(dev_to_xe(dev));
+
+	return sysfs_emit(buf, "%d\n", freq);
 }
 static DEVICE_ATTR_RO(act_freq);
 
@@ -65,7 +77,9 @@ static ssize_t cur_freq_show(struct device *dev,
 	u32 freq;
 	ssize_t ret;
 
+	xe_pm_runtime_get(dev_to_xe(dev));
 	ret = xe_guc_pc_get_cur_freq(pc, &freq);
+	xe_pm_runtime_put(dev_to_xe(dev));
 	if (ret)
 		return ret;
 
@@ -77,8 +91,13 @@ static ssize_t rp0_freq_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
 	struct xe_guc_pc *pc = dev_to_pc(dev);
+	u32 freq;
 
-	return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rp0_freq(pc));
+	xe_pm_runtime_get(dev_to_xe(dev));
+	freq = xe_guc_pc_get_rp0_freq(pc);
+	xe_pm_runtime_put(dev_to_xe(dev));
+
+	return sysfs_emit(buf, "%d\n", freq);
 }
 static DEVICE_ATTR_RO(rp0_freq);
 
@@ -86,8 +105,13 @@ static ssize_t rpe_freq_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
 	struct xe_guc_pc *pc = dev_to_pc(dev);
+	u32 freq;
+
+	xe_pm_runtime_get(dev_to_xe(dev));
+	freq = xe_guc_pc_get_rpe_freq(pc);
+	xe_pm_runtime_put(dev_to_xe(dev));
 
-	return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rpe_freq(pc));
+	return sysfs_emit(buf, "%d\n", freq);
 }
 static DEVICE_ATTR_RO(rpe_freq);
 
@@ -107,7 +131,9 @@ static ssize_t min_freq_show(struct device *dev,
 	u32 freq;
 	ssize_t ret;
 
+	xe_pm_runtime_get(dev_to_xe(dev));
 	ret = xe_guc_pc_get_min_freq(pc, &freq);
+	xe_pm_runtime_put(dev_to_xe(dev));
 	if (ret)
 		return ret;
 
@@ -125,7 +151,9 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr,
 	if (ret)
 		return ret;
 
+	xe_pm_runtime_get(dev_to_xe(dev));
 	ret = xe_guc_pc_set_min_freq(pc, freq);
+	xe_pm_runtime_put(dev_to_xe(dev));
 	if (ret)
 		return ret;
 
@@ -140,7 +168,9 @@ static ssize_t max_freq_show(struct device *dev,
 	u32 freq;
 	ssize_t ret;
 
+	xe_pm_runtime_get(dev_to_xe(dev));
 	ret = xe_guc_pc_get_max_freq(pc, &freq);
+	xe_pm_runtime_put(dev_to_xe(dev));
 	if (ret)
 		return ret;
 
@@ -158,7 +188,9 @@ static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr,
 	if (ret)
 		return ret;
 
+	xe_pm_runtime_get(dev_to_xe(dev));
 	ret = xe_guc_pc_set_max_freq(pc, freq);
+	xe_pm_runtime_put(dev_to_xe(dev));
 	if (ret)
 		return ret;
 
@@ -190,33 +222,28 @@ static void freq_fini(struct drm_device *drm, void *arg)
  * @gt: Xe GT object
  *
  * It needs to be initialized after GT Sysfs and GuC PC components are ready.
+ *
+ * Returns: Returns error value for failure and 0 for success.
  */
-void xe_gt_freq_init(struct xe_gt *gt)
+int xe_gt_freq_init(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 	int err;
 
 	if (xe->info.skip_guc_pc)
-		return;
+		return 0;
 
 	gt->freq = kobject_create_and_add("freq0", gt->sysfs);
-	if (!gt->freq) {
-		drm_warn(&xe->drm, "failed to add freq0 directory to %s\n",
-			 kobject_name(gt->sysfs));
-		return;
-	}
+	if (!gt->freq)
+		return -ENOMEM;
 
 	err = drmm_add_action_or_reset(&xe->drm, freq_fini, gt->freq);
-	if (err) {
-		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
-			 __func__, err);
-		return;
-	}
+	if (err)
+		return err;
 
 	err = sysfs_create_files(gt->freq, freq_attrs);
 	if (err)
-		drm_warn(&xe->drm,  "failed to add freq attrs to %s, err: %d\n",
-			 kobject_name(gt->freq), err);
+		return err;
 
-	xe_gt_throttle_sysfs_init(gt);
+	return xe_gt_throttle_sysfs_init(gt);
 }
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.h b/drivers/gpu/drm/xe/xe_gt_freq.h
index f3fe3c9049..b7fddbe7b9 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.h
+++ b/drivers/gpu/drm/xe/xe_gt_freq.h
@@ -8,6 +8,6 @@
 
 struct xe_gt;
 
-void xe_gt_freq_init(struct xe_gt *gt);
+int xe_gt_freq_init(struct xe_gt *gt);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
index 9fcae65b64..944770fb2d 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -12,6 +12,7 @@
 #include "xe_guc_pc.h"
 #include "regs/xe_gt_regs.h"
 #include "xe_mmio.h"
+#include "xe_pm.h"
 
 /**
  * DOC: Xe GT Idle
@@ -40,6 +41,15 @@ static struct xe_guc_pc *gtidle_to_pc(struct xe_gt_idle *gtidle)
 	return &gtidle_to_gt(gtidle)->uc.guc.pc;
 }
 
+static struct xe_device *
+pc_to_xe(struct xe_guc_pc *pc)
+{
+	struct xe_guc *guc = container_of(pc, struct xe_guc, pc);
+	struct xe_gt *gt = container_of(guc, struct xe_gt, uc.guc);
+
+	return gt_to_xe(gt);
+}
+
 static const char *gt_idle_state_to_string(enum xe_gt_idle_state state)
 {
 	switch (state) {
@@ -86,8 +96,14 @@ static ssize_t name_show(struct device *dev,
 			 struct device_attribute *attr, char *buff)
 {
 	struct xe_gt_idle *gtidle = dev_to_gtidle(dev);
+	struct xe_guc_pc *pc = gtidle_to_pc(gtidle);
+	ssize_t ret;
+
+	xe_pm_runtime_get(pc_to_xe(pc));
+	ret = sysfs_emit(buff, "%s\n", gtidle->name);
+	xe_pm_runtime_put(pc_to_xe(pc));
 
-	return sysfs_emit(buff, "%s\n", gtidle->name);
+	return ret;
 }
 static DEVICE_ATTR_RO(name);
 
@@ -98,7 +114,9 @@ static ssize_t idle_status_show(struct device *dev,
 	struct xe_guc_pc *pc = gtidle_to_pc(gtidle);
 	enum xe_gt_idle_state state;
 
+	xe_pm_runtime_get(pc_to_xe(pc));
 	state = gtidle->idle_status(pc);
+	xe_pm_runtime_put(pc_to_xe(pc));
 
 	return sysfs_emit(buff, "%s\n", gt_idle_state_to_string(state));
 }
@@ -111,7 +129,10 @@ static ssize_t idle_residency_ms_show(struct device *dev,
 	struct xe_guc_pc *pc = gtidle_to_pc(gtidle);
 	u64 residency;
 
+	xe_pm_runtime_get(pc_to_xe(pc));
 	residency = gtidle->idle_residency(pc);
+	xe_pm_runtime_put(pc_to_xe(pc));
+
 	return sysfs_emit(buff, "%llu\n", get_residency_ms(gtidle, residency));
 }
 static DEVICE_ATTR_RO(idle_residency_ms);
@@ -126,12 +147,19 @@ static const struct attribute *gt_idle_attrs[] = {
 static void gt_idle_sysfs_fini(struct drm_device *drm, void *arg)
 {
 	struct kobject *kobj = arg;
+	struct xe_gt *gt = kobj_to_gt(kobj->parent);
+
+	if (gt_to_xe(gt)->info.skip_guc_pc) {
+		XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
+		xe_gt_idle_disable_c6(gt);
+		xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	}
 
 	sysfs_remove_files(kobj, gt_idle_attrs);
 	kobject_put(kobj);
 }
 
-void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
+int xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
 {
 	struct xe_gt *gt = gtidle_to_gt(gtidle);
 	struct xe_device *xe = gt_to_xe(gt);
@@ -139,16 +167,14 @@ void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
 	int err;
 
 	kobj = kobject_create_and_add("gtidle", gt->sysfs);
-	if (!kobj) {
-		drm_warn(&xe->drm, "%s failed, err: %d\n", __func__, -ENOMEM);
-		return;
-	}
+	if (!kobj)
+		return -ENOMEM;
 
 	if (xe_gt_is_media_type(gt)) {
-		sprintf(gtidle->name, "gt%d-mc", gt->info.id);
+		snprintf(gtidle->name, sizeof(gtidle->name), "gt%d-mc", gt->info.id);
 		gtidle->idle_residency = xe_guc_pc_mc6_residency;
 	} else {
-		sprintf(gtidle->name, "gt%d-rc", gt->info.id);
+		snprintf(gtidle->name, sizeof(gtidle->name), "gt%d-rc", gt->info.id);
 		gtidle->idle_residency = xe_guc_pc_rc6_residency;
 	}
 
@@ -159,14 +185,10 @@ void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle)
 	err = sysfs_create_files(kobj, gt_idle_attrs);
 	if (err) {
 		kobject_put(kobj);
-		drm_warn(&xe->drm, "failed to register gtidle sysfs, err: %d\n", err);
-		return;
+		return err;
 	}
 
-	err = drmm_add_action_or_reset(&xe->drm, gt_idle_sysfs_fini, kobj);
-	if (err)
-		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
-			 __func__, err);
+	return drmm_add_action_or_reset(&xe->drm, gt_idle_sysfs_fini, kobj);
 }
 
 void xe_gt_idle_enable_c6(struct xe_gt *gt)
@@ -184,7 +206,7 @@ void xe_gt_idle_enable_c6(struct xe_gt *gt)
 void xe_gt_idle_disable_c6(struct xe_gt *gt)
 {
 	xe_device_assert_mem_access(gt_to_xe(gt));
-	xe_force_wake_assert_held(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
 
 	xe_mmio_write32(gt, PG_ENABLE, 0);
 	xe_mmio_write32(gt, RC_CONTROL, 0);
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h
index 69280fd16b..75bd99659b 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.h
+++ b/drivers/gpu/drm/xe/xe_gt_idle.h
@@ -10,7 +10,7 @@
 
 struct xe_gt;
 
-void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle);
+int xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle);
 void xe_gt_idle_enable_c6(struct xe_gt *gt);
 void xe_gt_idle_disable_c6(struct xe_gt *gt);
 
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 8546cd3cc5..0443e07880 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -6,10 +6,12 @@
 #include "xe_gt_mcr.h"
 
 #include "regs/xe_gt_regs.h"
+#include "xe_assert.h"
 #include "xe_gt.h"
 #include "xe_gt_topology.h"
 #include "xe_gt_types.h"
 #include "xe_mmio.h"
+#include "xe_sriov.h"
 
 /**
  * DOC: GT Multicast/Replicated (MCR) Register Support
@@ -38,6 +40,8 @@
  * ``init_steering_*()`` functions is to apply the platform-specific rules for
  * each MCR register type to identify a steering target that will select a
  * non-terminated instance.
+ *
+ * MCR registers are not available on Virtual Function (VF).
  */
 
 #define STEER_SEMAPHORE		XE_REG(0xFD0)
@@ -291,14 +295,40 @@ static void init_steering_mslice(struct xe_gt *gt)
 	gt->steering[LNCF].instance_target = 0;		/* unused */
 }
 
-static void init_steering_dss(struct xe_gt *gt)
+static unsigned int dss_per_group(struct xe_gt *gt)
+{
+	if (gt_to_xe(gt)->info.platform == XE_PVC)
+		return 8;
+	else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250)
+		return 4;
+	else
+		return 6;
+}
+
+/**
+ * xe_gt_mcr_get_dss_steering - Get the group/instance steering for a DSS
+ * @gt: GT structure
+ * @dss: DSS ID to obtain steering for
+ * @group: pointer to storage for steering group ID
+ * @instance: pointer to storage for steering instance ID
+ */
+void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance)
 {
-	unsigned int dss = min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0),
-			       xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0));
-	unsigned int dss_per_grp = gt_to_xe(gt)->info.platform == XE_PVC ? 8 : 4;
+	int dss_per_grp = dss_per_group(gt);
+
+	xe_gt_assert(gt, dss < XE_MAX_DSS_FUSE_BITS);
 
-	gt->steering[DSS].group_target = dss / dss_per_grp;
-	gt->steering[DSS].instance_target = dss % dss_per_grp;
+	*group = dss / dss_per_grp;
+	*instance = dss % dss_per_grp;
+}
+
+static void init_steering_dss(struct xe_gt *gt)
+{
+	xe_gt_mcr_get_dss_steering(gt,
+				   min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0),
+				       xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0)),
+				   &gt->steering[DSS].group_target,
+				   &gt->steering[DSS].instance_target);
 }
 
 static void init_steering_oaddrm(struct xe_gt *gt)
@@ -312,7 +342,7 @@ static void init_steering_oaddrm(struct xe_gt *gt)
 	else
 		gt->steering[OADDRM].group_target = 1;
 
-	gt->steering[DSS].instance_target = 0;		/* unused */
+	gt->steering[OADDRM].instance_target = 0;	/* unused */
 }
 
 static void init_steering_sqidi_psmi(struct xe_gt *gt)
@@ -327,8 +357,8 @@ static void init_steering_sqidi_psmi(struct xe_gt *gt)
 
 static void init_steering_inst0(struct xe_gt *gt)
 {
-	gt->steering[DSS].group_target = 0;		/* unused */
-	gt->steering[DSS].instance_target = 0;		/* unused */
+	gt->steering[INSTANCE0].group_target = 0;	/* unused */
+	gt->steering[INSTANCE0].instance_target = 0;	/* unused */
 }
 
 static const struct {
@@ -352,6 +382,9 @@ void xe_gt_mcr_init(struct xe_gt *gt)
 	BUILD_BUG_ON(IMPLICIT_STEERING + 1 != NUM_STEERING_TYPES);
 	BUILD_BUG_ON(ARRAY_SIZE(xe_steering_types) != NUM_STEERING_TYPES);
 
+	if (IS_SRIOV_VF(xe))
+		return;
+
 	spin_lock_init(&gt->mcr_lock);
 
 	if (gt->info.type == XE_GT_TYPE_MEDIA) {
@@ -405,6 +438,9 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 
+	if (IS_SRIOV_VF(xe))
+		return;
+
 	if (xe->info.platform == XE_DG2) {
 		u32 steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, 0) |
 			REG_FIELD_PREP(MCR_SUBSLICE_MASK, 2);
@@ -588,6 +624,8 @@ u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, struct xe_reg_mcr reg_mcr)
 	u32 val;
 	bool steer;
 
+	xe_gt_assert(gt, !IS_SRIOV_VF(gt_to_xe(gt)));
+
 	steer = xe_gt_mcr_get_nonterminated_steering(gt, reg_mcr,
 						     &group, &instance);
 
@@ -619,6 +657,8 @@ u32 xe_gt_mcr_unicast_read(struct xe_gt *gt,
 {
 	u32 val;
 
+	xe_gt_assert(gt, !IS_SRIOV_VF(gt_to_xe(gt)));
+
 	mcr_lock(gt);
 	val = rw_with_mcr_steering(gt, reg_mcr, MCR_OP_READ, group, instance, 0);
 	mcr_unlock(gt);
@@ -640,6 +680,8 @@ u32 xe_gt_mcr_unicast_read(struct xe_gt *gt,
 void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
 			     u32 value, int group, int instance)
 {
+	xe_gt_assert(gt, !IS_SRIOV_VF(gt_to_xe(gt)));
+
 	mcr_lock(gt);
 	rw_with_mcr_steering(gt, reg_mcr, MCR_OP_WRITE, group, instance, value);
 	mcr_unlock(gt);
@@ -658,6 +700,8 @@ void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
 {
 	struct xe_reg reg = to_xe_reg(reg_mcr);
 
+	xe_gt_assert(gt, !IS_SRIOV_VF(gt_to_xe(gt)));
+
 	/*
 	 * Synchronize with any unicast operations.  Once we have exclusive
 	 * access, the MULTICAST bit should already be set, so there's no need
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
index 27ca1bc880..a7f4ab1aa5 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.h
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
@@ -7,6 +7,7 @@
 #define _XE_GT_MCR_H_
 
 #include "regs/xe_reg_defs.h"
+#include "xe_gt_topology.h"
 
 struct drm_printer;
 struct xe_gt;
@@ -25,5 +26,18 @@ void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
 			       u32 value);
 
 void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p);
+void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance);
+
+/*
+ * Loop over each DSS and determine the group and instance IDs that
+ * should be used to steer MCR accesses toward this DSS.
+ * @dss: DSS ID to obtain steering for
+ * @gt: GT structure
+ * @group: steering group ID, data type: u16
+ * @instance: steering instance ID, data type: u16
+ */
+#define for_each_dss_steering(dss, gt, group, instance) \
+	for_each_dss((dss), (gt)) \
+		for_each_if((xe_gt_mcr_get_dss_steering((gt), (dss), &(group), &(instance)), true))
 
 #endif /* _XE_GT_MCR_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index ab8536c4fd..fa9e9853c5 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -100,10 +100,9 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma,
 {
 	struct xe_bo *bo = xe_vma_bo(vma);
 	struct xe_vm *vm = xe_vma_vm(vma);
-	unsigned int num_shared = 2; /* slots for bind + move */
 	int err;
 
-	err = xe_vm_prepare_vma(exec, vma, num_shared);
+	err = xe_vm_lock_vma(exec, vma);
 	if (err)
 		return err;
 
@@ -287,9 +286,9 @@ static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
 	bool ret = false;
 
 	spin_lock_irq(&pf_queue->lock);
-	if (pf_queue->head != pf_queue->tail) {
+	if (pf_queue->tail != pf_queue->head) {
 		desc = (const struct xe_guc_pagefault_desc *)
-			(pf_queue->data + pf_queue->head);
+			(pf_queue->data + pf_queue->tail);
 
 		pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0);
 		pf->trva_fault = FIELD_GET(XE2_PFD_TRVA_FAULT, desc->dw0);
@@ -307,7 +306,7 @@ static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
 		pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) <<
 			PFD_VIRTUAL_ADDR_LO_SHIFT;
 
-		pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) %
+		pf_queue->tail = (pf_queue->tail + PF_MSG_LEN_DW) %
 			PF_QUEUE_NUM_DW;
 		ret = true;
 	}
@@ -320,7 +319,7 @@ static bool pf_queue_full(struct pf_queue *pf_queue)
 {
 	lockdep_assert_held(&pf_queue->lock);
 
-	return CIRC_SPACE(pf_queue->tail, pf_queue->head, PF_QUEUE_NUM_DW) <=
+	return CIRC_SPACE(pf_queue->head, pf_queue->tail, PF_QUEUE_NUM_DW) <=
 		PF_MSG_LEN_DW;
 }
 
@@ -333,6 +332,11 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
 	u32 asid;
 	bool full;
 
+	/*
+	 * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0
+	 */
+	BUILD_BUG_ON(PF_QUEUE_NUM_DW % PF_MSG_LEN_DW);
+
 	if (unlikely(len != PF_MSG_LEN_DW))
 		return -EPROTO;
 
@@ -342,8 +346,8 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
 	spin_lock_irqsave(&pf_queue->lock, flags);
 	full = pf_queue_full(pf_queue);
 	if (!full) {
-		memcpy(pf_queue->data + pf_queue->tail, msg, len * sizeof(u32));
-		pf_queue->tail = (pf_queue->tail + len) % PF_QUEUE_NUM_DW;
+		memcpy(pf_queue->data + pf_queue->head, msg, len * sizeof(u32));
+		pf_queue->head = (pf_queue->head + len) % PF_QUEUE_NUM_DW;
 		queue_work(gt->usm.pf_wq, &pf_queue->worker);
 	} else {
 		drm_warn(&xe->drm, "PF Queue full, shouldn't be possible");
@@ -389,7 +393,7 @@ static void pf_queue_work_func(struct work_struct *w)
 		send_pagefault_reply(&gt->uc.guc, &reply);
 
 		if (time_after(jiffies, threshold) &&
-		    pf_queue->head != pf_queue->tail) {
+		    pf_queue->tail != pf_queue->head) {
 			queue_work(gt->usm.pf_wq, w);
 			break;
 		}
@@ -564,9 +568,9 @@ static bool get_acc(struct acc_queue *acc_queue, struct acc *acc)
 	bool ret = false;
 
 	spin_lock(&acc_queue->lock);
-	if (acc_queue->head != acc_queue->tail) {
+	if (acc_queue->tail != acc_queue->head) {
 		desc = (const struct xe_guc_acc_desc *)
-			(acc_queue->data + acc_queue->head);
+			(acc_queue->data + acc_queue->tail);
 
 		acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2);
 		acc->sub_granularity = FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31 |
@@ -579,7 +583,7 @@ static bool get_acc(struct acc_queue *acc_queue, struct acc *acc)
 		acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI,
 					      desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO);
 
-		acc_queue->head = (acc_queue->head + ACC_MSG_LEN_DW) %
+		acc_queue->tail = (acc_queue->tail + ACC_MSG_LEN_DW) %
 				  ACC_QUEUE_NUM_DW;
 		ret = true;
 	}
@@ -607,7 +611,7 @@ static void acc_queue_work_func(struct work_struct *w)
 		}
 
 		if (time_after(jiffies, threshold) &&
-		    acc_queue->head != acc_queue->tail) {
+		    acc_queue->tail != acc_queue->head) {
 			queue_work(gt->usm.acc_wq, w);
 			break;
 		}
@@ -618,7 +622,7 @@ static bool acc_queue_full(struct acc_queue *acc_queue)
 {
 	lockdep_assert_held(&acc_queue->lock);
 
-	return CIRC_SPACE(acc_queue->tail, acc_queue->head, ACC_QUEUE_NUM_DW) <=
+	return CIRC_SPACE(acc_queue->head, acc_queue->tail, ACC_QUEUE_NUM_DW) <=
 		ACC_MSG_LEN_DW;
 }
 
@@ -629,6 +633,11 @@ int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len)
 	u32 asid;
 	bool full;
 
+	/*
+	 * The below logic doesn't work unless ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW == 0
+	 */
+	BUILD_BUG_ON(ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW);
+
 	if (unlikely(len != ACC_MSG_LEN_DW))
 		return -EPROTO;
 
@@ -638,9 +647,9 @@ int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len)
 	spin_lock(&acc_queue->lock);
 	full = acc_queue_full(acc_queue);
 	if (!full) {
-		memcpy(acc_queue->data + acc_queue->tail, msg,
+		memcpy(acc_queue->data + acc_queue->head, msg,
 		       len * sizeof(u32));
-		acc_queue->tail = (acc_queue->tail + len) % ACC_QUEUE_NUM_DW;
+		acc_queue->head = (acc_queue->head + len) % ACC_QUEUE_NUM_DW;
 		queue_work(gt->usm.acc_wq, &acc_queue->worker);
 	} else {
 		drm_warn(&gt_to_xe(gt)->drm, "ACC Queue full, dropping ACC");
diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h
index 5991bcadd4..c2b004d3f4 100644
--- a/drivers/gpu/drm/xe/xe_gt_printk.h
+++ b/drivers/gpu/drm/xe/xe_gt_printk.h
@@ -43,4 +43,48 @@
 #define xe_gt_WARN_ON_ONCE(_gt, _condition) \
 	xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "gt_WARN_ON_ONCE", __stringify(_condition))
 
+static inline void __xe_gt_printfn_err(struct drm_printer *p, struct va_format *vaf)
+{
+	struct xe_gt *gt = p->arg;
+
+	xe_gt_err(gt, "%pV", vaf);
+}
+
+static inline void __xe_gt_printfn_info(struct drm_printer *p, struct va_format *vaf)
+{
+	struct xe_gt *gt = p->arg;
+
+	xe_gt_info(gt, "%pV", vaf);
+}
+
+/**
+ * xe_gt_err_printer - Construct a &drm_printer that outputs to xe_gt_err()
+ * @gt: the &xe_gt pointer to use in xe_gt_err()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_gt_err_printer(struct xe_gt *gt)
+{
+	struct drm_printer p = {
+		.printfn = __xe_gt_printfn_err,
+		.arg = gt,
+	};
+	return p;
+}
+
+/**
+ * xe_gt_info_printer - Construct a &drm_printer that outputs to xe_gt_info()
+ * @gt: the &xe_gt pointer to use in xe_gt_info()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_gt_info_printer(struct xe_gt *gt)
+{
+	struct drm_printer p = {
+		.printfn = __xe_gt_printfn_info,
+		.arg = gt,
+	};
+	return p;
+}
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
new file mode 100644
index 0000000000..791dcdd767
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_gt_sriov_pf.h"
+#include "xe_gt_sriov_pf_helpers.h"
+
+/*
+ * VF's metadata is maintained in the flexible array where:
+ *   - entry [0] contains metadata for the PF (only if applicable),
+ *   - entries [1..n] contain metadata for VF1..VFn::
+ *
+ *       <--------------------------- 1 + total_vfs ----------->
+ *      +-------+-------+-------+-----------------------+-------+
+ *      |   0   |   1   |   2   |                       |   n   |
+ *      +-------+-------+-------+-----------------------+-------+
+ *      |  PF   |  VF1  |  VF2  |      ...     ...      |  VFn  |
+ *      +-------+-------+-------+-----------------------+-------+
+ */
+static int pf_alloc_metadata(struct xe_gt *gt)
+{
+	unsigned int num_vfs = xe_gt_sriov_pf_get_totalvfs(gt);
+
+	gt->sriov.pf.vfs = drmm_kcalloc(&gt_to_xe(gt)->drm, 1 + num_vfs,
+					sizeof(*gt->sriov.pf.vfs), GFP_KERNEL);
+	if (!gt->sriov.pf.vfs)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_init_early - Prepare SR-IOV PF data structures on PF.
+ * @gt: the &xe_gt to initialize
+ *
+ * Early initialization of the PF data.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
+{
+	int err;
+
+	err = pf_alloc_metadata(gt);
+	if (err)
+		return err;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
new file mode 100644
index 0000000000..05142ffc43
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_H_
+#define _XE_GT_SRIOV_PF_H_
+
+struct xe_gt;
+
+#ifdef CONFIG_PCI_IOV
+int xe_gt_sriov_pf_init_early(struct xe_gt *gt);
+#else
+static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
+{
+	return 0;
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
new file mode 100644
index 0000000000..4f40fe24c6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -0,0 +1,1990 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#include <linux/string_choices.h>
+#include <linux/wordpart.h>
+
+#include "abi/guc_actions_sriov_abi.h"
+#include "abi/guc_klvs_abi.h"
+
+#include "regs/xe_guc_regs.h"
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_gt_sriov_pf_config.h"
+#include "xe_gt_sriov_pf_helpers.h"
+#include "xe_gt_sriov_pf_policy.h"
+#include "xe_gt_sriov_printk.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_db_mgr.h"
+#include "xe_guc_fwif.h"
+#include "xe_guc_id_mgr.h"
+#include "xe_guc_klv_helpers.h"
+#include "xe_guc_submit.h"
+#include "xe_lmtt.h"
+#include "xe_map.h"
+#include "xe_sriov.h"
+#include "xe_ttm_vram_mgr.h"
+#include "xe_wopcm.h"
+
+/*
+ * Return: number of KLVs that were successfully parsed and saved,
+ *         negative error code on failure.
+ */
+static int guc_action_update_vf_cfg(struct xe_guc *guc, u32 vfid,
+				    u64 addr, u32 size)
+{
+	u32 request[] = {
+		GUC_ACTION_PF2GUC_UPDATE_VF_CFG,
+		vfid,
+		lower_32_bits(addr),
+		upper_32_bits(addr),
+		size,
+	};
+
+	return xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));
+}
+
+/*
+ * Return: 0 on success, negative error code on failure.
+ */
+static int pf_send_vf_cfg_reset(struct xe_gt *gt, u32 vfid)
+{
+	struct xe_guc *guc = &gt->uc.guc;
+	int ret;
+
+	ret = guc_action_update_vf_cfg(guc, vfid, 0, 0);
+
+	return ret <= 0 ? ret : -EPROTO;
+}
+
+/*
+ * Return: number of KLVs that were successfully parsed and saved,
+ *         negative error code on failure.
+ */
+static int pf_send_vf_cfg_klvs(struct xe_gt *gt, u32 vfid, const u32 *klvs, u32 num_dwords)
+{
+	const u32 bytes = num_dwords * sizeof(u32);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_guc *guc = &gt->uc.guc;
+	struct xe_bo *bo;
+	int ret;
+
+	bo = xe_bo_create_pin_map(xe, tile, NULL,
+				  ALIGN(bytes, PAGE_SIZE),
+				  ttm_bo_type_kernel,
+				  XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+				  XE_BO_FLAG_GGTT |
+				  XE_BO_FLAG_GGTT_INVALIDATE);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	xe_map_memcpy_to(xe, &bo->vmap, 0, klvs, bytes);
+
+	ret = guc_action_update_vf_cfg(guc, vfid, xe_bo_ggtt_addr(bo), num_dwords);
+
+	xe_bo_unpin_map_no_vm(bo);
+
+	return ret;
+}
+
+/*
+ * Return: 0 on success, -ENOKEY if some KLVs were not updated, -EPROTO if reply was malformed,
+ *         negative error code on failure.
+ */
+static int pf_push_vf_cfg_klvs(struct xe_gt *gt, unsigned int vfid, u32 num_klvs,
+			       const u32 *klvs, u32 num_dwords)
+{
+	int ret;
+
+	xe_gt_assert(gt, num_klvs == xe_guc_klv_count(klvs, num_dwords));
+
+	ret = pf_send_vf_cfg_klvs(gt, vfid, klvs, num_dwords);
+
+	if (ret != num_klvs) {
+		int err = ret < 0 ? ret : ret < num_klvs ? -ENOKEY : -EPROTO;
+		struct drm_printer p = xe_gt_info_printer(gt);
+		char name[8];
+
+		xe_gt_sriov_notice(gt, "Failed to push %s %u config KLV%s (%pe)\n",
+				   xe_sriov_function_name(vfid, name, sizeof(name)),
+				   num_klvs, str_plural(num_klvs), ERR_PTR(err));
+		xe_guc_klv_print(klvs, num_dwords, &p);
+		return err;
+	}
+
+	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) {
+		struct drm_printer p = xe_gt_info_printer(gt);
+
+		xe_guc_klv_print(klvs, num_dwords, &p);
+	}
+
+	return 0;
+}
+
+static int pf_push_vf_cfg_u32(struct xe_gt *gt, unsigned int vfid, u16 key, u32 value)
+{
+	u32 klv[] = {
+		FIELD_PREP(GUC_KLV_0_KEY, key) | FIELD_PREP(GUC_KLV_0_LEN, 1),
+		value,
+	};
+
+	return pf_push_vf_cfg_klvs(gt, vfid, 1, klv, ARRAY_SIZE(klv));
+}
+
+static int pf_push_vf_cfg_u64(struct xe_gt *gt, unsigned int vfid, u16 key, u64 value)
+{
+	u32 klv[] = {
+		FIELD_PREP(GUC_KLV_0_KEY, key) | FIELD_PREP(GUC_KLV_0_LEN, 2),
+		lower_32_bits(value),
+		upper_32_bits(value),
+	};
+
+	return pf_push_vf_cfg_klvs(gt, vfid, 1, klv, ARRAY_SIZE(klv));
+}
+
+static int pf_push_vf_cfg_ggtt(struct xe_gt *gt, unsigned int vfid, u64 start, u64 size)
+{
+	u32 klvs[] = {
+		PREP_GUC_KLV_TAG(VF_CFG_GGTT_START),
+		lower_32_bits(start),
+		upper_32_bits(start),
+		PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE),
+		lower_32_bits(size),
+		upper_32_bits(size),
+	};
+
+	return pf_push_vf_cfg_klvs(gt, vfid, 2, klvs, ARRAY_SIZE(klvs));
+}
+
+static int pf_push_vf_cfg_ctxs(struct xe_gt *gt, unsigned int vfid, u32 begin, u32 num)
+{
+	u32 klvs[] = {
+		PREP_GUC_KLV_TAG(VF_CFG_BEGIN_CONTEXT_ID),
+		begin,
+		PREP_GUC_KLV_TAG(VF_CFG_NUM_CONTEXTS),
+		num,
+	};
+
+	return pf_push_vf_cfg_klvs(gt, vfid, 2, klvs, ARRAY_SIZE(klvs));
+}
+
+static int pf_push_vf_cfg_dbs(struct xe_gt *gt, unsigned int vfid, u32 begin, u32 num)
+{
+	u32 klvs[] = {
+		PREP_GUC_KLV_TAG(VF_CFG_BEGIN_DOORBELL_ID),
+		begin,
+		PREP_GUC_KLV_TAG(VF_CFG_NUM_DOORBELLS),
+		num,
+	};
+
+	return pf_push_vf_cfg_klvs(gt, vfid, 2, klvs, ARRAY_SIZE(klvs));
+}
+
+static int pf_push_vf_cfg_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 exec_quantum)
+{
+	return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY, exec_quantum);
+}
+
+static int pf_push_vf_cfg_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u32 preempt_timeout)
+{
+	return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY, preempt_timeout);
+}
+
+static int pf_push_vf_cfg_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
+{
+	return pf_push_vf_cfg_u64(gt, vfid, GUC_KLV_VF_CFG_LMEM_SIZE_KEY, size);
+}
+
+static struct xe_gt_sriov_config *pf_pick_vf_config(struct xe_gt *gt, unsigned int vfid)
+{
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+	return &gt->sriov.pf.vfs[vfid].config;
+}
+
+/* Return: number of configuration dwords written */
+static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config)
+{
+	u32 n = 0;
+
+	if (drm_mm_node_allocated(&config->ggtt_region)) {
+		cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START);
+		cfg[n++] = lower_32_bits(config->ggtt_region.start);
+		cfg[n++] = upper_32_bits(config->ggtt_region.start);
+
+		cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE);
+		cfg[n++] = lower_32_bits(config->ggtt_region.size);
+		cfg[n++] = upper_32_bits(config->ggtt_region.size);
+	}
+
+	return n;
+}
+
+/* Return: number of configuration dwords written */
+static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config)
+{
+	u32 n = 0;
+
+	n += encode_config_ggtt(cfg, config);
+
+	cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_BEGIN_CONTEXT_ID);
+	cfg[n++] = config->begin_ctx;
+
+	cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_NUM_CONTEXTS);
+	cfg[n++] = config->num_ctxs;
+
+	cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_BEGIN_DOORBELL_ID);
+	cfg[n++] = config->begin_db;
+
+	cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_NUM_DOORBELLS);
+	cfg[n++] = config->num_dbs;
+
+	if (config->lmem_obj) {
+		cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_LMEM_SIZE);
+		cfg[n++] = lower_32_bits(config->lmem_obj->size);
+		cfg[n++] = upper_32_bits(config->lmem_obj->size);
+	}
+
+	cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_EXEC_QUANTUM);
+	cfg[n++] = config->exec_quantum;
+
+	cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_PREEMPT_TIMEOUT);
+	cfg[n++] = config->preempt_timeout;
+
+	return n;
+}
+
+static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid)
+{
+	struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+	u32 max_cfg_dwords = SZ_4K / sizeof(u32);
+	u32 num_dwords;
+	int num_klvs;
+	u32 *cfg;
+	int err;
+
+	cfg = kcalloc(max_cfg_dwords, sizeof(u32), GFP_KERNEL);
+	if (!cfg)
+		return -ENOMEM;
+
+	num_dwords = encode_config(cfg, config);
+	xe_gt_assert(gt, num_dwords <= max_cfg_dwords);
+
+	if (xe_gt_is_media_type(gt)) {
+		struct xe_gt *primary = gt->tile->primary_gt;
+		struct xe_gt_sriov_config *other = pf_pick_vf_config(primary, vfid);
+
+		/* media-GT will never include a GGTT config */
+		xe_gt_assert(gt, !encode_config_ggtt(cfg + num_dwords, config));
+
+		/* the GGTT config must be taken from the primary-GT instead */
+		num_dwords += encode_config_ggtt(cfg + num_dwords, other);
+	}
+	xe_gt_assert(gt, num_dwords <= max_cfg_dwords);
+
+	num_klvs = xe_guc_klv_count(cfg, num_dwords);
+	err = pf_push_vf_cfg_klvs(gt, vfid, num_klvs, cfg, num_dwords);
+
+	kfree(cfg);
+	return err;
+}
+
+static u64 pf_get_ggtt_alignment(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	return IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
+}
+
+static u64 pf_get_min_spare_ggtt(struct xe_gt *gt)
+{
+	/* XXX: preliminary */
+	return IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ?
+		pf_get_ggtt_alignment(gt) : SZ_64M;
+}
+
+static u64 pf_get_spare_ggtt(struct xe_gt *gt)
+{
+	u64 spare;
+
+	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+	spare = gt->sriov.pf.spare.ggtt_size;
+	spare = max_t(u64, spare, pf_get_min_spare_ggtt(gt));
+
+	return spare;
+}
+
+static int pf_set_spare_ggtt(struct xe_gt *gt, u64 size)
+{
+	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+	if (size && size < pf_get_min_spare_ggtt(gt))
+		return -EINVAL;
+
+	size = round_up(size, pf_get_ggtt_alignment(gt));
+	gt->sriov.pf.spare.ggtt_size = size;
+
+	return 0;
+}
+
+static int pf_distribute_config_ggtt(struct xe_tile *tile, unsigned int vfid, u64 start, u64 size)
+{
+	int err, err2 = 0;
+
+	err = pf_push_vf_cfg_ggtt(tile->primary_gt, vfid, start, size);
+
+	if (tile->media_gt && !err)
+		err2 = pf_push_vf_cfg_ggtt(tile->media_gt, vfid, start, size);
+
+	return err ?: err2;
+}
+
+static void pf_release_ggtt(struct xe_tile *tile, struct drm_mm_node *node)
+{
+	struct xe_ggtt *ggtt = tile->mem.ggtt;
+
+	if (drm_mm_node_allocated(node)) {
+		/*
+		 * explicit GGTT PTE assignment to the PF using xe_ggtt_assign()
+		 * is redundant, as PTE will be implicitly re-assigned to PF by
+		 * the xe_ggtt_clear() called by below xe_ggtt_remove_node().
+		 */
+		xe_ggtt_remove_node(ggtt, node, false);
+	}
+}
+
+static void pf_release_vf_config_ggtt(struct xe_gt *gt, struct xe_gt_sriov_config *config)
+{
+	pf_release_ggtt(gt_to_tile(gt), &config->ggtt_region);
+}
+
+static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size)
+{
+	struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+	struct drm_mm_node *node = &config->ggtt_region;
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_ggtt *ggtt = tile->mem.ggtt;
+	u64 alignment = pf_get_ggtt_alignment(gt);
+	int err;
+
+	xe_gt_assert(gt, vfid);
+	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+	size = round_up(size, alignment);
+
+	if (drm_mm_node_allocated(node)) {
+		err = pf_distribute_config_ggtt(tile, vfid, 0, 0);
+		if (unlikely(err))
+			return err;
+
+		pf_release_ggtt(tile, node);
+	}
+	xe_gt_assert(gt, !drm_mm_node_allocated(node));
+
+	if (!size)
+		return 0;
+
+	err = xe_ggtt_insert_special_node(ggtt, node, size, alignment);
+	if (unlikely(err))
+		return err;
+
+	xe_ggtt_assign(ggtt, node, vfid);
+	xe_gt_sriov_dbg_verbose(gt, "VF%u assigned GGTT %llx-%llx\n",
+				vfid, node->start, node->start + node->size - 1);
+
+	err = pf_distribute_config_ggtt(gt->tile, vfid, node->start, node->size);
+	if (unlikely(err))
+		return err;
+
+	return 0;
+}
+
+static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid)
+{
+	struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+	struct drm_mm_node *node = &config->ggtt_region;
+
+	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+	return drm_mm_node_allocated(node) ? node->size : 0;
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_ggtt - Query size of GGTT address space of the VF.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function can only be called on PF.
+ *
+ * Return: size of the VF's assigned (or PF's spare) GGTT address space.
+ */
+u64 xe_gt_sriov_pf_config_get_ggtt(struct xe_gt *gt, unsigned int vfid)
+{
+	u64 size;
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	if (vfid)
+		size = pf_get_vf_config_ggtt(gt_to_tile(gt)->primary_gt, vfid);
+	else
+		size = pf_get_spare_ggtt(gt_to_tile(gt)->primary_gt);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return size;
+}
+
+static int pf_config_set_u64_done(struct xe_gt *gt, unsigned int vfid, u64 value,
+				  u64 actual, const char *what, int err)
+{
+	char size[10];
+	char name[8];
+
+	xe_sriov_function_name(vfid, name, sizeof(name));
+
+	if (unlikely(err)) {
+		string_get_size(value, 1, STRING_UNITS_2, size, sizeof(size));
+		xe_gt_sriov_notice(gt, "Failed to provision %s with %llu (%s) %s (%pe)\n",
+				   name, value, size, what, ERR_PTR(err));
+		string_get_size(actual, 1, STRING_UNITS_2, size, sizeof(size));
+		xe_gt_sriov_info(gt, "%s provisioning remains at %llu (%s) %s\n",
+				 name, actual, size, what);
+		return err;
+	}
+
+	/* the actual value may have changed during provisioning */
+	string_get_size(actual, 1, STRING_UNITS_2, size, sizeof(size));
+	xe_gt_sriov_info(gt, "%s provisioned with %llu (%s) %s\n",
+			 name, actual, size, what);
+	return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_ggtt - Provision VF with GGTT space.
+ * @gt: the &xe_gt (can't be media)
+ * @vfid: the VF identifier
+ * @size: requested GGTT size
+ *
+ * If &vfid represents PF, then function will change PF's spare GGTT config.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size)
+{
+	int err;
+
+	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	if (vfid)
+		err = pf_provision_vf_ggtt(gt, vfid, size);
+	else
+		err = pf_set_spare_ggtt(gt, size);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return pf_config_set_u64_done(gt, vfid, size,
+				      xe_gt_sriov_pf_config_get_ggtt(gt, vfid),
+				      vfid ? "GGTT" : "spare GGTT", err);
+}
+
+static int pf_config_bulk_set_u64_done(struct xe_gt *gt, unsigned int first, unsigned int num_vfs,
+				       u64 value, u64 (*get)(struct xe_gt*, unsigned int),
+				       const char *what, unsigned int last, int err)
+{
+	char size[10];
+
+	xe_gt_assert(gt, first);
+	xe_gt_assert(gt, num_vfs);
+	xe_gt_assert(gt, first <= last);
+
+	if (num_vfs == 1)
+		return pf_config_set_u64_done(gt, first, value, get(gt, first), what, err);
+
+	if (unlikely(err)) {
+		xe_gt_sriov_notice(gt, "Failed to bulk provision VF%u..VF%u with %s\n",
+				   first, first + num_vfs - 1, what);
+		if (last > first)
+			pf_config_bulk_set_u64_done(gt, first, last - first, value,
+						    get, what, last, 0);
+		return pf_config_set_u64_done(gt, last, value, get(gt, last), what, err);
+	}
+
+	/* pick actual value from first VF - bulk provisioning shall be equal across all VFs */
+	value = get(gt, first);
+	string_get_size(value, 1, STRING_UNITS_2, size, sizeof(size));
+	xe_gt_sriov_info(gt, "VF%u..VF%u provisioned with %llu (%s) %s\n",
+			 first, first + num_vfs - 1, value, size, what);
+	return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_config_bulk_set_ggtt - Provision many VFs with GGTT.
+ * @gt: the &xe_gt (can't be media)
+ * @vfid: starting VF identifier (can't be 0)
+ * @num_vfs: number of VFs to provision
+ * @size: requested GGTT size
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_bulk_set_ggtt(struct xe_gt *gt, unsigned int vfid,
+					unsigned int num_vfs, u64 size)
+{
+	unsigned int n;
+	int err = 0;
+
+	xe_gt_assert(gt, vfid);
+	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+
+	if (!num_vfs)
+		return 0;
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	for (n = vfid; n < vfid + num_vfs; n++) {
+		err = pf_provision_vf_ggtt(gt, n, size);
+		if (err)
+			break;
+	}
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return pf_config_bulk_set_u64_done(gt, vfid, num_vfs, size,
+					   xe_gt_sriov_pf_config_get_ggtt,
+					   "GGTT", n, err);
+}
+
+/* Return: size of the largest continuous GGTT region */
+static u64 pf_get_max_ggtt(struct xe_gt *gt)
+{
+	struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt;
+	const struct drm_mm *mm = &ggtt->mm;
+	const struct drm_mm_node *entry;
+	u64 alignment = pf_get_ggtt_alignment(gt);
+	u64 spare = pf_get_spare_ggtt(gt);
+	u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt));
+	u64 hole_start, hole_end, hole_size;
+	u64 max_hole = 0;
+
+	mutex_lock(&ggtt->lock);
+
+	drm_mm_for_each_hole(entry, mm, hole_start, hole_end) {
+		hole_start = max(hole_start, hole_min_start);
+		hole_start = ALIGN(hole_start, alignment);
+		hole_end = ALIGN_DOWN(hole_end, alignment);
+		if (hole_start >= hole_end)
+			continue;
+		hole_size = hole_end - hole_start;
+		xe_gt_sriov_dbg_verbose(gt, "HOLE start %llx size %lluK\n",
+					hole_start, hole_size / SZ_1K);
+		spare -= min3(spare, hole_size, max_hole);
+		max_hole = max(max_hole, hole_size);
+	}
+
+	mutex_unlock(&ggtt->lock);
+
+	xe_gt_sriov_dbg_verbose(gt, "HOLE max %lluK reserved %lluK\n",
+				max_hole / SZ_1K, spare / SZ_1K);
+	return max_hole > spare ? max_hole - spare : 0;
+}
+
+static u64 pf_estimate_fair_ggtt(struct xe_gt *gt, unsigned int num_vfs)
+{
+	u64 available = pf_get_max_ggtt(gt);
+	u64 alignment = pf_get_ggtt_alignment(gt);
+	u64 fair;
+
+	/*
+	 * To simplify the logic we only look at single largest GGTT region
+	 * as that will be always the best fit for 1 VF case, and most likely
+	 * will also nicely cover other cases where VFs are provisioned on the
+	 * fresh and idle PF driver, without any stale GGTT allocations spread
+	 * in the middle of the full GGTT range.
+	 */
+
+	fair = div_u64(available, num_vfs);
+	fair = ALIGN_DOWN(fair, alignment);
+	xe_gt_sriov_dbg_verbose(gt, "GGTT available(%lluK) fair(%u x %lluK)\n",
+				available / SZ_1K, num_vfs, fair / SZ_1K);
+	return fair;
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_fair_ggtt - Provision many VFs with fair GGTT.
+ * @gt: the &xe_gt (can't be media)
+ * @vfid: starting VF identifier (can't be 0)
+ * @num_vfs: number of VFs to provision
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_fair_ggtt(struct xe_gt *gt, unsigned int vfid,
+					unsigned int num_vfs)
+{
+	u64 fair;
+
+	xe_gt_assert(gt, vfid);
+	xe_gt_assert(gt, num_vfs);
+	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	fair = pf_estimate_fair_ggtt(gt, num_vfs);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	if (!fair)
+		return -ENOSPC;
+
+	return xe_gt_sriov_pf_config_bulk_set_ggtt(gt, vfid, num_vfs, fair);
+}
+
+static u32 pf_get_min_spare_ctxs(struct xe_gt *gt)
+{
+	/* XXX: preliminary */
+	return IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ?
+		hweight64(gt->info.engine_mask) : SZ_256;
+}
+
+static u32 pf_get_spare_ctxs(struct xe_gt *gt)
+{
+	u32 spare;
+
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+	spare = gt->sriov.pf.spare.num_ctxs;
+	spare = max_t(u32, spare, pf_get_min_spare_ctxs(gt));
+
+	return spare;
+}
+
+static int pf_set_spare_ctxs(struct xe_gt *gt, u32 spare)
+{
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+	if (spare > GUC_ID_MAX)
+		return -EINVAL;
+
+	if (spare && spare < pf_get_min_spare_ctxs(gt))
+		return -EINVAL;
+
+	gt->sriov.pf.spare.num_ctxs = spare;
+
+	return 0;
+}
+
+/* Return: start ID or negative error code on failure */
+static int pf_reserve_ctxs(struct xe_gt *gt, u32 num)
+{
+	struct xe_guc_id_mgr *idm = &gt->uc.guc.submission_state.idm;
+	unsigned int spare = pf_get_spare_ctxs(gt);
+
+	return xe_guc_id_mgr_reserve(idm, num, spare);
+}
+
+static void pf_release_ctxs(struct xe_gt *gt, u32 start, u32 num)
+{
+	struct xe_guc_id_mgr *idm = &gt->uc.guc.submission_state.idm;
+
+	if (num)
+		xe_guc_id_mgr_release(idm, start, num);
+}
+
+static void pf_release_config_ctxs(struct xe_gt *gt, struct xe_gt_sriov_config *config)
+{
+	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+	pf_release_ctxs(gt, config->begin_ctx, config->num_ctxs);
+	config->begin_ctx = 0;
+	config->num_ctxs = 0;
+}
+
+static int pf_provision_vf_ctxs(struct xe_gt *gt, unsigned int vfid, u32 num_ctxs)
+{
+	struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+	int ret;
+
+	xe_gt_assert(gt, vfid);
+
+	if (num_ctxs > GUC_ID_MAX)
+		return -EINVAL;
+
+	if (config->num_ctxs) {
+		ret = pf_push_vf_cfg_ctxs(gt, vfid, 0, 0);
+		if (unlikely(ret))
+			return ret;
+
+		pf_release_config_ctxs(gt, config);
+	}
+
+	if (!num_ctxs)
+		return 0;
+
+	ret = pf_reserve_ctxs(gt, num_ctxs);
+	if (unlikely(ret < 0))
+		return ret;
+
+	config->begin_ctx = ret;
+	config->num_ctxs = num_ctxs;
+
+	ret = pf_push_vf_cfg_ctxs(gt, vfid, config->begin_ctx, config->num_ctxs);
+	if (unlikely(ret)) {
+		pf_release_config_ctxs(gt, config);
+		return ret;
+	}
+
+	xe_gt_sriov_dbg_verbose(gt, "VF%u contexts %u-%u\n",
+				vfid, config->begin_ctx, config->begin_ctx + config->num_ctxs - 1);
+	return 0;
+}
+
+static u32 pf_get_vf_config_ctxs(struct xe_gt *gt, unsigned int vfid)
+{
+	struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+
+	return config->num_ctxs;
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_ctxs - Get VF's GuC contexts IDs quota.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function can only be called on PF.
+ * If &vfid represents a PF then number of PF's spare GuC context IDs is returned.
+ *
+ * Return: VF's quota (or PF's spare).
+ */
+u32 xe_gt_sriov_pf_config_get_ctxs(struct xe_gt *gt, unsigned int vfid)
+{
+	u32 num_ctxs;
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	if (vfid)
+		num_ctxs = pf_get_vf_config_ctxs(gt, vfid);
+	else
+		num_ctxs = pf_get_spare_ctxs(gt);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return num_ctxs;
+}
+
+static const char *no_unit(u32 unused)
+{
+	return "";
+}
+
+static const char *spare_unit(u32 unused)
+{
+	return " spare";
+}
+
+static int pf_config_set_u32_done(struct xe_gt *gt, unsigned int vfid, u32 value, u32 actual,
+				  const char *what, const char *(*unit)(u32), int err)
+{
+	char name[8];
+
+	xe_sriov_function_name(vfid, name, sizeof(name));
+
+	if (unlikely(err)) {
+		xe_gt_sriov_notice(gt, "Failed to provision %s with %u%s %s (%pe)\n",
+				   name, value, unit(value), what, ERR_PTR(err));
+		xe_gt_sriov_info(gt, "%s provisioning remains at %u%s %s\n",
+				 name, actual, unit(actual), what);
+		return err;
+	}
+
+	/* the actual value may have changed during provisioning */
+	xe_gt_sriov_info(gt, "%s provisioned with %u%s %s\n",
+			 name, actual, unit(actual), what);
+	return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_ctxs - Configure GuC contexts IDs quota for the VF.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @num_ctxs: requested number of GuC contexts IDs (0 to release)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_ctxs(struct xe_gt *gt, unsigned int vfid, u32 num_ctxs)
+{
+	int err;
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	if (vfid)
+		err = pf_provision_vf_ctxs(gt, vfid, num_ctxs);
+	else
+		err = pf_set_spare_ctxs(gt, num_ctxs);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return pf_config_set_u32_done(gt, vfid, num_ctxs,
+				      xe_gt_sriov_pf_config_get_ctxs(gt, vfid),
+				      "GuC context IDs", vfid ? no_unit : spare_unit, err);
+}
+
+static int pf_config_bulk_set_u32_done(struct xe_gt *gt, unsigned int first, unsigned int num_vfs,
+				       u32 value, u32 (*get)(struct xe_gt*, unsigned int),
+				       const char *what, const char *(*unit)(u32),
+				       unsigned int last, int err)
+{
+	xe_gt_assert(gt, first);
+	xe_gt_assert(gt, num_vfs);
+	xe_gt_assert(gt, first <= last);
+
+	if (num_vfs == 1)
+		return pf_config_set_u32_done(gt, first, value, get(gt, first), what, unit, err);
+
+	if (unlikely(err)) {
+		xe_gt_sriov_notice(gt, "Failed to bulk provision VF%u..VF%u with %s\n",
+				   first, first + num_vfs - 1, what);
+		if (last > first)
+			pf_config_bulk_set_u32_done(gt, first, last - first, value,
+						    get, what, unit, last, 0);
+		return pf_config_set_u32_done(gt, last, value, get(gt, last), what, unit, err);
+	}
+
+	/* pick actual value from first VF - bulk provisioning shall be equal across all VFs */
+	value = get(gt, first);
+	xe_gt_sriov_info(gt, "VF%u..VF%u provisioned with %u%s %s\n",
+			 first, first + num_vfs - 1, value, unit(value), what);
+	return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_config_bulk_set_ctxs - Provision many VFs with GuC context IDs.
+ * @gt: the &xe_gt
+ * @vfid: starting VF identifier
+ * @num_vfs: number of VFs to provision
+ * @num_ctxs: requested number of GuC contexts IDs (0 to release)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_bulk_set_ctxs(struct xe_gt *gt, unsigned int vfid,
+					unsigned int num_vfs, u32 num_ctxs)
+{
+	unsigned int n;
+	int err = 0;
+
+	xe_gt_assert(gt, vfid);
+
+	if (!num_vfs)
+		return 0;
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	for (n = vfid; n < vfid + num_vfs; n++) {
+		err = pf_provision_vf_ctxs(gt, n, num_ctxs);
+		if (err)
+			break;
+	}
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return pf_config_bulk_set_u32_done(gt, vfid, num_vfs, num_ctxs,
+					   xe_gt_sriov_pf_config_get_ctxs,
+					   "GuC context IDs", no_unit, n, err);
+}
+
+static u32 pf_estimate_fair_ctxs(struct xe_gt *gt, unsigned int num_vfs)
+{
+	struct xe_guc_id_mgr *idm = &gt->uc.guc.submission_state.idm;
+	u32 spare = pf_get_spare_ctxs(gt);
+	u32 fair = (idm->total - spare) / num_vfs;
+	int ret;
+
+	for (; fair; --fair) {
+		ret = xe_guc_id_mgr_reserve(idm, fair * num_vfs, spare);
+		if (ret < 0)
+			continue;
+		xe_guc_id_mgr_release(idm, ret, fair * num_vfs);
+		break;
+	}
+
+	xe_gt_sriov_dbg_verbose(gt, "contexts fair(%u x %u)\n", num_vfs, fair);
+	return fair;
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_fair_ctxs - Provision many VFs with fair GuC context IDs.
+ * @gt: the &xe_gt
+ * @vfid: starting VF identifier (can't be 0)
+ * @num_vfs: number of VFs to provision (can't be 0)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_fair_ctxs(struct xe_gt *gt, unsigned int vfid,
+					unsigned int num_vfs)
+{
+	u32 fair;
+
+	xe_gt_assert(gt, vfid);
+	xe_gt_assert(gt, num_vfs);
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	fair = pf_estimate_fair_ctxs(gt, num_vfs);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	if (!fair)
+		return -ENOSPC;
+
+	return xe_gt_sriov_pf_config_bulk_set_ctxs(gt, vfid, num_vfs, fair);
+}
+
+static u32 pf_get_min_spare_dbs(struct xe_gt *gt)
+{
+	/* XXX: preliminary, we don't use doorbells yet! */
+	return IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 1 : 0;
+}
+
+static u32 pf_get_spare_dbs(struct xe_gt *gt)
+{
+	u32 spare;
+
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+	spare = gt->sriov.pf.spare.num_dbs;
+	spare = max_t(u32, spare, pf_get_min_spare_dbs(gt));
+
+	return spare;
+}
+
+static int pf_set_spare_dbs(struct xe_gt *gt, u32 spare)
+{
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+	if (spare > GUC_NUM_DOORBELLS)
+		return -EINVAL;
+
+	if (spare && spare < pf_get_min_spare_dbs(gt))
+		return -EINVAL;
+
+	gt->sriov.pf.spare.num_dbs = spare;
+	return 0;
+}
+
+/* Return: start ID or negative error code on failure */
+static int pf_reserve_dbs(struct xe_gt *gt, u32 num)
+{
+	struct xe_guc_db_mgr *dbm = &gt->uc.guc.dbm;
+	unsigned int spare = pf_get_spare_dbs(gt);
+
+	return xe_guc_db_mgr_reserve_range(dbm, num, spare);
+}
+
+static void pf_release_dbs(struct xe_gt *gt, u32 start, u32 num)
+{
+	struct xe_guc_db_mgr *dbm = &gt->uc.guc.dbm;
+
+	if (num)
+		xe_guc_db_mgr_release_range(dbm, start, num);
+}
+
+static void pf_release_config_dbs(struct xe_gt *gt, struct xe_gt_sriov_config *config)
+{
+	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+	pf_release_dbs(gt, config->begin_db, config->num_dbs);
+	config->begin_db = 0;
+	config->num_dbs = 0;
+}
+
+static int pf_provision_vf_dbs(struct xe_gt *gt, unsigned int vfid, u32 num_dbs)
+{
+	struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+	int ret;
+
+	xe_gt_assert(gt, vfid);
+
+	if (num_dbs > GUC_NUM_DOORBELLS)
+		return -EINVAL;
+
+	if (config->num_dbs) {
+		ret = pf_push_vf_cfg_dbs(gt, vfid, 0, 0);
+		if (unlikely(ret))
+			return ret;
+
+		pf_release_config_dbs(gt, config);
+	}
+
+	if (!num_dbs)
+		return 0;
+
+	ret = pf_reserve_dbs(gt, num_dbs);
+	if (unlikely(ret < 0))
+		return ret;
+
+	config->begin_db = ret;
+	config->num_dbs = num_dbs;
+
+	ret = pf_push_vf_cfg_dbs(gt, vfid, config->begin_db, config->num_dbs);
+	if (unlikely(ret)) {
+		pf_release_config_dbs(gt, config);
+		return ret;
+	}
+
+	xe_gt_sriov_dbg_verbose(gt, "VF%u doorbells %u-%u\n",
+				vfid, config->begin_db, config->begin_db + config->num_dbs - 1);
+	return 0;
+}
+
+static u32 pf_get_vf_config_dbs(struct xe_gt *gt, unsigned int vfid)
+{
+	struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+
+	return config->num_dbs;
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_dbs - Get VF's GuC doorbells IDs quota.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function can only be called on PF.
+ * If &vfid represents a PF then number of PF's spare GuC doorbells IDs is returned.
+ *
+ * Return: VF's quota (or PF's spare).
+ */
+u32 xe_gt_sriov_pf_config_get_dbs(struct xe_gt *gt, unsigned int vfid)
+{
+	u32 num_dbs;
+
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	if (vfid)
+		num_dbs = pf_get_vf_config_dbs(gt, vfid);
+	else
+		num_dbs = pf_get_spare_dbs(gt);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return num_dbs;
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_dbs - Configure GuC doorbells IDs quota for the VF.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @num_dbs: requested number of GuC doorbells IDs (0 to release)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_dbs(struct xe_gt *gt, unsigned int vfid, u32 num_dbs)
+{
+	int err;
+
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	if (vfid)
+		err = pf_provision_vf_dbs(gt, vfid, num_dbs);
+	else
+		err = pf_set_spare_dbs(gt, num_dbs);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return pf_config_set_u32_done(gt, vfid, num_dbs,
+				      xe_gt_sriov_pf_config_get_dbs(gt, vfid),
+				      "GuC doorbell IDs", vfid ? no_unit : spare_unit, err);
+}
+
+/**
+ * xe_gt_sriov_pf_config_bulk_set_dbs - Provision many VFs with GuC context IDs.
+ * @gt: the &xe_gt
+ * @vfid: starting VF identifier (can't be 0)
+ * @num_vfs: number of VFs to provision
+ * @num_dbs: requested number of GuC doorbell IDs (0 to release)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_bulk_set_dbs(struct xe_gt *gt, unsigned int vfid,
+				       unsigned int num_vfs, u32 num_dbs)
+{
+	unsigned int n;
+	int err = 0;
+
+	xe_gt_assert(gt, vfid);
+
+	if (!num_vfs)
+		return 0;
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	for (n = vfid; n < vfid + num_vfs; n++) {
+		err = pf_provision_vf_dbs(gt, n, num_dbs);
+		if (err)
+			break;
+	}
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return pf_config_bulk_set_u32_done(gt, vfid, num_vfs, num_dbs,
+					   xe_gt_sriov_pf_config_get_dbs,
+					   "GuC doorbell IDs", no_unit, n, err);
+}
+
+static u32 pf_estimate_fair_dbs(struct xe_gt *gt, unsigned int num_vfs)
+{
+	struct xe_guc_db_mgr *dbm = &gt->uc.guc.dbm;
+	u32 spare = pf_get_spare_dbs(gt);
+	u32 fair = (GUC_NUM_DOORBELLS - spare) / num_vfs;
+	int ret;
+
+	for (; fair; --fair) {
+		ret = xe_guc_db_mgr_reserve_range(dbm, fair * num_vfs, spare);
+		if (ret < 0)
+			continue;
+		xe_guc_db_mgr_release_range(dbm, ret, fair * num_vfs);
+		break;
+	}
+
+	xe_gt_sriov_dbg_verbose(gt, "doorbells fair(%u x %u)\n", num_vfs, fair);
+	return fair;
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_fair_dbs - Provision many VFs with fair GuC doorbell  IDs.
+ * @gt: the &xe_gt
+ * @vfid: starting VF identifier (can't be 0)
+ * @num_vfs: number of VFs to provision (can't be 0)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_fair_dbs(struct xe_gt *gt, unsigned int vfid,
+				       unsigned int num_vfs)
+{
+	u32 fair;
+
+	xe_gt_assert(gt, vfid);
+	xe_gt_assert(gt, num_vfs);
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	fair = pf_estimate_fair_dbs(gt, num_vfs);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	if (!fair)
+		return -ENOSPC;
+
+	return xe_gt_sriov_pf_config_bulk_set_dbs(gt, vfid, num_vfs, fair);
+}
+
+static u64 pf_get_lmem_alignment(struct xe_gt *gt)
+{
+	/* this might be platform dependent */
+	return SZ_2M;
+}
+
+static u64 pf_get_min_spare_lmem(struct xe_gt *gt)
+{
+	/* this might be platform dependent */
+	return SZ_128M; /* XXX: preliminary */
+}
+
+static u64 pf_get_spare_lmem(struct xe_gt *gt)
+{
+	u64 spare;
+
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+	spare = gt->sriov.pf.spare.lmem_size;
+	spare = max_t(u64, spare, pf_get_min_spare_lmem(gt));
+
+	return spare;
+}
+
+static int pf_set_spare_lmem(struct xe_gt *gt, u64 size)
+{
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+	if (size && size < pf_get_min_spare_lmem(gt))
+		return -EINVAL;
+
+	gt->sriov.pf.spare.lmem_size = size;
+	return 0;
+}
+
+static u64 pf_get_vf_config_lmem(struct xe_gt *gt, unsigned int vfid)
+{
+	struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+	struct xe_bo *bo;
+
+	bo = config->lmem_obj;
+	return bo ? bo->size : 0;
+}
+
+static int pf_distribute_config_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_tile *tile;
+	unsigned int tid;
+	int err;
+
+	for_each_tile(tile, xe, tid) {
+		if (tile->primary_gt == gt) {
+			err = pf_push_vf_cfg_lmem(gt, vfid, size);
+		} else {
+			u64 lmem = pf_get_vf_config_lmem(tile->primary_gt, vfid);
+
+			if (!lmem)
+				continue;
+			err = pf_push_vf_cfg_lmem(gt, vfid, lmem);
+		}
+		if (unlikely(err))
+			return err;
+	}
+	return 0;
+}
+
+static void pf_force_lmtt_invalidate(struct xe_device *xe)
+{
+	/* TODO */
+}
+
+static void pf_reset_vf_lmtt(struct xe_device *xe, unsigned int vfid)
+{
+	struct xe_lmtt *lmtt;
+	struct xe_tile *tile;
+	unsigned int tid;
+
+	xe_assert(xe, IS_DGFX(xe));
+	xe_assert(xe, IS_SRIOV_PF(xe));
+
+	for_each_tile(tile, xe, tid) {
+		lmtt = &tile->sriov.pf.lmtt;
+		xe_lmtt_drop_pages(lmtt, vfid);
+	}
+}
+
+static int pf_update_vf_lmtt(struct xe_device *xe, unsigned int vfid)
+{
+	struct xe_gt_sriov_config *config;
+	struct xe_tile *tile;
+	struct xe_lmtt *lmtt;
+	struct xe_bo *bo;
+	struct xe_gt *gt;
+	u64 total, offset;
+	unsigned int gtid;
+	unsigned int tid;
+	int err;
+
+	xe_assert(xe, IS_DGFX(xe));
+	xe_assert(xe, IS_SRIOV_PF(xe));
+
+	total = 0;
+	for_each_tile(tile, xe, tid)
+		total += pf_get_vf_config_lmem(tile->primary_gt, vfid);
+
+	for_each_tile(tile, xe, tid) {
+		lmtt = &tile->sriov.pf.lmtt;
+
+		xe_lmtt_drop_pages(lmtt, vfid);
+		if (!total)
+			continue;
+
+		err  = xe_lmtt_prepare_pages(lmtt, vfid, total);
+		if (err)
+			goto fail;
+
+		offset = 0;
+		for_each_gt(gt, xe, gtid) {
+			if (xe_gt_is_media_type(gt))
+				continue;
+
+			config = pf_pick_vf_config(gt, vfid);
+			bo = config->lmem_obj;
+			if (!bo)
+				continue;
+
+			err = xe_lmtt_populate_pages(lmtt, vfid, bo, offset);
+			if (err)
+				goto fail;
+			offset += bo->size;
+		}
+	}
+
+	pf_force_lmtt_invalidate(xe);
+	return 0;
+
+fail:
+	for_each_tile(tile, xe, tid) {
+		lmtt = &tile->sriov.pf.lmtt;
+		xe_lmtt_drop_pages(lmtt, vfid);
+	}
+	return err;
+}
+
+static void pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_config *config)
+{
+	xe_gt_assert(gt, IS_DGFX(gt_to_xe(gt)));
+	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+	if (config->lmem_obj) {
+		xe_bo_unpin_map_no_vm(config->lmem_obj);
+		config->lmem_obj = NULL;
+	}
+}
+
+static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
+{
+	struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_bo *bo;
+	int err;
+
+	xe_gt_assert(gt, vfid);
+	xe_gt_assert(gt, IS_DGFX(xe));
+	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+
+	size = round_up(size, pf_get_lmem_alignment(gt));
+
+	if (config->lmem_obj) {
+		err = pf_distribute_config_lmem(gt, vfid, 0);
+		if (unlikely(err))
+			return err;
+
+		pf_reset_vf_lmtt(xe, vfid);
+		pf_release_vf_config_lmem(gt, config);
+	}
+	xe_gt_assert(gt, !config->lmem_obj);
+
+	if (!size)
+		return 0;
+
+	xe_gt_assert(gt, pf_get_lmem_alignment(gt) == SZ_2M);
+	bo = xe_bo_create_pin_map(xe, tile, NULL,
+				  ALIGN(size, PAGE_SIZE),
+				  ttm_bo_type_kernel,
+				  XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+				  XE_BO_FLAG_PINNED);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	config->lmem_obj = bo;
+
+	err = pf_update_vf_lmtt(xe, vfid);
+	if (unlikely(err))
+		goto release;
+
+	err = pf_push_vf_cfg_lmem(gt, vfid, bo->size);
+	if (unlikely(err))
+		goto reset_lmtt;
+
+	xe_gt_sriov_dbg_verbose(gt, "VF%u LMEM %zu (%zuM)\n",
+				vfid, bo->size, bo->size / SZ_1M);
+	return 0;
+
+reset_lmtt:
+	pf_reset_vf_lmtt(xe, vfid);
+release:
+	pf_release_vf_config_lmem(gt, config);
+	return err;
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_lmem - Get VF's LMEM quota.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function can only be called on PF.
+ *
+ * Return: VF's (or PF's spare) LMEM quota.
+ */
+u64 xe_gt_sriov_pf_config_get_lmem(struct xe_gt *gt, unsigned int vfid)
+{
+	u64 size;
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	if (vfid)
+		size = pf_get_vf_config_lmem(gt, vfid);
+	else
+		size = pf_get_spare_lmem(gt);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return size;
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_lmem - Provision VF with LMEM.
+ * @gt: the &xe_gt (can't be media)
+ * @vfid: the VF identifier
+ * @size: requested LMEM size
+ *
+ * This function can only be called on PF.
+ */
+int xe_gt_sriov_pf_config_set_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
+{
+	int err;
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	if (vfid)
+		err = pf_provision_vf_lmem(gt, vfid, size);
+	else
+		err = pf_set_spare_lmem(gt, size);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return pf_config_set_u64_done(gt, vfid, size,
+				      xe_gt_sriov_pf_config_get_lmem(gt, vfid),
+				      vfid ? "LMEM" : "spare LMEM", err);
+}
+
+/**
+ * xe_gt_sriov_pf_config_bulk_set_lmem - Provision many VFs with LMEM.
+ * @gt: the &xe_gt (can't be media)
+ * @vfid: starting VF identifier (can't be 0)
+ * @num_vfs: number of VFs to provision
+ * @size: requested LMEM size
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid,
+					unsigned int num_vfs, u64 size)
+{
+	unsigned int n;
+	int err = 0;
+
+	xe_gt_assert(gt, vfid);
+	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+
+	if (!num_vfs)
+		return 0;
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	for (n = vfid; n < vfid + num_vfs; n++) {
+		err = pf_provision_vf_lmem(gt, n, size);
+		if (err)
+			break;
+	}
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return pf_config_bulk_set_u64_done(gt, vfid, num_vfs, size,
+					   xe_gt_sriov_pf_config_get_lmem,
+					   "LMEM", n, err);
+}
+
+static u64 pf_query_free_lmem(struct xe_gt *gt)
+{
+	struct xe_tile *tile = gt->tile;
+
+	return xe_ttm_vram_get_avail(&tile->mem.vram_mgr->manager);
+}
+
+static u64 pf_query_max_lmem(struct xe_gt *gt)
+{
+	u64 alignment = pf_get_lmem_alignment(gt);
+	u64 spare = pf_get_spare_lmem(gt);
+	u64 free = pf_query_free_lmem(gt);
+	u64 avail;
+
+	/* XXX: need to account for 2MB blocks only */
+	avail = free > spare ? free - spare : 0;
+	avail = round_down(avail, alignment);
+
+	return avail;
+}
+
+#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
+#define MAX_FAIR_LMEM	SZ_128M	/* XXX: make it small for the driver bringup */
+#else
+#define MAX_FAIR_LMEM	SZ_2G	/* XXX: known issue with allocating BO over 2GiB */
+#endif
+
+static u64 pf_estimate_fair_lmem(struct xe_gt *gt, unsigned int num_vfs)
+{
+	u64 available = pf_query_max_lmem(gt);
+	u64 alignment = pf_get_lmem_alignment(gt);
+	u64 fair;
+
+	fair = div_u64(available, num_vfs);
+	fair = rounddown_pow_of_two(fair);	/* XXX: ttm_vram_mgr & drm_buddy limitation */
+	fair = ALIGN_DOWN(fair, alignment);
+#ifdef MAX_FAIR_LMEM
+	fair = min_t(u64, MAX_FAIR_LMEM, fair);
+#endif
+	xe_gt_sriov_dbg_verbose(gt, "LMEM available(%lluM) fair(%u x %lluM)\n",
+				available / SZ_1M, num_vfs, fair / SZ_1M);
+	return fair;
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_fair_lmem - Provision many VFs with fair LMEM.
+ * @gt: the &xe_gt (can't be media)
+ * @vfid: starting VF identifier (can't be 0)
+ * @num_vfs: number of VFs to provision (can't be 0)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid,
+					unsigned int num_vfs)
+{
+	u64 fair;
+
+	xe_gt_assert(gt, vfid);
+	xe_gt_assert(gt, num_vfs);
+	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+
+	if (!IS_DGFX(gt_to_xe(gt)))
+		return 0;
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	fair = pf_estimate_fair_lmem(gt, num_vfs);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	if (!fair)
+		return -ENOSPC;
+
+	return xe_gt_sriov_pf_config_bulk_set_lmem(gt, vfid, num_vfs, fair);
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_fair - Provision many VFs with fair resources.
+ * @gt: the &xe_gt
+ * @vfid: starting VF identifier (can't be 0)
+ * @num_vfs: number of VFs to provision (can't be 0)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_fair(struct xe_gt *gt, unsigned int vfid,
+				   unsigned int num_vfs)
+{
+	int result = 0;
+	int err;
+
+	xe_gt_assert(gt, vfid);
+	xe_gt_assert(gt, num_vfs);
+
+	if (!xe_gt_is_media_type(gt)) {
+		err = xe_gt_sriov_pf_config_set_fair_ggtt(gt, vfid, num_vfs);
+		result = result ?: err;
+		err = xe_gt_sriov_pf_config_set_fair_lmem(gt, vfid, num_vfs);
+		result = result ?: err;
+	}
+	err = xe_gt_sriov_pf_config_set_fair_ctxs(gt, vfid, num_vfs);
+	result = result ?: err;
+	err = xe_gt_sriov_pf_config_set_fair_dbs(gt, vfid, num_vfs);
+	result = result ?: err;
+
+	return result;
+}
+
+static const char *exec_quantum_unit(u32 exec_quantum)
+{
+	return exec_quantum ? "ms" : "(infinity)";
+}
+
+static int pf_provision_exec_quantum(struct xe_gt *gt, unsigned int vfid,
+				     u32 exec_quantum)
+{
+	struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+	int err;
+
+	err = pf_push_vf_cfg_exec_quantum(gt, vfid, exec_quantum);
+	if (unlikely(err))
+		return err;
+
+	config->exec_quantum = exec_quantum;
+	return 0;
+}
+
+static int pf_get_exec_quantum(struct xe_gt *gt, unsigned int vfid)
+{
+	struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+
+	return config->exec_quantum;
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_exec_quantum - Configure execution quantum for the VF.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @exec_quantum: requested execution quantum in milliseconds (0 is infinity)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid,
+					   u32 exec_quantum)
+{
+	int err;
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	err = pf_provision_exec_quantum(gt, vfid, exec_quantum);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return pf_config_set_u32_done(gt, vfid, exec_quantum,
+				      xe_gt_sriov_pf_config_get_exec_quantum(gt, vfid),
+				      "execution quantum", exec_quantum_unit, err);
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_exec_quantum - Get VF's execution quantum.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function can only be called on PF.
+ *
+ * Return: VF's (or PF's) execution quantum in milliseconds.
+ */
+u32 xe_gt_sriov_pf_config_get_exec_quantum(struct xe_gt *gt, unsigned int vfid)
+{
+	u32 exec_quantum;
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	exec_quantum = pf_get_exec_quantum(gt, vfid);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return exec_quantum;
+}
+
+static const char *preempt_timeout_unit(u32 preempt_timeout)
+{
+	return preempt_timeout ? "us" : "(infinity)";
+}
+
+static int pf_provision_preempt_timeout(struct xe_gt *gt, unsigned int vfid,
+					u32 preempt_timeout)
+{
+	struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+	int err;
+
+	err = pf_push_vf_cfg_preempt_timeout(gt, vfid, preempt_timeout);
+	if (unlikely(err))
+		return err;
+
+	config->preempt_timeout = preempt_timeout;
+
+	return 0;
+}
+
+static int pf_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid)
+{
+	struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+
+	return config->preempt_timeout;
+}
+
+/**
+ * xe_gt_sriov_pf_config_set_preempt_timeout - Configure preemption timeout for the VF.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @preempt_timeout: requested preemption timeout in microseconds (0 is infinity)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid,
+					      u32 preempt_timeout)
+{
+	int err;
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	err = pf_provision_preempt_timeout(gt, vfid, preempt_timeout);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return pf_config_set_u32_done(gt, vfid, preempt_timeout,
+				      xe_gt_sriov_pf_config_get_preempt_timeout(gt, vfid),
+				      "preemption timeout", preempt_timeout_unit, err);
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_preempt_timeout - Get VF's preemption timeout.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function can only be called on PF.
+ *
+ * Return: VF's (or PF's) preemption timeout in microseconds.
+ */
+u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid)
+{
+	u32 preempt_timeout;
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	preempt_timeout = pf_get_preempt_timeout(gt, vfid);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return preempt_timeout;
+}
+
+static void pf_reset_config_sched(struct xe_gt *gt, struct xe_gt_sriov_config *config)
+{
+	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+	config->exec_quantum = 0;
+	config->preempt_timeout = 0;
+}
+
+static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid)
+{
+	struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (!xe_gt_is_media_type(gt)) {
+		pf_release_vf_config_ggtt(gt, config);
+		if (IS_DGFX(xe)) {
+			pf_release_vf_config_lmem(gt, config);
+			pf_update_vf_lmtt(xe, vfid);
+		}
+	}
+	pf_release_config_ctxs(gt, config);
+	pf_release_config_dbs(gt, config);
+	pf_reset_config_sched(gt, config);
+}
+
+/**
+ * xe_gt_sriov_pf_config_release - Release and reset VF configuration.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier (can't be PF)
+ * @force: force configuration release
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_release(struct xe_gt *gt, unsigned int vfid, bool force)
+{
+	int err;
+
+	xe_gt_assert(gt, vfid);
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	err = pf_send_vf_cfg_reset(gt, vfid);
+	if (!err || force)
+		pf_release_vf_config(gt, vfid);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	if (unlikely(err)) {
+		xe_gt_sriov_notice(gt, "VF%u unprovisioning failed with error (%pe)%s\n",
+				   vfid, ERR_PTR(err),
+				   force ? " but all resources were released anyway!" : "");
+	}
+
+	return force ? 0 : err;
+}
+
+/**
+ * xe_gt_sriov_pf_config_push - Reprovision VF's configuration.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier (can't be PF)
+ * @refresh: explicit refresh
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_push(struct xe_gt *gt, unsigned int vfid, bool refresh)
+{
+	int err = 0;
+
+	xe_gt_assert(gt, vfid);
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	if (refresh)
+		err = pf_send_vf_cfg_reset(gt, vfid);
+	if (!err)
+		err = pf_push_full_vf_config(gt, vfid);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	if (unlikely(err)) {
+		xe_gt_sriov_notice(gt, "Failed to %s VF%u configuration (%pe)\n",
+				   refresh ? "refresh" : "push", vfid, ERR_PTR(err));
+	}
+
+	return err;
+}
+
+/**
+ * xe_gt_sriov_pf_config_print_ggtt - Print GGTT configurations.
+ * @gt: the &xe_gt
+ * @p: the &drm_printer
+ *
+ * Print GGTT configuration data for all VFs.
+ * VFs without provisioned GGTT are ignored.
+ *
+ * This function can only be called on PF.
+ */
+int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p)
+{
+	unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt));
+	const struct xe_gt_sriov_config *config;
+	char buf[10];
+
+	for (n = 1; n <= total_vfs; n++) {
+		config = &gt->sriov.pf.vfs[n].config;
+		if (!drm_mm_node_allocated(&config->ggtt_region))
+			continue;
+
+		string_get_size(config->ggtt_region.size, 1, STRING_UNITS_2, buf, sizeof(buf));
+		drm_printf(p, "VF%u:\t%#0llx-%#llx\t(%s)\n",
+			   n, config->ggtt_region.start,
+			   config->ggtt_region.start + config->ggtt_region.size - 1, buf);
+	}
+
+	return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_config_print_ctxs - Print GuC context IDs configurations.
+ * @gt: the &xe_gt
+ * @p: the &drm_printer
+ *
+ * Print GuC context ID allocations across all VFs.
+ * VFs without GuC context IDs are skipped.
+ *
+ * This function can only be called on PF.
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_print_ctxs(struct xe_gt *gt, struct drm_printer *p)
+{
+	unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt));
+	const struct xe_gt_sriov_config *config;
+
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+
+	for (n = 1; n <= total_vfs; n++) {
+		config = &gt->sriov.pf.vfs[n].config;
+		if (!config->num_ctxs)
+			continue;
+
+		drm_printf(p, "VF%u:\t%u-%u\t(%u)\n",
+			   n,
+			   config->begin_ctx,
+			   config->begin_ctx + config->num_ctxs - 1,
+			   config->num_ctxs);
+	}
+
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+	return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_config_print_dbs - Print GuC doorbell ID configurations.
+ * @gt: the &xe_gt
+ * @p: the &drm_printer
+ *
+ * Print GuC doorbell IDs allocations across all VFs.
+ * VFs without GuC doorbell IDs are skipped.
+ *
+ * This function can only be called on PF.
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p)
+{
+	unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt));
+	const struct xe_gt_sriov_config *config;
+
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+
+	for (n = 1; n <= total_vfs; n++) {
+		config = &gt->sriov.pf.vfs[n].config;
+		if (!config->num_dbs)
+			continue;
+
+		drm_printf(p, "VF%u:\t%u-%u\t(%u)\n",
+			   n,
+			   config->begin_db,
+			   config->begin_db + config->num_dbs - 1,
+			   config->num_dbs);
+	}
+
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+	return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_config_print_available_ggtt - Print available GGTT ranges.
+ * @gt: the &xe_gt
+ * @p: the &drm_printer
+ *
+ * Print GGTT ranges that are available for the provisioning.
+ *
+ * This function can only be called on PF.
+ */
+int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_printer *p)
+{
+	struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt;
+	const struct drm_mm *mm = &ggtt->mm;
+	const struct drm_mm_node *entry;
+	u64 alignment = pf_get_ggtt_alignment(gt);
+	u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt));
+	u64 hole_start, hole_end, hole_size;
+	u64 spare, avail, total = 0;
+	char buf[10];
+
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+
+	spare = pf_get_spare_ggtt(gt);
+
+	mutex_lock(&ggtt->lock);
+
+	drm_mm_for_each_hole(entry, mm, hole_start, hole_end) {
+		hole_start = max(hole_start, hole_min_start);
+		hole_start = ALIGN(hole_start, alignment);
+		hole_end = ALIGN_DOWN(hole_end, alignment);
+		if (hole_start >= hole_end)
+			continue;
+		hole_size = hole_end - hole_start;
+		total += hole_size;
+
+		string_get_size(hole_size, 1, STRING_UNITS_2, buf, sizeof(buf));
+		drm_printf(p, "range:\t%#llx-%#llx\t(%s)\n",
+			   hole_start, hole_end - 1, buf);
+	}
+
+	mutex_unlock(&ggtt->lock);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	string_get_size(total, 1, STRING_UNITS_2, buf, sizeof(buf));
+	drm_printf(p, "total:\t%llu\t(%s)\n", total, buf);
+
+	string_get_size(spare, 1, STRING_UNITS_2, buf, sizeof(buf));
+	drm_printf(p, "spare:\t%llu\t(%s)\n", spare, buf);
+
+	avail = total > spare ? total - spare : 0;
+
+	string_get_size(avail, 1, STRING_UNITS_2, buf, sizeof(buf));
+	drm_printf(p, "avail:\t%llu\t(%s)\n", avail, buf);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
new file mode 100644
index 0000000000..5e6b36f00b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_CONFIG_H_
+#define _XE_GT_SRIOV_PF_CONFIG_H_
+
+#include <linux/types.h>
+
+struct drm_printer;
+struct xe_gt;
+
+u64 xe_gt_sriov_pf_config_get_ggtt(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_config_set_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size);
+int xe_gt_sriov_pf_config_set_fair_ggtt(struct xe_gt *gt,
+					unsigned int vfid, unsigned int num_vfs);
+int xe_gt_sriov_pf_config_bulk_set_ggtt(struct xe_gt *gt,
+					unsigned int vfid, unsigned int num_vfs, u64 size);
+
+u32 xe_gt_sriov_pf_config_get_ctxs(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_config_set_ctxs(struct xe_gt *gt, unsigned int vfid, u32 num_ctxs);
+int xe_gt_sriov_pf_config_set_fair_ctxs(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs);
+int xe_gt_sriov_pf_config_bulk_set_ctxs(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs,
+					u32 num_ctxs);
+
+u32 xe_gt_sriov_pf_config_get_dbs(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_config_set_dbs(struct xe_gt *gt, unsigned int vfid, u32 num_dbs);
+int xe_gt_sriov_pf_config_set_fair_dbs(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs);
+int xe_gt_sriov_pf_config_bulk_set_dbs(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs,
+				       u32 num_dbs);
+
+u64 xe_gt_sriov_pf_config_get_lmem(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_config_set_lmem(struct xe_gt *gt, unsigned int vfid, u64 size);
+int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs);
+int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs,
+					u64 size);
+
+u32 xe_gt_sriov_pf_config_get_exec_quantum(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 exec_quantum);
+
+u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid,
+					      u32 preempt_timeout);
+
+int xe_gt_sriov_pf_config_set_fair(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs);
+int xe_gt_sriov_pf_config_release(struct xe_gt *gt, unsigned int vfid, bool force);
+int xe_gt_sriov_pf_config_push(struct xe_gt *gt, unsigned int vfid, bool refresh);
+
+int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p);
+int xe_gt_sriov_pf_config_print_ctxs(struct xe_gt *gt, struct drm_printer *p);
+int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p);
+
+int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_printer *p);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h
new file mode 100644
index 0000000000..d3745c3559
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_CONFIG_TYPES_H_
+#define _XE_GT_SRIOV_PF_CONFIG_TYPES_H_
+
+#include <drm/drm_mm.h>
+
+struct xe_bo;
+
+/**
+ * struct xe_gt_sriov_config - GT level per-VF configuration data.
+ *
+ * Used by the PF driver to maintain per-VF provisioning data.
+ */
+struct xe_gt_sriov_config {
+	/** @ggtt_region: GGTT region assigned to the VF. */
+	struct drm_mm_node ggtt_region;
+	/** @lmem_obj: LMEM allocation for use by the VF. */
+	struct xe_bo *lmem_obj;
+	/** @num_ctxs: number of GuC contexts IDs.  */
+	u16 num_ctxs;
+	/** @begin_ctx: start index of GuC context ID range. */
+	u16 begin_ctx;
+	/** @num_dbs: number of GuC doorbells IDs. */
+	u16 num_dbs;
+	/** @begin_db: start index of GuC doorbell ID range. */
+	u16 begin_db;
+	/** @exec_quantum: execution-quantum in milliseconds. */
+	u32 exec_quantum;
+	/** @preempt_timeout: preemption timeout in microseconds. */
+	u32 preempt_timeout;
+};
+
+/**
+ * struct xe_gt_sriov_spare_config - GT-level PF spare configuration data.
+ *
+ * Used by the PF driver to maintain it's own reserved (spare) provisioning
+ * data that is not applicable to be tracked in struct xe_gt_sriov_config.
+ */
+struct xe_gt_sriov_spare_config {
+	/** @ggtt_size: GGTT size. */
+	u64 ggtt_size;
+	/** @lmem_size: LMEM size. */
+	u64 lmem_size;
+	/** @num_ctxs: number of GuC submission contexts. */
+	u16 num_ctxs;
+	/** @num_dbs: number of GuC doorbells. */
+	u16 num_dbs;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
new file mode 100644
index 0000000000..40b8f881fe
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#include "abi/guc_actions_sriov_abi.h"
+
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_sriov_pf_control.h"
+#include "xe_gt_sriov_printk.h"
+#include "xe_guc_ct.h"
+#include "xe_sriov.h"
+
+static const char *control_cmd_to_string(u32 cmd)
+{
+	switch (cmd) {
+	case GUC_PF_TRIGGER_VF_PAUSE:
+		return "PAUSE";
+	case GUC_PF_TRIGGER_VF_RESUME:
+		return "RESUME";
+	case GUC_PF_TRIGGER_VF_STOP:
+		return "STOP";
+	case GUC_PF_TRIGGER_VF_FLR_START:
+		return "FLR_START";
+	case GUC_PF_TRIGGER_VF_FLR_FINISH:
+		return "FLR_FINISH";
+	default:
+		return "<unknown>";
+	}
+}
+
+static int guc_action_vf_control_cmd(struct xe_guc *guc, u32 vfid, u32 cmd)
+{
+	u32 request[PF2GUC_VF_CONTROL_REQUEST_MSG_LEN] = {
+		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_PF2GUC_VF_CONTROL),
+		FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID, vfid),
+		FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND, cmd),
+	};
+	int ret;
+
+	/* XXX those two commands are now sent from the G2H handler */
+	if (cmd == GUC_PF_TRIGGER_VF_FLR_START || cmd == GUC_PF_TRIGGER_VF_FLR_FINISH)
+		return xe_guc_ct_send_g2h_handler(&guc->ct, request, ARRAY_SIZE(request));
+
+	ret = xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));
+	return ret > 0 ? -EPROTO : ret;
+}
+
+static int pf_send_vf_control_cmd(struct xe_gt *gt, unsigned int vfid, u32 cmd)
+{
+	int err;
+
+	xe_gt_assert(gt, vfid != PFID);
+
+	err = guc_action_vf_control_cmd(&gt->uc.guc, vfid, cmd);
+	if (unlikely(err))
+		xe_gt_sriov_err(gt, "VF%u control command %s failed (%pe)\n",
+				vfid, control_cmd_to_string(cmd), ERR_PTR(err));
+	return err;
+}
+
+static int pf_send_vf_pause(struct xe_gt *gt, unsigned int vfid)
+{
+	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_PAUSE);
+}
+
+static int pf_send_vf_resume(struct xe_gt *gt, unsigned int vfid)
+{
+	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_RESUME);
+}
+
+static int pf_send_vf_stop(struct xe_gt *gt, unsigned int vfid)
+{
+	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_STOP);
+}
+
+static int pf_send_vf_flr_start(struct xe_gt *gt, unsigned int vfid)
+{
+	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_START);
+}
+
+static int pf_send_vf_flr_finish(struct xe_gt *gt, unsigned int vfid)
+{
+	return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_FINISH);
+}
+
+/**
+ * xe_gt_sriov_pf_control_pause_vf - Pause a VF.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid)
+{
+	return pf_send_vf_pause(gt, vfid);
+}
+
+/**
+ * xe_gt_sriov_pf_control_resume_vf - Resume a VF.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
+{
+	return pf_send_vf_resume(gt, vfid);
+}
+
+/**
+ * xe_gt_sriov_pf_control_stop_vf - Stop a VF.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid)
+{
+	return pf_send_vf_stop(gt, vfid);
+}
+
+/**
+ * DOC: The VF FLR Flow with GuC
+ *
+ *          PF                        GUC             PCI
+ * ========================================================
+ *          |                          |               |
+ * (1)      |                         [ ] <----- FLR --|
+ *          |                         [ ]              :
+ * (2)     [ ] <-------- NOTIFY FLR --[ ]
+ *         [ ]                         |
+ * (3)     [ ]                         |
+ *         [ ]                         |
+ *         [ ]-- START FLR ---------> [ ]
+ *          |                         [ ]
+ * (4)      |                         [ ]
+ *          |                         [ ]
+ *         [ ] <--------- FLR DONE -- [ ]
+ *         [ ]                         |
+ * (5)     [ ]                         |
+ *         [ ]                         |
+ *         [ ]-- FINISH FLR --------> [ ]
+ *          |                          |
+ *
+ * Step 1: PCI HW generates interrupt to the GuC about VF FLR
+ * Step 2: GuC FW sends G2H notification to the PF about VF FLR
+ * Step 2a: on some platforms G2H is only received from root GuC
+ * Step 3: PF sends H2G request to the GuC to start VF FLR sequence
+ * Step 3a: on some platforms PF must send H2G to all other GuCs
+ * Step 4: GuC FW performs VF FLR cleanups and notifies the PF when done
+ * Step 5: PF performs VF FLR cleanups and notifies the GuC FW when finished
+ */
+
+static bool needs_dispatch_flr(struct xe_device *xe)
+{
+	return xe->info.platform == XE_PVC;
+}
+
+static void pf_handle_vf_flr(struct xe_gt *gt, u32 vfid)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_gt *gtit;
+	unsigned int gtid;
+
+	xe_gt_sriov_info(gt, "VF%u FLR\n", vfid);
+
+	if (needs_dispatch_flr(xe)) {
+		for_each_gt(gtit, xe, gtid)
+			pf_send_vf_flr_start(gtit, vfid);
+	} else {
+		pf_send_vf_flr_start(gt, vfid);
+	}
+}
+
+static void pf_handle_vf_flr_done(struct xe_gt *gt, u32 vfid)
+{
+	pf_send_vf_flr_finish(gt, vfid);
+}
+
+static int pf_handle_vf_event(struct xe_gt *gt, u32 vfid, u32 eventid)
+{
+	switch (eventid) {
+	case GUC_PF_NOTIFY_VF_FLR:
+		pf_handle_vf_flr(gt, vfid);
+		break;
+	case GUC_PF_NOTIFY_VF_FLR_DONE:
+		pf_handle_vf_flr_done(gt, vfid);
+		break;
+	case GUC_PF_NOTIFY_VF_PAUSE_DONE:
+		break;
+	case GUC_PF_NOTIFY_VF_FIXUP_DONE:
+		break;
+	default:
+		return -ENOPKG;
+	}
+	return 0;
+}
+
+static int pf_handle_pf_event(struct xe_gt *gt, u32 eventid)
+{
+	switch (eventid) {
+	case GUC_PF_NOTIFY_VF_ENABLE:
+		xe_gt_sriov_dbg_verbose(gt, "VFs %s/%s\n",
+					str_enabled_disabled(true),
+					str_enabled_disabled(false));
+		break;
+	default:
+		return -ENOPKG;
+	}
+	return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_control_process_guc2pf - Handle VF state notification from GuC.
+ * @gt: the &xe_gt
+ * @msg: the G2H message
+ * @len: the length of the G2H message
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len)
+{
+	u32 vfid;
+	u32 eventid;
+
+	xe_gt_assert(gt, len);
+	xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
+	xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
+	xe_gt_assert(gt, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
+		     GUC_ACTION_GUC2PF_VF_STATE_NOTIFY);
+
+	if (unlikely(!xe_device_is_sriov_pf(gt_to_xe(gt))))
+		return -EPROTO;
+
+	if (unlikely(FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_0_MBZ, msg[0])))
+		return -EPFNOSUPPORT;
+
+	if (unlikely(len != GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_LEN))
+		return -EPROTO;
+
+	vfid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID, msg[1]);
+	eventid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT, msg[2]);
+
+	return vfid ? pf_handle_vf_event(gt, vfid, eventid) : pf_handle_pf_event(gt, eventid);
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h
new file mode 100644
index 0000000000..850a3e3766
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_CONTROL_H_
+#define _XE_GT_SRIOV_PF_CONTROL_H_
+
+#include <linux/errno.h>
+#include <linux/types.h>
+
+struct xe_gt;
+
+int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid);
+
+#ifdef CONFIG_PCI_IOV
+int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len);
+#else
+static inline int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len)
+{
+	return -EPROTO;
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h
new file mode 100644
index 0000000000..0bf12d89ce
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_HELPERS_H_
+#define _XE_GT_SRIOV_PF_HELPERS_H_
+
+#include "xe_gt_types.h"
+#include "xe_sriov_pf_helpers.h"
+
+/**
+ * xe_gt_sriov_pf_assert_vfid() - warn if &id is not a supported VF number when debugging.
+ * @gt: the PF &xe_gt to assert on
+ * @vfid: the VF number to assert
+ *
+ * Assert that &gt belongs to the Physical Function (PF) device and provided &vfid
+ * is within a range of supported VF numbers (up to maximum number of VFs that
+ * driver can support, including VF0 that represents the PF itself).
+ *
+ * Note: Effective only on debug builds. See `Xe ASSERTs`_ for more information.
+ */
+#define xe_gt_sriov_pf_assert_vfid(gt, vfid)	xe_sriov_pf_assert_vfid(gt_to_xe(gt), (vfid))
+
+static inline int xe_gt_sriov_pf_get_totalvfs(struct xe_gt *gt)
+{
+	return xe_sriov_pf_get_totalvfs(gt_to_xe(gt));
+}
+
+static inline struct mutex *xe_gt_sriov_pf_master_mutex(struct xe_gt *gt)
+{
+	return xe_sriov_pf_master_mutex(gt_to_xe(gt));
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c
new file mode 100644
index 0000000000..fae5be5a2a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c
@@ -0,0 +1,418 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#include "abi/guc_actions_sriov_abi.h"
+
+#include "xe_bo.h"
+#include "xe_gt.h"
+#include "xe_gt_sriov_pf_helpers.h"
+#include "xe_gt_sriov_pf_policy.h"
+#include "xe_gt_sriov_printk.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_klv_helpers.h"
+#include "xe_pm.h"
+
+/*
+ * Return: number of KLVs that were successfully parsed and saved,
+ *         negative error code on failure.
+ */
+static int guc_action_update_vgt_policy(struct xe_guc *guc, u64 addr, u32 size)
+{
+	u32 request[] = {
+		GUC_ACTION_PF2GUC_UPDATE_VGT_POLICY,
+		lower_32_bits(addr),
+		upper_32_bits(addr),
+		size,
+	};
+
+	return xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request));
+}
+
+/*
+ * Return: number of KLVs that were successfully parsed and saved,
+ *         negative error code on failure.
+ */
+static int pf_send_policy_klvs(struct xe_gt *gt, const u32 *klvs, u32 num_dwords)
+{
+	const u32 bytes = num_dwords * sizeof(u32);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_guc *guc = &gt->uc.guc;
+	struct xe_bo *bo;
+	int ret;
+
+	bo = xe_bo_create_pin_map(xe, tile, NULL,
+				  ALIGN(bytes, PAGE_SIZE),
+				  ttm_bo_type_kernel,
+				  XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+				  XE_BO_FLAG_GGTT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	xe_map_memcpy_to(xe, &bo->vmap, 0, klvs, bytes);
+
+	ret = guc_action_update_vgt_policy(guc, xe_bo_ggtt_addr(bo), num_dwords);
+
+	xe_bo_unpin_map_no_vm(bo);
+
+	return ret;
+}
+
+/*
+ * Return: 0 on success, -ENOKEY if some KLVs were not updated, -EPROTO if reply was malformed,
+ *         negative error code on failure.
+ */
+static int pf_push_policy_klvs(struct xe_gt *gt, u32 num_klvs,
+			       const u32 *klvs, u32 num_dwords)
+{
+	int ret;
+
+	xe_gt_assert(gt, num_klvs == xe_guc_klv_count(klvs, num_dwords));
+
+	ret = pf_send_policy_klvs(gt, klvs, num_dwords);
+
+	if (ret != num_klvs) {
+		int err = ret < 0 ? ret : ret < num_klvs ? -ENOKEY : -EPROTO;
+		struct drm_printer p = xe_gt_info_printer(gt);
+
+		xe_gt_sriov_notice(gt, "Failed to push %u policy KLV%s (%pe)\n",
+				   num_klvs, str_plural(num_klvs), ERR_PTR(err));
+		xe_guc_klv_print(klvs, num_dwords, &p);
+		return err;
+	}
+
+	return 0;
+}
+
+static int pf_push_policy_u32(struct xe_gt *gt, u16 key, u32 value)
+{
+	u32 klv[] = {
+		PREP_GUC_KLV(key, 1),
+		value,
+	};
+
+	return pf_push_policy_klvs(gt, 1, klv, ARRAY_SIZE(klv));
+}
+
+static int pf_update_policy_bool(struct xe_gt *gt, u16 key, bool *policy, bool value)
+{
+	int err;
+
+	err = pf_push_policy_u32(gt, key, value);
+	if (unlikely(err)) {
+		xe_gt_sriov_notice(gt, "Failed to update policy %#x '%s' to '%s' (%pe)\n",
+				   key, xe_guc_klv_key_to_string(key),
+				   str_enabled_disabled(value), ERR_PTR(err));
+		return err;
+	}
+
+	xe_gt_sriov_dbg(gt, "policy key %#x '%s' updated to '%s'\n",
+			key, xe_guc_klv_key_to_string(key),
+			str_enabled_disabled(value));
+
+	*policy = value;
+	return 0;
+}
+
+static int pf_update_policy_u32(struct xe_gt *gt, u16 key, u32 *policy, u32 value)
+{
+	int err;
+
+	err = pf_push_policy_u32(gt, key, value);
+	if (unlikely(err)) {
+		xe_gt_sriov_notice(gt, "Failed to update policy %#x '%s' to '%s' (%pe)\n",
+				   key, xe_guc_klv_key_to_string(key),
+				   str_enabled_disabled(value), ERR_PTR(err));
+		return err;
+	}
+
+	xe_gt_sriov_dbg(gt, "policy key %#x '%s' updated to %u\n",
+			key, xe_guc_klv_key_to_string(key), value);
+
+	*policy = value;
+	return 0;
+}
+
+static int pf_provision_sched_if_idle(struct xe_gt *gt, bool enable)
+{
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+	return pf_update_policy_bool(gt, GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY,
+				     &gt->sriov.pf.policy.guc.sched_if_idle,
+				     enable);
+}
+
+static int pf_reprovision_sched_if_idle(struct xe_gt *gt)
+{
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+	return pf_provision_sched_if_idle(gt, gt->sriov.pf.policy.guc.sched_if_idle);
+}
+
+static void pf_sanitize_sched_if_idle(struct xe_gt *gt)
+{
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+	gt->sriov.pf.policy.guc.sched_if_idle = false;
+}
+
+/**
+ * xe_gt_sriov_pf_policy_set_sched_if_idle - Control the 'sched_if_idle' policy.
+ * @gt: the &xe_gt where to apply the policy
+ * @enable: the value of the 'sched_if_idle' policy
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_policy_set_sched_if_idle(struct xe_gt *gt, bool enable)
+{
+	int err;
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	err = pf_provision_sched_if_idle(gt, enable);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return err;
+}
+
+/**
+ * xe_gt_sriov_pf_policy_get_sched_if_idle - Retrieve value of 'sched_if_idle' policy.
+ * @gt: the &xe_gt where to read the policy from
+ *
+ * This function can only be called on PF.
+ *
+ * Return: value of 'sched_if_idle' policy.
+ */
+bool xe_gt_sriov_pf_policy_get_sched_if_idle(struct xe_gt *gt)
+{
+	bool enable;
+
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	enable = gt->sriov.pf.policy.guc.sched_if_idle;
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return enable;
+}
+
+static int pf_provision_reset_engine(struct xe_gt *gt, bool enable)
+{
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+	return pf_update_policy_bool(gt, GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY,
+				     &gt->sriov.pf.policy.guc.reset_engine, enable);
+}
+
+static int pf_reprovision_reset_engine(struct xe_gt *gt)
+{
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+	return pf_provision_reset_engine(gt, gt->sriov.pf.policy.guc.reset_engine);
+}
+
+static void pf_sanitize_reset_engine(struct xe_gt *gt)
+{
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+	gt->sriov.pf.policy.guc.reset_engine = false;
+}
+
+/**
+ * xe_gt_sriov_pf_policy_set_reset_engine - Control the 'reset_engine' policy.
+ * @gt: the &xe_gt where to apply the policy
+ * @enable: the value of the 'reset_engine' policy
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_policy_set_reset_engine(struct xe_gt *gt, bool enable)
+{
+	int err;
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	err = pf_provision_reset_engine(gt, enable);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return err;
+}
+
+/**
+ * xe_gt_sriov_pf_policy_get_reset_engine - Retrieve value of 'reset_engine' policy.
+ * @gt: the &xe_gt where to read the policy from
+ *
+ * This function can only be called on PF.
+ *
+ * Return: value of 'reset_engine' policy.
+ */
+bool xe_gt_sriov_pf_policy_get_reset_engine(struct xe_gt *gt)
+{
+	bool enable;
+
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	enable = gt->sriov.pf.policy.guc.reset_engine;
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return enable;
+}
+
+static int pf_provision_sample_period(struct xe_gt *gt, u32 value)
+{
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+	return pf_update_policy_u32(gt, GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY,
+				    &gt->sriov.pf.policy.guc.sample_period, value);
+}
+
+static int pf_reprovision_sample_period(struct xe_gt *gt)
+{
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+	return pf_provision_sample_period(gt, gt->sriov.pf.policy.guc.sample_period);
+}
+
+static void pf_sanitize_sample_period(struct xe_gt *gt)
+{
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+	gt->sriov.pf.policy.guc.sample_period = 0;
+}
+
+/**
+ * xe_gt_sriov_pf_policy_set_sample_period - Control the 'sample_period' policy.
+ * @gt: the &xe_gt where to apply the policy
+ * @value: the value of the 'sample_period' policy
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_policy_set_sample_period(struct xe_gt *gt, u32 value)
+{
+	int err;
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	err = pf_provision_sample_period(gt, value);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return err;
+}
+
+/**
+ * xe_gt_sriov_pf_policy_get_sample_period - Retrieve value of 'sample_period' policy.
+ * @gt: the &xe_gt where to read the policy from
+ *
+ * This function can only be called on PF.
+ *
+ * Return: value of 'sample_period' policy.
+ */
+u32 xe_gt_sriov_pf_policy_get_sample_period(struct xe_gt *gt)
+{
+	u32 value;
+
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	value = gt->sriov.pf.policy.guc.sample_period;
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return value;
+}
+
+static void pf_sanitize_guc_policies(struct xe_gt *gt)
+{
+	pf_sanitize_sched_if_idle(gt);
+	pf_sanitize_reset_engine(gt);
+	pf_sanitize_sample_period(gt);
+}
+
+/**
+ * xe_gt_sriov_pf_policy_sanitize - Reset policy settings.
+ * @gt: the &xe_gt
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+void xe_gt_sriov_pf_policy_sanitize(struct xe_gt *gt)
+{
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	pf_sanitize_guc_policies(gt);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+}
+
+/**
+ * xe_gt_sriov_pf_policy_reprovision - Reprovision (and optionally reset) policy settings.
+ * @gt: the &xe_gt
+ * @reset: if true will reprovision using default values instead of latest
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_policy_reprovision(struct xe_gt *gt, bool reset)
+{
+	int err = 0;
+
+	xe_pm_runtime_get_noresume(gt_to_xe(gt));
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	if (reset)
+		pf_sanitize_guc_policies(gt);
+	err |= pf_reprovision_sched_if_idle(gt);
+	err |= pf_reprovision_reset_engine(gt);
+	err |= pf_reprovision_sample_period(gt);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	xe_pm_runtime_put(gt_to_xe(gt));
+
+	return err ? -ENXIO : 0;
+}
+
+static void print_guc_policies(struct drm_printer *p, struct xe_gt_sriov_guc_policies *policy)
+{
+	drm_printf(p, "%s:\t%s\n",
+		   xe_guc_klv_key_to_string(GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY),
+		   str_enabled_disabled(policy->sched_if_idle));
+	drm_printf(p, "%s:\t%s\n",
+		   xe_guc_klv_key_to_string(GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY),
+		   str_enabled_disabled(policy->reset_engine));
+	drm_printf(p, "%s:\t%u %s\n",
+		   xe_guc_klv_key_to_string(GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY),
+		   policy->sample_period, policy->sample_period ? "ms" : "(disabled)");
+}
+
+/**
+ * xe_gt_sriov_pf_policy_print - Dump actual policy values.
+ * @gt: the &xe_gt where to read the policy from
+ * @p: the &drm_printer
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_policy_print(struct xe_gt *gt, struct drm_printer *p)
+{
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+	mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+	print_guc_policies(p, &gt->sriov.pf.policy.guc);
+	mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h
new file mode 100644
index 0000000000..2a5dc33dc6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_POLICY_H_
+#define _XE_GT_SRIOV_PF_POLICY_H_
+
+#include <linux/types.h>
+
+struct drm_printer;
+struct xe_gt;
+
+int xe_gt_sriov_pf_policy_set_sched_if_idle(struct xe_gt *gt, bool enable);
+bool xe_gt_sriov_pf_policy_get_sched_if_idle(struct xe_gt *gt);
+int xe_gt_sriov_pf_policy_set_reset_engine(struct xe_gt *gt, bool enable);
+bool xe_gt_sriov_pf_policy_get_reset_engine(struct xe_gt *gt);
+int xe_gt_sriov_pf_policy_set_sample_period(struct xe_gt *gt, u32 value);
+u32 xe_gt_sriov_pf_policy_get_sample_period(struct xe_gt *gt);
+
+void xe_gt_sriov_pf_policy_sanitize(struct xe_gt *gt);
+int xe_gt_sriov_pf_policy_reprovision(struct xe_gt *gt, bool reset);
+int xe_gt_sriov_pf_policy_print(struct xe_gt *gt, struct drm_printer *p);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h
new file mode 100644
index 0000000000..4de532af13
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_POLICY_TYPES_H_
+#define _XE_GT_SRIOV_PF_POLICY_TYPES_H_
+
+#include <linux/types.h>
+
+/**
+ * struct xe_gt_sriov_guc_policies - GuC SR-IOV policies.
+ * @sched_if_idle: controls strict scheduling policy.
+ * @reset_engine: controls engines reset on VF switch policy.
+ * @sample_period: adverse events sampling period (in milliseconds).
+ */
+struct xe_gt_sriov_guc_policies {
+	bool sched_if_idle;
+	bool reset_engine;
+	u32 sample_period;
+};
+
+/**
+ * struct xe_gt_sriov_pf_policy - PF policy data.
+ * @guc: GuC scheduling policies.
+ */
+struct xe_gt_sriov_pf_policy {
+	struct xe_gt_sriov_guc_policies guc;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
new file mode 100644
index 0000000000..faf9ee8266
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PF_TYPES_H_
+#define _XE_GT_SRIOV_PF_TYPES_H_
+
+#include <linux/types.h>
+
+#include "xe_gt_sriov_pf_config_types.h"
+#include "xe_gt_sriov_pf_policy_types.h"
+
+/**
+ * struct xe_gt_sriov_metadata - GT level per-VF metadata.
+ */
+struct xe_gt_sriov_metadata {
+	/** @config: per-VF provisioning data. */
+	struct xe_gt_sriov_config config;
+};
+
+/**
+ * struct xe_gt_sriov_pf - GT level PF virtualization data.
+ * @policy: policy data.
+ * @spare: PF-only provisioning configuration.
+ * @vfs: metadata for all VFs.
+ */
+struct xe_gt_sriov_pf {
+	struct xe_gt_sriov_pf_policy policy;
+	struct xe_gt_sriov_spare_config spare;
+	struct xe_gt_sriov_metadata *vfs;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_printk.h b/drivers/gpu/drm/xe/xe_gt_sriov_printk.h
new file mode 100644
index 0000000000..17624b1630
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_printk.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PRINTK_H_
+#define _XE_GT_SRIOV_PRINTK_H_
+
+#include "xe_gt_printk.h"
+#include "xe_sriov_printk.h"
+
+#define __xe_gt_sriov_printk(gt, _level, fmt, ...) \
+	xe_gt_printk((gt), _level, "%s" fmt, xe_sriov_printk_prefix(gt_to_xe(gt)), ##__VA_ARGS__)
+
+#define xe_gt_sriov_err(_gt, _fmt, ...) \
+	__xe_gt_sriov_printk(_gt, err, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_sriov_notice(_gt, _fmt, ...) \
+	__xe_gt_sriov_printk(_gt, notice, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_sriov_info(_gt, _fmt, ...) \
+	__xe_gt_sriov_printk(_gt, info, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_sriov_dbg(_gt, _fmt, ...) \
+	__xe_gt_sriov_printk(_gt, dbg, _fmt, ##__VA_ARGS__)
+
+/* for low level noisy debug messages */
+#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
+#define xe_gt_sriov_dbg_verbose(_gt, _fmt, ...) xe_gt_sriov_dbg(_gt, _fmt, ##__VA_ARGS__)
+#else
+#define xe_gt_sriov_dbg_verbose(_gt, _fmt, ...) typecheck(struct xe_gt *, (_gt))
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c
index c69d2e8a0f..1e5971072b 100644
--- a/drivers/gpu/drm/xe/xe_gt_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c
@@ -29,7 +29,7 @@ static void gt_sysfs_fini(struct drm_device *drm, void *arg)
 	kobject_put(gt->sysfs);
 }
 
-void xe_gt_sysfs_init(struct xe_gt *gt)
+int xe_gt_sysfs_init(struct xe_gt *gt)
 {
 	struct xe_tile *tile = gt_to_tile(gt);
 	struct xe_device *xe = gt_to_xe(gt);
@@ -38,24 +38,18 @@ void xe_gt_sysfs_init(struct xe_gt *gt)
 
 	kg = kzalloc(sizeof(*kg), GFP_KERNEL);
 	if (!kg)
-		return;
+		return -ENOMEM;
 
 	kobject_init(&kg->base, &xe_gt_sysfs_kobj_type);
 	kg->gt = gt;
 
 	err = kobject_add(&kg->base, tile->sysfs, "gt%d", gt->info.id);
 	if (err) {
-		drm_warn(&xe->drm, "failed to add GT sysfs directory, err: %d\n", err);
 		kobject_put(&kg->base);
-		return;
+		return err;
 	}
 
 	gt->sysfs = &kg->base;
 
-	err = drmm_add_action_or_reset(&xe->drm, gt_sysfs_fini, gt);
-	if (err) {
-		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
-			 __func__, err);
-		return;
-	}
+	return drmm_add_action_or_reset(&xe->drm, gt_sysfs_fini, gt);
 }
diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.h b/drivers/gpu/drm/xe/xe_gt_sysfs.h
index e3ec278ca0..ecbfcc5c7d 100644
--- a/drivers/gpu/drm/xe/xe_gt_sysfs.h
+++ b/drivers/gpu/drm/xe/xe_gt_sysfs.h
@@ -8,7 +8,7 @@
 
 #include "xe_gt_sysfs_types.h"
 
-void xe_gt_sysfs_init(struct xe_gt *gt);
+int xe_gt_sysfs_init(struct xe_gt *gt);
 
 static inline struct xe_gt *
 kobj_to_gt(struct kobject *kobj)
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c
index 63d640591a..fbe21a8599 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c
@@ -11,6 +11,7 @@
 #include "xe_gt_sysfs.h"
 #include "xe_gt_throttle_sysfs.h"
 #include "xe_mmio.h"
+#include "xe_pm.h"
 
 /**
  * DOC: Xe GT Throttle
@@ -38,10 +39,12 @@ static u32 read_perf_limit_reasons(struct xe_gt *gt)
 {
 	u32 reg;
 
+	xe_pm_runtime_get(gt_to_xe(gt));
 	if (xe_gt_is_media_type(gt))
 		reg = xe_mmio_read32(gt, MTL_MEDIA_PERF_LIMIT_REASONS);
 	else
 		reg = xe_mmio_read32(gt, GT0_PERF_LIMIT_REASONS);
+	xe_pm_runtime_put(gt_to_xe(gt));
 
 	return reg;
 }
@@ -233,19 +236,14 @@ static void gt_throttle_sysfs_fini(struct drm_device *drm, void *arg)
 	sysfs_remove_group(gt->freq, &throttle_group_attrs);
 }
 
-void xe_gt_throttle_sysfs_init(struct xe_gt *gt)
+int xe_gt_throttle_sysfs_init(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 	int err;
 
 	err = sysfs_create_group(gt->freq, &throttle_group_attrs);
-	if (err) {
-		drm_warn(&xe->drm, "failed to register throttle sysfs, err: %d\n", err);
-		return;
-	}
-
-	err = drmm_add_action_or_reset(&xe->drm, gt_throttle_sysfs_fini, gt);
 	if (err)
-		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
-			 __func__, err);
+		return err;
+
+	return drmm_add_action_or_reset(&xe->drm, gt_throttle_sysfs_fini, gt);
 }
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h
index 3ecfd4beff..6c61e6f228 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h
+++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h
@@ -10,7 +10,7 @@
 
 struct xe_gt;
 
-void xe_gt_throttle_sysfs_init(struct xe_gt *gt);
+int xe_gt_throttle_sysfs_init(struct xe_gt *gt);
 
 #endif /* _XE_GT_THROTTLE_SYSFS_H_ */
 
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index f4c485289d..93df2d7969 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -8,9 +8,12 @@
 #include "abi/guc_actions_abi.h"
 #include "xe_device.h"
 #include "xe_gt.h"
+#include "xe_gt_printk.h"
 #include "xe_guc.h"
 #include "xe_guc_ct.h"
+#include "xe_mmio.h"
 #include "xe_trace.h"
+#include "regs/xe_guc_regs.h"
 
 #define TLB_TIMEOUT	(HZ / 4)
 
@@ -30,8 +33,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
 			break;
 
 		trace_xe_gt_tlb_invalidation_fence_timeout(fence);
-		drm_err(&gt_to_xe(gt)->drm, "gt%d: TLB invalidation fence timeout, seqno=%d recv=%d",
-			gt->info.id, fence->seqno, gt->tlb_invalidation.seqno_recv);
+		xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d",
+			  fence->seqno, gt->tlb_invalidation.seqno_recv);
 
 		list_del(&fence->link);
 		fence->base.error = -ETIME;
@@ -60,7 +63,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
 	INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences);
 	spin_lock_init(&gt->tlb_invalidation.pending_lock);
 	spin_lock_init(&gt->tlb_invalidation.lock);
-	gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1);
 	INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr,
 			  xe_gt_tlb_fence_timeout);
 
@@ -209,7 +211,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
  * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
  * negative error code on error.
  */
-int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
+static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
 {
 	u32 action[] = {
 		XE_GUC_ACTION_TLB_INVALIDATION,
@@ -222,6 +224,45 @@ int xe_gt_tlb_invalidation_guc(struct xe_gt *gt)
 }
 
 /**
+ * xe_gt_tlb_invalidation_ggtt - Issue a TLB invalidation on this GT for the GGTT
+ * @gt: graphics tile
+ *
+ * Issue a TLB invalidation for the GGTT. Completion of TLB invalidation is
+ * synchronous.
+ *
+ * Return: 0 on success, negative error code on error
+ */
+int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+
+	if (xe_guc_ct_enabled(&gt->uc.guc.ct) &&
+	    gt->uc.guc.submission_state.enabled) {
+		int seqno;
+
+		seqno = xe_gt_tlb_invalidation_guc(gt);
+		if (seqno <= 0)
+			return seqno;
+
+		xe_gt_tlb_invalidation_wait(gt, seqno);
+	} else if (xe_device_uc_enabled(xe)) {
+		xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
+		if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
+			xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1,
+					PVC_GUC_TLB_INV_DESC1_INVALIDATE);
+			xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0,
+					PVC_GUC_TLB_INV_DESC0_VALID);
+		} else {
+			xe_mmio_write32(gt, GUC_TLB_INV_CR,
+					GUC_TLB_INV_CR_INVALIDATE);
+		}
+		xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	}
+
+	return 0;
+}
+
+/**
  * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
  * @gt: graphics tile
  * @fence: invalidation fence which will be signal on TLB invalidation
@@ -320,9 +361,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
  */
 int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
 {
-	struct xe_device *xe = gt_to_xe(gt);
 	struct xe_guc *guc = &gt->uc.guc;
-	struct drm_printer p = drm_err_printer(__func__);
 	int ret;
 
 	/* Execlists not supported */
@@ -337,8 +376,10 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
 				 tlb_invalidation_seqno_past(gt, seqno),
 				 TLB_TIMEOUT);
 	if (!ret) {
-		drm_err(&xe->drm, "gt%d: TLB invalidation time'd out, seqno=%d, recv=%d\n",
-			gt->info.id, seqno, gt->tlb_invalidation.seqno_recv);
+		struct drm_printer p = xe_gt_err_printer(gt);
+
+		xe_gt_err(gt, "TLB invalidation time'd out, seqno=%d, recv=%d\n",
+			  seqno, gt->tlb_invalidation.seqno_recv);
 		xe_guc_ct_print(&guc->ct, &p, true);
 		return -ETIME;
 	}
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
index b333c17093..fbb743d80d 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
@@ -16,7 +16,7 @@ struct xe_vma;
 
 int xe_gt_tlb_invalidation_init(struct xe_gt *gt);
 void xe_gt_tlb_invalidation_reset(struct xe_gt *gt);
-int xe_gt_tlb_invalidation_guc(struct xe_gt *gt);
+int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt);
 int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
 			       struct xe_gt_tlb_invalidation_fence *fence,
 			       struct xe_vma *vma);
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index a8d7f272c3..3733e7a686 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -8,12 +8,10 @@
 #include <linux/bitmap.h>
 
 #include "regs/xe_gt_regs.h"
+#include "xe_assert.h"
 #include "xe_gt.h"
 #include "xe_mmio.h"
 
-#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS)
-#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS)
-
 static void
 load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
 {
@@ -62,6 +60,114 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
 	bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
 }
 
+/**
+ * gen_l3_mask_from_pattern - Replicate a bit pattern according to a mask
+ *
+ * It is used to compute the L3 bank masks in a generic format on
+ * various platforms where the internal representation of L3 node
+ * and masks from registers are different.
+ *
+ * @xe: device
+ * @dst: destination
+ * @pattern: pattern to replicate
+ * @patternbits: size of the pattern, in bits
+ * @mask: mask describing where to replicate the pattern
+ *
+ * Example 1:
+ * ----------
+ * @pattern =    0b1111
+ *                 └┬─┘
+ * @patternbits =   4 (bits)
+ * @mask = 0b0101
+ *           ││││
+ *           │││└────────────────── 0b1111 (=1×0b1111)
+ *           ││└──────────── 0b0000    │   (=0×0b1111)
+ *           │└────── 0b1111    │      │   (=1×0b1111)
+ *           └ 0b0000    │      │      │   (=0×0b1111)
+ *                │      │      │      │
+ * @dst =      0b0000 0b1111 0b0000 0b1111
+ *
+ * Example 2:
+ * ----------
+ * @pattern =    0b11111111
+ *                 └┬─────┘
+ * @patternbits =   8 (bits)
+ * @mask = 0b10
+ *           ││
+ *           ││
+ *           ││
+ *           │└────────── 0b00000000 (=0×0b11111111)
+ *           └ 0b11111111      │     (=1×0b11111111)
+ *                  │          │
+ * @dst =      0b11111111 0b00000000
+ */
+static void
+gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst,
+			 xe_l3_bank_mask_t pattern, int patternbits,
+			 unsigned long mask)
+{
+	unsigned long bit;
+
+	xe_assert(xe, fls(mask) <= patternbits);
+	for_each_set_bit(bit, &mask, 32) {
+		xe_l3_bank_mask_t shifted_pattern = {};
+
+		bitmap_shift_left(shifted_pattern, pattern, bit * patternbits,
+				  XE_MAX_L3_BANK_MASK_BITS);
+		bitmap_or(dst, dst, shifted_pattern, XE_MAX_L3_BANK_MASK_BITS);
+	}
+}
+
+static void
+load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u32 fuse3 = xe_mmio_read32(gt, MIRROR_FUSE3);
+
+	if (GRAPHICS_VER(xe) >= 20) {
+		xe_l3_bank_mask_t per_node = {};
+		u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
+		u32 bank_val = REG_FIELD_GET(XE2_GT_L3_MODE_MASK, fuse3);
+
+		bitmap_from_arr32(per_node, &bank_val, 32);
+		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
+					 meml3_en);
+	} else if (GRAPHICS_VERx100(xe) >= 1270) {
+		xe_l3_bank_mask_t per_node = {};
+		xe_l3_bank_mask_t per_mask_bit = {};
+		u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
+		u32 fuse4 = xe_mmio_read32(gt, XEHP_FUSE4);
+		u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4);
+
+		bitmap_set_value8(per_mask_bit, 0x3, 0);
+		gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 2, bank_val);
+		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
+					 meml3_en);
+	} else if (xe->info.platform == XE_PVC) {
+		xe_l3_bank_mask_t per_node = {};
+		xe_l3_bank_mask_t per_mask_bit = {};
+		u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
+		u32 bank_val = REG_FIELD_GET(XEHPC_GT_L3_MODE_MASK, fuse3);
+
+		bitmap_set_value8(per_mask_bit, 0xf, 0);
+		gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 4,
+					 bank_val);
+		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 16,
+					 meml3_en);
+	} else if (xe->info.platform == XE_DG2) {
+		xe_l3_bank_mask_t per_node = {};
+		u32 mask = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
+
+		bitmap_set_value8(per_node, 0xff, 0);
+		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 8, mask);
+	} else {
+		/* 1:1 register bit to mask bit (inverted register bits) */
+		u32 mask = REG_FIELD_GET(XELP_GT_L3_MODE_MASK, ~fuse3);
+
+		bitmap_from_arr32(l3_bank_mask, &mask, 32);
+	}
+}
+
 static void
 get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
 {
@@ -84,7 +190,7 @@ void
 xe_gt_topology_init(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
-	struct drm_printer p = drm_debug_printer("GT topology");
+	struct drm_printer p;
 	int num_geometry_regs, num_compute_regs;
 
 	get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs);
@@ -106,6 +212,9 @@ xe_gt_topology_init(struct xe_gt *gt)
 		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
 		      XE2_GT_COMPUTE_DSS_2);
 	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
+	load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);
+
+	p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");
 
 	xe_gt_topology_dump(gt, &p);
 }
@@ -121,6 +230,8 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
 	drm_printf(p, "EU mask per DSS:     %*pb\n", XE_MAX_EU_FUSE_BITS,
 		   gt->fuse_topo.eu_mask_per_dss);
 
+	drm_printf(p, "L3 bank mask:        %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
+		   gt->fuse_topo.l3_bank_mask);
 }
 
 /*
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h
index d1b54fb52e..b3e357777a 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.h
+++ b/drivers/gpu/drm/xe/xe_gt_topology.h
@@ -8,6 +8,17 @@
 
 #include "xe_gt_types.h"
 
+/*
+ * Loop over each DSS with the bit is 1 in geometry or compute mask
+ * @dss: iterated DSS bit from the DSS mask
+ * @gt: GT structure
+ */
+#define for_each_dss(dss, gt) \
+	for_each_or_bit((dss), \
+			(gt)->fuse_topo.g_dss_mask, \
+			(gt)->fuse_topo.c_dss_mask, \
+			XE_MAX_DSS_FUSE_BITS)
+
 struct drm_printer;
 
 void xe_gt_topology_init(struct xe_gt *gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index f746846604..cfdc761ff7 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -8,6 +8,7 @@
 
 #include "xe_force_wake_types.h"
 #include "xe_gt_idle_types.h"
+#include "xe_gt_sriov_pf_types.h"
 #include "xe_hw_engine_types.h"
 #include "xe_hw_fence_types.h"
 #include "xe_reg_sr_types.h"
@@ -24,11 +25,15 @@ enum xe_gt_type {
 	XE_GT_TYPE_MEDIA,
 };
 
-#define XE_MAX_DSS_FUSE_REGS	3
-#define XE_MAX_EU_FUSE_REGS	1
+#define XE_MAX_DSS_FUSE_REGS		3
+#define XE_MAX_DSS_FUSE_BITS		(32 * XE_MAX_DSS_FUSE_REGS)
+#define XE_MAX_EU_FUSE_REGS		1
+#define XE_MAX_EU_FUSE_BITS		(32 * XE_MAX_EU_FUSE_REGS)
+#define XE_MAX_L3_BANK_MASK_BITS	64
 
-typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)];
-typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(32 * XE_MAX_EU_FUSE_REGS)];
+typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(XE_MAX_DSS_FUSE_BITS)];
+typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(XE_MAX_EU_FUSE_BITS)];
+typedef unsigned long xe_l3_bank_mask_t[BITS_TO_LONGS(XE_MAX_L3_BANK_MASK_BITS)];
 
 struct xe_mmio_range {
 	u32 start;
@@ -103,20 +108,22 @@ struct xe_gt {
 
 	/** @info: GT info */
 	struct {
-		/** @type: type of GT */
+		/** @info.type: type of GT */
 		enum xe_gt_type type;
-		/** @id: Unique ID of this GT within the PCI Device */
+		/** @info.id: Unique ID of this GT within the PCI Device */
 		u8 id;
-		/** @reference_clock: clock frequency */
+		/** @info.reference_clock: clock frequency */
 		u32 reference_clock;
-		/** @engine_mask: mask of engines present on GT */
+		/** @info.engine_mask: mask of engines present on GT */
 		u64 engine_mask;
 		/**
-		 * @__engine_mask: mask of engines present on GT read from
+		 * @info.__engine_mask: mask of engines present on GT read from
 		 * xe_pci.c, used to fake reading the engine_mask from the
 		 * hwconfig blob.
 		 */
 		u64 __engine_mask;
+		/** @info.gmdid: raw GMD_ID value from hardware */
+		u32 gmdid;
 	} info;
 
 	/**
@@ -125,17 +132,23 @@ struct xe_gt {
 	 * specific offset, as well as their own forcewake handling.
 	 */
 	struct {
-		/** @fw: force wake for GT */
+		/** @mmio.fw: force wake for GT */
 		struct xe_force_wake fw;
 		/**
-		 * @adj_limit: adjust MMIO address if address is below this
+		 * @mmio.adj_limit: adjust MMIO address if address is below this
 		 * value
 		 */
 		u32 adj_limit;
-		/** @adj_offset: offect to add to MMIO address when adjusting */
+		/** @mmio.adj_offset: offect to add to MMIO address when adjusting */
 		u32 adj_offset;
 	} mmio;
 
+	/** @sriov: virtualization data related to GT */
+	union {
+		/** @sriov.pf: PF data. Valid only if driver is running as PF */
+		struct xe_gt_sriov_pf pf;
+	} sriov;
+
 	/**
 	 * @reg_sr: table with registers to be restored on GT init/resume/reset
 	 */
@@ -144,7 +157,7 @@ struct xe_gt {
 	/** @reset: state for GT resets */
 	struct {
 		/**
-		 * @worker: work so GT resets can done async allowing to reset
+		 * @reset.worker: work so GT resets can done async allowing to reset
 		 * code to safely flush all code paths
 		 */
 		struct work_struct worker;
@@ -152,36 +165,30 @@ struct xe_gt {
 
 	/** @tlb_invalidation: TLB invalidation state */
 	struct {
-		/** @seqno: TLB invalidation seqno, protected by CT lock */
+		/** @tlb_invalidation.seqno: TLB invalidation seqno, protected by CT lock */
 #define TLB_INVALIDATION_SEQNO_MAX	0x100000
 		int seqno;
 		/**
-		 * @seqno_recv: last received TLB invalidation seqno, protected by CT lock
+		 * @tlb_invalidation.seqno_recv: last received TLB invalidation seqno,
+		 * protected by CT lock
 		 */
 		int seqno_recv;
 		/**
-		 * @pending_fences: list of pending fences waiting TLB
+		 * @tlb_invalidation.pending_fences: list of pending fences waiting TLB
 		 * invaliations, protected by CT lock
 		 */
 		struct list_head pending_fences;
 		/**
-		 * @pending_lock: protects @pending_fences and updating
-		 * @seqno_recv.
+		 * @tlb_invalidation.pending_lock: protects @tlb_invalidation.pending_fences
+		 * and updating @tlb_invalidation.seqno_recv.
 		 */
 		spinlock_t pending_lock;
 		/**
-		 * @fence_tdr: schedules a delayed call to
+		 * @tlb_invalidation.fence_tdr: schedules a delayed call to
 		 * xe_gt_tlb_fence_timeout after the timeut interval is over.
 		 */
 		struct delayed_work fence_tdr;
-		/** @fence_context: context for TLB invalidation fences */
-		u64 fence_context;
-		/**
-		 * @fence_seqno: seqno to TLB invalidation fences, protected by
-		 * tlb_invalidation.lock
-		 */
-		u32 fence_seqno;
-		/** @lock: protects TLB invalidation fences */
+		/** @tlb_invalidation.lock: protects TLB invalidation fences */
 		spinlock_t lock;
 	} tlb_invalidation;
 
@@ -196,7 +203,7 @@ struct xe_gt {
 	/** @usm: unified shared memory state */
 	struct {
 		/**
-		 * @bb_pool: Pool from which batchbuffers, for USM operations
+		 * @usm.bb_pool: Pool from which batchbuffers, for USM operations
 		 * (e.g. migrations, fixing page tables), are allocated.
 		 * Dedicated pool needed so USM operations to not get blocked
 		 * behind any user operations which may have resulted in a
@@ -204,66 +211,67 @@ struct xe_gt {
 		 */
 		struct xe_sa_manager *bb_pool;
 		/**
-		 * @reserved_bcs_instance: reserved BCS instance used for USM
+		 * @usm.reserved_bcs_instance: reserved BCS instance used for USM
 		 * operations (e.g. mmigrations, fixing page tables)
 		 */
 		u16 reserved_bcs_instance;
-		/** @pf_wq: page fault work queue, unbound, high priority */
+		/** @usm.pf_wq: page fault work queue, unbound, high priority */
 		struct workqueue_struct *pf_wq;
-		/** @acc_wq: access counter work queue, unbound, high priority */
+		/** @usm.acc_wq: access counter work queue, unbound, high priority */
 		struct workqueue_struct *acc_wq;
 		/**
-		 * @pf_queue: Page fault queue used to sync faults so faults can
+		 * @usm.pf_queue: Page fault queue used to sync faults so faults can
 		 * be processed not under the GuC CT lock. The queue is sized so
 		 * it can sync all possible faults (1 per physical engine).
 		 * Multiple queues exists for page faults from different VMs are
 		 * be processed in parallel.
 		 */
 		struct pf_queue {
-			/** @gt: back pointer to GT */
+			/** @usm.pf_queue.gt: back pointer to GT */
 			struct xe_gt *gt;
 #define PF_QUEUE_NUM_DW	128
-			/** @data: data in the page fault queue */
+			/** @usm.pf_queue.data: data in the page fault queue */
 			u32 data[PF_QUEUE_NUM_DW];
 			/**
-			 * @head: head pointer in DWs for page fault queue,
-			 * moved by worker which processes faults.
+			 * @usm.pf_queue.tail: tail pointer in DWs for page fault queue,
+			 * moved by worker which processes faults (consumer).
 			 */
-			u16 head;
+			u16 tail;
 			/**
-			 * @tail: tail pointer in DWs for page fault queue,
-			 * moved by G2H handler.
+			 * @usm.pf_queue.head: head pointer in DWs for page fault queue,
+			 * moved by G2H handler (producer).
 			 */
-			u16 tail;
-			/** @lock: protects page fault queue */
+			u16 head;
+			/** @usm.pf_queue.lock: protects page fault queue */
 			spinlock_t lock;
-			/** @worker: to process page faults */
+			/** @usm.pf_queue.worker: to process page faults */
 			struct work_struct worker;
 #define NUM_PF_QUEUE	4
 		} pf_queue[NUM_PF_QUEUE];
 		/**
-		 * @acc_queue: Same as page fault queue, cannot process access
+		 * @usm.acc_queue: Same as page fault queue, cannot process access
 		 * counters under CT lock.
 		 */
 		struct acc_queue {
-			/** @gt: back pointer to GT */
+			/** @usm.acc_queue.gt: back pointer to GT */
 			struct xe_gt *gt;
 #define ACC_QUEUE_NUM_DW	128
-			/** @data: data in the page fault queue */
+			/** @usm.acc_queue.data: data in the page fault queue */
 			u32 data[ACC_QUEUE_NUM_DW];
 			/**
-			 * @head: head pointer in DWs for page fault queue,
-			 * moved by worker which processes faults.
+			 * @usm.acc_queue.tail: tail pointer in DWs for access counter queue,
+			 * moved by worker which processes counters
+			 * (consumer).
 			 */
-			u16 head;
+			u16 tail;
 			/**
-			 * @tail: tail pointer in DWs for page fault queue,
-			 * moved by G2H handler.
+			 * @usm.acc_queue.head: head pointer in DWs for access counter queue,
+			 * moved by G2H handler (producer).
 			 */
-			u16 tail;
-			/** @lock: protects page fault queue */
+			u16 head;
+			/** @usm.acc_queue.lock: protects page fault queue */
 			spinlock_t lock;
-			/** @worker: to process access counters */
+			/** @usm.acc_queue.worker: to process access counters */
 			struct work_struct worker;
 #define NUM_ACC_QUEUE	4
 		} acc_queue[NUM_ACC_QUEUE];
@@ -300,7 +308,7 @@ struct xe_gt {
 
 	/** @pcode: GT's PCODE */
 	struct {
-		/** @lock: protecting GT's PCODE mailbox data */
+		/** @pcode.lock: protecting GT's PCODE mailbox data */
 		struct mutex lock;
 	} pcode;
 
@@ -312,32 +320,35 @@ struct xe_gt {
 
 	/** @mocs: info */
 	struct {
-		/** @uc_index: UC index */
+		/** @mocs.uc_index: UC index */
 		u8 uc_index;
-		/** @wb_index: WB index, only used on L3_CCS platforms */
+		/** @mocs.wb_index: WB index, only used on L3_CCS platforms */
 		u8 wb_index;
 	} mocs;
 
 	/** @fuse_topo: GT topology reported by fuse registers */
 	struct {
-		/** @g_dss_mask: dual-subslices usable by geometry */
+		/** @fuse_topo.g_dss_mask: dual-subslices usable by geometry */
 		xe_dss_mask_t g_dss_mask;
 
-		/** @c_dss_mask: dual-subslices usable by compute */
+		/** @fuse_topo.c_dss_mask: dual-subslices usable by compute */
 		xe_dss_mask_t c_dss_mask;
 
-		/** @eu_mask_per_dss: EU mask per DSS*/
+		/** @fuse_topo.eu_mask_per_dss: EU mask per DSS*/
 		xe_eu_mask_t eu_mask_per_dss;
+
+		/** @fuse_topo.l3_bank_mask: L3 bank mask */
+		xe_l3_bank_mask_t l3_bank_mask;
 	} fuse_topo;
 
 	/** @steering: register steering for individual HW units */
 	struct {
-		/* @ranges: register ranges used for this steering type */
+		/** @steering.ranges: register ranges used for this steering type */
 		const struct xe_mmio_range *ranges;
 
-		/** @group_target: target to steer accesses to */
+		/** @steering.group_target: target to steer accesses to */
 		u16 group_target;
-		/** @instance_target: instance to steer accesses to */
+		/** @steering.instance_target: instance to steer accesses to */
 		u16 instance_target;
 	} steering[NUM_STEERING_TYPES];
 
@@ -349,13 +360,13 @@ struct xe_gt {
 
 	/** @wa_active: keep track of active workarounds */
 	struct {
-		/** @gt: bitmap with active GT workarounds */
+		/** @wa_active.gt: bitmap with active GT workarounds */
 		unsigned long *gt;
-		/** @engine: bitmap with active engine workarounds */
+		/** @wa_active.engine: bitmap with active engine workarounds */
 		unsigned long *engine;
-		/** @lrc: bitmap with active LRC workarounds */
+		/** @wa_active.lrc: bitmap with active LRC workarounds */
 		unsigned long *lrc;
-		/** @oob: bitmap with active OOB workaroudns */
+		/** @wa_active.oob: bitmap with active OOB workaroudns */
 		unsigned long *oob;
 	} wa_active;
 };
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 0a61390c64..5faca4fc2f 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -7,36 +7,41 @@
 
 #include <drm/drm_managed.h>
 
+#include <generated/xe_wa_oob.h>
+
 #include "abi/guc_actions_abi.h"
 #include "abi/guc_errors_abi.h"
-#include "generated/xe_wa_oob.h"
 #include "regs/xe_gt_regs.h"
+#include "regs/xe_gtt_defs.h"
 #include "regs/xe_guc_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_force_wake.h"
 #include "xe_gt.h"
+#include "xe_gt_printk.h"
 #include "xe_guc_ads.h"
 #include "xe_guc_ct.h"
 #include "xe_guc_hwconfig.h"
 #include "xe_guc_log.h"
 #include "xe_guc_pc.h"
+#include "xe_guc_relay.h"
 #include "xe_guc_submit.h"
+#include "xe_memirq.h"
 #include "xe_mmio.h"
 #include "xe_platform_types.h"
+#include "xe_sriov.h"
 #include "xe_uc.h"
 #include "xe_uc_fw.h"
 #include "xe_wa.h"
 #include "xe_wopcm.h"
 
-/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */
-#define GUC_GGTT_TOP    0xFEE00000
 static u32 guc_bo_ggtt_addr(struct xe_guc *guc,
 			    struct xe_bo *bo)
 {
 	struct xe_device *xe = guc_to_xe(guc);
 	u32 addr = xe_bo_ggtt_addr(bo);
 
+	/* GuC addresses above GUC_GGTT_TOP don't map through the GTT */
 	xe_assert(xe, addr >= xe_wopcm_size(guc_to_xe(guc)));
 	xe_assert(xe, addr < GUC_GGTT_TOP);
 	xe_assert(xe, bo->size <= GUC_GGTT_TOP - addr);
@@ -138,13 +143,10 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
 	if (XE_WA(gt, 22012773006))
 		flags |= GUC_WA_POLLCS;
 
-	if (XE_WA(gt, 16011759253))
-		flags |= GUC_WA_GAM_CREDITS;
-
 	if (XE_WA(gt, 14014475959))
 		flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
 
-	if (XE_WA(gt, 22011391025) || XE_WA(gt, 14012197797))
+	if (XE_WA(gt, 22011391025))
 		flags |= GUC_WA_DUAL_QUEUE;
 
 	/*
@@ -155,19 +157,19 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
 	if (GRAPHICS_VERx100(xe) < 1270)
 		flags |= GUC_WA_PRE_PARSER;
 
-	if (XE_WA(gt, 16011777198))
-		flags |= GUC_WA_RCS_RESET_BEFORE_RC6;
-
 	if (XE_WA(gt, 22012727170) || XE_WA(gt, 22012727685))
 		flags |= GUC_WA_CONTEXT_ISOLATION;
 
-	if ((XE_WA(gt, 16015675438) || XE_WA(gt, 18020744125)) &&
+	if (XE_WA(gt, 18020744125) &&
 	    !xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER))
 		flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST;
 
 	if (XE_WA(gt, 1509372804))
 		flags |= GUC_WA_RENDER_RST_RC6_EXIT;
 
+	if (XE_WA(gt, 14018913170))
+		flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6;
+
 	return flags;
 }
 
@@ -178,15 +180,23 @@ static u32 guc_ctl_devid(struct xe_guc *guc)
 	return (((u32)xe->info.devid) << 16) | xe->info.revid;
 }
 
-static void guc_init_params(struct xe_guc *guc)
+static void guc_print_params(struct xe_guc *guc)
 {
-	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
 	u32 *params = guc->params;
 	int i;
 
 	BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32));
 	BUILD_BUG_ON(GUC_CTL_MAX_DWORDS + 2 != SOFT_SCRATCH_COUNT);
 
+	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
+		xe_gt_dbg(gt, "GuC param[%2d] = 0x%08x\n", i, params[i]);
+}
+
+static void guc_init_params(struct xe_guc *guc)
+{
+	u32 *params = guc->params;
+
 	params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc);
 	params[GUC_CTL_FEATURE] = 0;
 	params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc);
@@ -194,18 +204,12 @@ static void guc_init_params(struct xe_guc *guc)
 	params[GUC_CTL_WA] = 0;
 	params[GUC_CTL_DEVID] = guc_ctl_devid(guc);
 
-	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
-		drm_dbg(&xe->drm, "GuC param[%2d] = 0x%08x\n", i, params[i]);
+	guc_print_params(guc);
 }
 
 static void guc_init_params_post_hwconfig(struct xe_guc *guc)
 {
-	struct xe_device *xe = guc_to_xe(guc);
 	u32 *params = guc->params;
-	int i;
-
-	BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32));
-	BUILD_BUG_ON(GUC_CTL_MAX_DWORDS + 2 != SOFT_SCRATCH_COUNT);
 
 	params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc);
 	params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc);
@@ -214,8 +218,7 @@ static void guc_init_params_post_hwconfig(struct xe_guc *guc)
 	params[GUC_CTL_WA] = guc_ctl_wa_flags(guc);
 	params[GUC_CTL_DEVID] = guc_ctl_devid(guc);
 
-	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
-		drm_dbg(&xe->drm, "GuC param[%2d] = 0x%08x\n", i, params[i]);
+	guc_print_params(guc);
 }
 
 /*
@@ -239,11 +242,55 @@ static void guc_write_params(struct xe_guc *guc)
 static void guc_fini(struct drm_device *drm, void *arg)
 {
 	struct xe_guc *guc = arg;
+	struct xe_gt *gt = guc_to_gt(guc);
 
-	xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
-	xe_guc_pc_fini(&guc->pc);
+	xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
 	xe_uc_fini_hw(&guc_to_gt(guc)->uc);
-	xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+	xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+}
+
+/**
+ * xe_guc_comm_init_early - early initialization of GuC communication
+ * @guc: the &xe_guc to initialize
+ *
+ * Must be called prior to first MMIO communication with GuC firmware.
+ */
+void xe_guc_comm_init_early(struct xe_guc *guc)
+{
+	struct xe_gt *gt = guc_to_gt(guc);
+
+	if (xe_gt_is_media_type(gt))
+		guc->notify_reg = MED_GUC_HOST_INTERRUPT;
+	else
+		guc->notify_reg = GUC_HOST_INTERRUPT;
+}
+
+static int xe_guc_realloc_post_hwconfig(struct xe_guc *guc)
+{
+	struct xe_tile *tile = gt_to_tile(guc_to_gt(guc));
+	struct xe_device *xe = guc_to_xe(guc);
+	int ret;
+
+	if (!IS_DGFX(guc_to_xe(guc)))
+		return 0;
+
+	ret = xe_managed_bo_reinit_in_vram(xe, tile, &guc->fw.bo);
+	if (ret)
+		return ret;
+
+	ret = xe_managed_bo_reinit_in_vram(xe, tile, &guc->log.bo);
+	if (ret)
+		return ret;
+
+	ret = xe_managed_bo_reinit_in_vram(xe, tile, &guc->ads.bo);
+	if (ret)
+		return ret;
+
+	ret = xe_managed_bo_reinit_in_vram(xe, tile, &guc->ct.bo);
+	if (ret)
+		return ret;
+
+	return 0;
 }
 
 int xe_guc_init(struct xe_guc *guc)
@@ -272,27 +319,24 @@ int xe_guc_init(struct xe_guc *guc)
 	if (ret)
 		goto out;
 
-	ret = xe_guc_pc_init(&guc->pc);
+	ret = xe_guc_relay_init(&guc->relay);
 	if (ret)
 		goto out;
 
-	ret = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, guc_fini, guc);
+	ret = drmm_add_action_or_reset(&xe->drm, guc_fini, guc);
 	if (ret)
 		goto out;
 
 	guc_init_params(guc);
 
-	if (xe_gt_is_media_type(gt))
-		guc->notify_reg = MED_GUC_HOST_INTERRUPT;
-	else
-		guc->notify_reg = GUC_HOST_INTERRUPT;
+	xe_guc_comm_init_early(guc);
 
 	xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE);
 
 	return 0;
 
 out:
-	drm_err(&xe->drm, "GuC init failed with %d", ret);
+	xe_gt_err(gt, "GuC init failed with %pe\n", ERR_PTR(ret));
 	return ret;
 }
 
@@ -304,8 +348,18 @@ out:
  */
 int xe_guc_init_post_hwconfig(struct xe_guc *guc)
 {
+	int ret;
+
+	ret = xe_guc_realloc_post_hwconfig(guc);
+	if (ret)
+		return ret;
+
 	guc_init_params_post_hwconfig(guc);
 
+	ret = xe_guc_pc_init(&guc->pc);
+	if (ret)
+		return ret;
+
 	return xe_guc_ads_init_post_hwconfig(&guc->ads);
 }
 
@@ -319,7 +373,6 @@ int xe_guc_post_load_init(struct xe_guc *guc)
 
 int xe_guc_reset(struct xe_guc *guc)
 {
-	struct xe_device *xe = guc_to_xe(guc);
 	struct xe_gt *gt = guc_to_gt(guc);
 	u32 guc_status, gdrst;
 	int ret;
@@ -330,16 +383,14 @@ int xe_guc_reset(struct xe_guc *guc)
 
 	ret = xe_mmio_wait32(gt, GDRST, GRDOM_GUC, 0, 5000, &gdrst, false);
 	if (ret) {
-		drm_err(&xe->drm, "GuC reset timed out, GDRST=0x%8x\n",
-			gdrst);
+		xe_gt_err(gt, "GuC reset timed out, GDRST=%#x\n", gdrst);
 		goto err_out;
 	}
 
 	guc_status = xe_mmio_read32(gt, GUC_STATUS);
 	if (!(guc_status & GS_MIA_IN_RESET)) {
-		drm_err(&xe->drm,
-			"GuC status: 0x%x, MIA core expected to be in reset\n",
-			guc_status);
+		xe_gt_err(gt, "GuC status: %#x, MIA core expected to be in reset\n",
+			  guc_status);
 		ret = -EIO;
 		goto err_out;
 	}
@@ -402,7 +453,7 @@ static int guc_xfer_rsa(struct xe_guc *guc)
 
 static int guc_wait_ucode(struct xe_guc *guc)
 {
-	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_gt *gt = guc_to_gt(guc);
 	u32 status;
 	int ret;
 
@@ -423,38 +474,32 @@ static int guc_wait_ucode(struct xe_guc *guc)
 	 * 200ms. Even at slowest clock, this should be sufficient. And
 	 * in the working case, a larger timeout makes no difference.
 	 */
-	ret = xe_mmio_wait32(guc_to_gt(guc), GUC_STATUS, GS_UKERNEL_MASK,
+	ret = xe_mmio_wait32(gt, GUC_STATUS, GS_UKERNEL_MASK,
 			     FIELD_PREP(GS_UKERNEL_MASK, XE_GUC_LOAD_STATUS_READY),
 			     200000, &status, false);
 
 	if (ret) {
-		struct drm_device *drm = &xe->drm;
-		struct drm_printer p = drm_info_printer(drm->dev);
-
-		drm_info(drm, "GuC load failed: status = 0x%08X\n", status);
-		drm_info(drm, "GuC load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n",
-			 REG_FIELD_GET(GS_MIA_IN_RESET, status),
-			 REG_FIELD_GET(GS_BOOTROM_MASK, status),
-			 REG_FIELD_GET(GS_UKERNEL_MASK, status),
-			 REG_FIELD_GET(GS_MIA_MASK, status),
-			 REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
+		xe_gt_info(gt, "GuC load failed: status = 0x%08X\n", status);
+		xe_gt_info(gt, "GuC status: Reset = %u, BootROM = %#X, UKernel = %#X, MIA = %#X, Auth = %#X\n",
+			   REG_FIELD_GET(GS_MIA_IN_RESET, status),
+			   REG_FIELD_GET(GS_BOOTROM_MASK, status),
+			   REG_FIELD_GET(GS_UKERNEL_MASK, status),
+			   REG_FIELD_GET(GS_MIA_MASK, status),
+			   REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
 
 		if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
-			drm_info(drm, "GuC firmware signature verification failed\n");
+			xe_gt_info(gt, "GuC firmware signature verification failed\n");
 			ret = -ENOEXEC;
 		}
 
 		if (REG_FIELD_GET(GS_UKERNEL_MASK, status) ==
 		    XE_GUC_LOAD_STATUS_EXCEPTION) {
-			drm_info(drm, "GuC firmware exception. EIP: %#x\n",
-				 xe_mmio_read32(guc_to_gt(guc),
-						SOFT_SCRATCH(13)));
+			xe_gt_info(gt, "GuC firmware exception. EIP: %#x\n",
+				   xe_mmio_read32(gt, SOFT_SCRATCH(13)));
 			ret = -ENXIO;
 		}
-
-		xe_guc_log_print(&guc->log, &p);
 	} else {
-		drm_dbg(&xe->drm, "GuC successfully loaded");
+		xe_gt_dbg(gt, "GuC successfully loaded\n");
 	}
 
 	return ret;
@@ -516,6 +561,9 @@ int xe_guc_min_load_for_hwconfig(struct xe_guc *guc)
 
 	xe_guc_ads_populate_minimal(&guc->ads);
 
+	/* Raise GT freq to speed up HuC/GuC load */
+	xe_guc_pc_init_early(&guc->pc);
+
 	ret = __xe_guc_upload(guc);
 	if (ret)
 		return ret;
@@ -543,6 +591,9 @@ static void guc_handle_mmio_msg(struct xe_guc *guc)
 	struct xe_gt *gt = guc_to_gt(guc);
 	u32 msg;
 
+	if (IS_SRIOV_VF(guc_to_xe(guc)))
+		return;
+
 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
 
 	msg = xe_mmio_read32(gt, SOFT_SCRATCH(15));
@@ -551,12 +602,10 @@ static void guc_handle_mmio_msg(struct xe_guc *guc)
 	xe_mmio_write32(gt, SOFT_SCRATCH(15), 0);
 
 	if (msg & XE_GUC_RECV_MSG_CRASH_DUMP_POSTED)
-		drm_err(&guc_to_xe(guc)->drm,
-			"Received early GuC crash dump notification!\n");
+		xe_gt_err(gt, "Received early GuC crash dump notification!\n");
 
 	if (msg & XE_GUC_RECV_MSG_EXCEPTION)
-		drm_err(&guc_to_xe(guc)->drm,
-			"Received early GuC exception notification!\n");
+		xe_gt_err(gt, "Received early GuC exception notification!\n");
 }
 
 static void guc_enable_irq(struct xe_guc *guc)
@@ -579,9 +628,19 @@ static void guc_enable_irq(struct xe_guc *guc)
 
 int xe_guc_enable_communication(struct xe_guc *guc)
 {
+	struct xe_device *xe = guc_to_xe(guc);
 	int err;
 
-	guc_enable_irq(guc);
+	if (IS_SRIOV_VF(xe) && xe_device_has_memirq(xe)) {
+		struct xe_gt *gt = guc_to_gt(guc);
+		struct xe_tile *tile = gt_to_tile(gt);
+
+		err = xe_memirq_init_guc(&tile->sriov.vf.memirq, guc);
+		if (err)
+			return err;
+	} else {
+		guc_enable_irq(guc);
+	}
 
 	xe_mmio_rmw32(guc_to_gt(guc), PMINTRMSK,
 		      ARAT_EXPIRED_INTRMSK, 0);
@@ -597,15 +656,15 @@ int xe_guc_enable_communication(struct xe_guc *guc)
 
 int xe_guc_suspend(struct xe_guc *guc)
 {
-	int ret;
+	struct xe_gt *gt = guc_to_gt(guc);
 	u32 action[] = {
 		XE_GUC_ACTION_CLIENT_SOFT_RESET,
 	};
+	int ret;
 
 	ret = xe_guc_mmio_send(guc, action, ARRAY_SIZE(action));
 	if (ret) {
-		drm_err(&guc_to_xe(guc)->drm,
-			"GuC suspend: CLIENT_SOFT_RESET fail: %d!\n", ret);
+		xe_gt_err(gt, "GuC suspend failed: %pe\n", ERR_PTR(ret));
 		return ret;
 	}
 
@@ -650,7 +709,7 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request,
 
 	BUILD_BUG_ON(VF_SW_FLAG_COUNT != MED_VF_SW_FLAG_COUNT);
 
-	xe_assert(xe, !guc->ct.enabled);
+	xe_assert(xe, !xe_guc_ct_enabled(&guc->ct));
 	xe_assert(xe, len);
 	xe_assert(xe, len <= VF_SW_FLAG_COUNT);
 	xe_assert(xe, len <= MED_VF_SW_FLAG_COUNT);
@@ -680,8 +739,8 @@ retry:
 			     50000, &reply, false);
 	if (ret) {
 timeout:
-		drm_err(&xe->drm, "mmio request %#x: no reply %#x\n",
-			request[0], reply);
+		xe_gt_err(gt, "GuC mmio request %#x: no reply %#x\n",
+			  request[0], reply);
 		return ret;
 	}
 
@@ -707,16 +766,20 @@ timeout:
 		if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) !=
 			     GUC_HXG_ORIGIN_GUC))
 			goto proto;
-		if (unlikely(ret))
+		if (unlikely(ret)) {
+			if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) !=
+			    GUC_HXG_TYPE_NO_RESPONSE_BUSY)
+				goto proto;
 			goto timeout;
+		}
 	}
 
 	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
 	    GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
 		u32 reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, header);
 
-		drm_dbg(&xe->drm, "mmio request %#x: retrying, reason %#x\n",
-			request[0], reason);
+		xe_gt_dbg(gt, "GuC mmio request %#x: retrying, reason %#x\n",
+			  request[0], reason);
 		goto retry;
 	}
 
@@ -725,16 +788,16 @@ timeout:
 		u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, header);
 		u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, header);
 
-		drm_err(&xe->drm, "mmio request %#x: failure %#x/%#x\n",
-			request[0], error, hint);
+		xe_gt_err(gt, "GuC mmio request %#x: failure %#x hint %#x\n",
+			  request[0], error, hint);
 		return -ENXIO;
 	}
 
 	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) !=
 	    GUC_HXG_TYPE_RESPONSE_SUCCESS) {
 proto:
-		drm_err(&xe->drm, "mmio request %#x: unexpected reply %#x\n",
-			request[0], header);
+		xe_gt_err(gt, "GuC mmio request %#x: unexpected reply %#x\n",
+			  request[0], header);
 		return -EPROTO;
 	}
 
@@ -832,7 +895,7 @@ int xe_guc_stop(struct xe_guc *guc)
 {
 	int ret;
 
-	xe_guc_ct_disable(&guc->ct);
+	xe_guc_ct_stop(&guc->ct);
 
 	ret = xe_guc_submit_stop(guc);
 	if (ret)
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
index d3e49e7fd7..94f2dc5f6f 100644
--- a/drivers/gpu/drm/xe/xe_guc.h
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -13,6 +13,7 @@
 
 struct drm_printer;
 
+void xe_guc_comm_init_early(struct xe_guc *guc);
 int xe_guc_init(struct xe_guc *guc);
 int xe_guc_init_post_hwconfig(struct xe_guc *guc);
 int xe_guc_post_load_init(struct xe_guc *guc);
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 390e6f1bf4..7f5a523795 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -7,6 +7,8 @@
 
 #include <drm/drm_managed.h>
 
+#include <generated/xe_wa_oob.h>
+
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_guc_regs.h"
@@ -19,6 +21,7 @@
 #include "xe_map.h"
 #include "xe_mmio.h"
 #include "xe_platform_types.h"
+#include "xe_wa.h"
 
 /* Slack of a few additional entries per engine */
 #define ADS_REGSET_EXTRA_MAX	8
@@ -80,6 +83,10 @@ ads_to_map(struct xe_guc_ads *ads)
  *      +---------------------------------------+
  *      | padding                               |
  *      +---------------------------------------+ <== 4K aligned
+ *      | w/a KLVs                              |
+ *      +---------------------------------------+
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
  *      | capture lists                         |
  *      +---------------------------------------+
  *      | padding                               |
@@ -100,7 +107,7 @@ struct __guc_ads_blob {
 	struct guc_engine_usage engine_usage;
 	struct guc_um_init_params um_init_params;
 	/* From here on, location is dynamic! Refer to above diagram. */
-	struct guc_mmio_reg regset[0];
+	struct guc_mmio_reg regset[];
 } __packed;
 
 #define ads_blob_read(ads_, field_) \
@@ -131,6 +138,11 @@ static size_t guc_ads_golden_lrc_size(struct xe_guc_ads *ads)
 	return PAGE_ALIGN(ads->golden_lrc_size);
 }
 
+static u32 guc_ads_waklv_size(struct xe_guc_ads *ads)
+{
+	return PAGE_ALIGN(ads->ads_waklv_size);
+}
+
 static size_t guc_ads_capture_size(struct xe_guc_ads *ads)
 {
 	/* FIXME: Allocate a proper capture list */
@@ -167,12 +179,22 @@ static size_t guc_ads_golden_lrc_offset(struct xe_guc_ads *ads)
 	return PAGE_ALIGN(offset);
 }
 
+static size_t guc_ads_waklv_offset(struct xe_guc_ads *ads)
+{
+	u32 offset;
+
+	offset = guc_ads_golden_lrc_offset(ads) +
+		 guc_ads_golden_lrc_size(ads);
+
+	return PAGE_ALIGN(offset);
+}
+
 static size_t guc_ads_capture_offset(struct xe_guc_ads *ads)
 {
 	size_t offset;
 
-	offset = guc_ads_golden_lrc_offset(ads) +
-		guc_ads_golden_lrc_size(ads);
+	offset = guc_ads_waklv_offset(ads) +
+		 guc_ads_waklv_size(ads);
 
 	return PAGE_ALIGN(offset);
 }
@@ -260,6 +282,110 @@ static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads)
 	return total_size;
 }
 
+static void guc_waklv_enable_one_word(struct xe_guc_ads *ads,
+				      enum xe_guc_klv_ids klv_id,
+				      u32 value,
+				      u32 *offset, u32 *remain)
+{
+	u32 size;
+	u32 klv_entry[] = {
+		/* 16:16 key/length */
+		FIELD_PREP(GUC_KLV_0_KEY, klv_id) |
+		FIELD_PREP(GUC_KLV_0_LEN, 1),
+		value,
+		/* 1 dword data */
+	};
+
+	size = sizeof(klv_entry);
+
+	if (*remain < size) {
+		drm_warn(&ads_to_xe(ads)->drm,
+			 "w/a klv buffer too small to add klv id %d\n", klv_id);
+	} else {
+		xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset,
+				 klv_entry, size);
+		*offset += size;
+		*remain -= size;
+	}
+}
+
+static void guc_waklv_enable_simple(struct xe_guc_ads *ads,
+				    enum xe_guc_klv_ids klv_id, u32 *offset, u32 *remain)
+{
+	u32 klv_entry[] = {
+		/* 16:16 key/length */
+		FIELD_PREP(GUC_KLV_0_KEY, klv_id) |
+		FIELD_PREP(GUC_KLV_0_LEN, 0),
+		/* 0 dwords data */
+	};
+	u32 size;
+
+	size = sizeof(klv_entry);
+
+	if (xe_gt_WARN(ads_to_gt(ads), *remain < size,
+		       "w/a klv buffer too small to add klv id %d\n", klv_id))
+		return;
+
+	xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset,
+			 klv_entry, size);
+	*offset += size;
+	*remain -= size;
+}
+
+static void guc_waklv_init(struct xe_guc_ads *ads)
+{
+	struct xe_gt *gt = ads_to_gt(ads);
+	u64 addr_ggtt;
+	u32 offset, remain, size;
+
+	offset = guc_ads_waklv_offset(ads);
+	remain = guc_ads_waklv_size(ads);
+
+	if (XE_WA(gt, 14019882105))
+		guc_waklv_enable_simple(ads,
+					GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED,
+					&offset, &remain);
+	if (XE_WA(gt, 18024947630))
+		guc_waklv_enable_simple(ads,
+					GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING,
+					&offset, &remain);
+	if (XE_WA(gt, 16022287689))
+		guc_waklv_enable_simple(ads,
+					GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE,
+					&offset, &remain);
+
+	/*
+	 * On RC6 exit, GuC will write register 0xB04 with the default value provided. As of now,
+	 * the default value for this register is determined to be 0xC40. This could change in the
+	 * future, so GuC depends on KMD to send it the correct value.
+	 */
+	if (XE_WA(gt, 13011645652))
+		guc_waklv_enable_one_word(ads,
+					  GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE,
+					  0xC40,
+					  &offset, &remain);
+
+	size = guc_ads_waklv_size(ads) - remain;
+	if (!size)
+		return;
+
+	offset = guc_ads_waklv_offset(ads);
+	addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;
+
+	ads_blob_write(ads, ads.wa_klv_addr_lo, lower_32_bits(addr_ggtt));
+	ads_blob_write(ads, ads.wa_klv_addr_hi, upper_32_bits(addr_ggtt));
+	ads_blob_write(ads, ads.wa_klv_size, size);
+}
+
+static int calculate_waklv_size(struct xe_guc_ads *ads)
+{
+	/*
+	 * A single page is both the minimum size possible and
+	 * is sufficiently large enough for all current platforms.
+	 */
+	return SZ_4K;
+}
+
 #define MAX_GOLDEN_LRC_SIZE	(SZ_4K * 64)
 
 int xe_guc_ads_init(struct xe_guc_ads *ads)
@@ -271,10 +397,12 @@ int xe_guc_ads_init(struct xe_guc_ads *ads)
 
 	ads->golden_lrc_size = calculate_golden_lrc_size(ads);
 	ads->regset_size = calculate_regset_size(gt);
+	ads->ads_waklv_size = calculate_waklv_size(ads);
 
 	bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE,
-					  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
-					  XE_BO_CREATE_GGTT_BIT);
+					  XE_BO_FLAG_SYSTEM |
+					  XE_BO_FLAG_GGTT |
+					  XE_BO_FLAG_GGTT_INVALIDATE);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
@@ -597,6 +725,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads)
 	guc_mapping_table_init(gt, &info_map);
 	guc_capture_list_init(ads);
 	guc_doorbell_init(ads);
+	guc_waklv_init(ads);
 
 	if (xe->info.has_usm) {
 		guc_um_init_params(ads);
diff --git a/drivers/gpu/drm/xe/xe_guc_ads_types.h b/drivers/gpu/drm/xe/xe_guc_ads_types.h
index 4afe44bece..2de5decfe0 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ads_types.h
@@ -20,6 +20,8 @@ struct xe_guc_ads {
 	size_t golden_lrc_size;
 	/** @regset_size: size of register set passed to GuC for save/restore */
 	u32 regset_size;
+	/** @ads_waklv_size: total waklv size supported by platform */
+	u32 ads_waklv_size;
 };
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 24a33fa364..0151d29b3c 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -9,16 +9,22 @@
 #include <linux/circ_buf.h>
 #include <linux/delay.h>
 
+#include <kunit/static_stub.h>
+
 #include <drm/drm_managed.h>
 
 #include "abi/guc_actions_abi.h"
+#include "abi/guc_actions_sriov_abi.h"
 #include "abi/guc_klvs_abi.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_gt_pagefault.h"
+#include "xe_gt_printk.h"
+#include "xe_gt_sriov_pf_control.h"
 #include "xe_gt_tlb_invalidation.h"
 #include "xe_guc.h"
+#include "xe_guc_relay.h"
 #include "xe_guc_submit.h"
 #include "xe_map.h"
 #include "xe_pm.h"
@@ -28,6 +34,7 @@
 struct g2h_fence {
 	u32 *response_buffer;
 	u32 seqno;
+	u32 response_data;
 	u16 response_len;
 	u16 error;
 	u16 hint;
@@ -40,6 +47,7 @@ struct g2h_fence {
 static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer)
 {
 	g2h_fence->response_buffer = response_buffer;
+	g2h_fence->response_data = 0;
 	g2h_fence->response_len = 0;
 	g2h_fence->fail = false;
 	g2h_fence->retry = false;
@@ -113,6 +121,7 @@ static void guc_ct_fini(struct drm_device *drm, void *arg)
 {
 	struct xe_guc_ct *ct = arg;
 
+	destroy_workqueue(ct->g2h_wq);
 	xa_destroy(&ct->fence_lookup);
 }
 
@@ -136,20 +145,28 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
 	struct xe_bo *bo;
 	int err;
 
-	xe_assert(xe, !(guc_ct_size() % PAGE_SIZE));
+	xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE));
+
+	ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", 0);
+	if (!ct->g2h_wq)
+		return -ENOMEM;
 
-	drmm_mutex_init(&xe->drm, &ct->lock);
 	spin_lock_init(&ct->fast_lock);
 	xa_init(&ct->fence_lookup);
 	INIT_WORK(&ct->g2h_worker, g2h_worker_func);
 	init_waitqueue_head(&ct->wq);
 	init_waitqueue_head(&ct->g2h_fence_wq);
 
+	err = drmm_mutex_init(&xe->drm, &ct->lock);
+	if (err)
+		return err;
+
 	primelockdep(ct);
 
 	bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(),
-					  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
-					  XE_BO_CREATE_GGTT_BIT);
+					  XE_BO_FLAG_SYSTEM |
+					  XE_BO_FLAG_GGTT |
+					  XE_BO_FLAG_GGTT_INVALIDATE);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
@@ -159,6 +176,8 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
 	if (err)
 		return err;
 
+	xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED);
+	ct->state = XE_GUC_CT_STATE_DISABLED;
 	return 0;
 }
 
@@ -278,12 +297,36 @@ static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable)
 	return ret > 0 ? -EPROTO : ret;
 }
 
+static void xe_guc_ct_set_state(struct xe_guc_ct *ct,
+				enum xe_guc_ct_state state)
+{
+	mutex_lock(&ct->lock);		/* Serialise dequeue_one_g2h() */
+	spin_lock_irq(&ct->fast_lock);	/* Serialise CT fast-path */
+
+	xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding == 0 ||
+		     state == XE_GUC_CT_STATE_STOPPED);
+
+	ct->g2h_outstanding = 0;
+	ct->state = state;
+
+	spin_unlock_irq(&ct->fast_lock);
+
+	/*
+	 * Lockdep doesn't like this under the fast lock and he destroy only
+	 * needs to be serialized with the send path which ct lock provides.
+	 */
+	xa_destroy(&ct->fence_lookup);
+
+	mutex_unlock(&ct->lock);
+}
+
 int xe_guc_ct_enable(struct xe_guc_ct *ct)
 {
 	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_gt *gt = ct_to_gt(ct);
 	int err;
 
-	xe_assert(xe, !ct->enabled);
+	xe_gt_assert(gt, !xe_guc_ct_enabled(ct));
 
 	guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
 	guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
@@ -300,34 +343,48 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct)
 	if (err)
 		goto err_out;
 
-	mutex_lock(&ct->lock);
-	spin_lock_irq(&ct->fast_lock);
-	ct->g2h_outstanding = 0;
-	ct->enabled = true;
-	spin_unlock_irq(&ct->fast_lock);
-	mutex_unlock(&ct->lock);
+	xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_ENABLED);
 
 	smp_mb();
 	wake_up_all(&ct->wq);
-	drm_dbg(&xe->drm, "GuC CT communication channel enabled\n");
+	xe_gt_dbg(gt, "GuC CT communication channel enabled\n");
 
 	return 0;
 
 err_out:
-	drm_err(&xe->drm, "Failed to enable CT (%d)\n", err);
+	xe_gt_err(gt, "Failed to enable GuC CT (%pe)\n", ERR_PTR(err));
 
 	return err;
 }
 
+static void stop_g2h_handler(struct xe_guc_ct *ct)
+{
+	cancel_work_sync(&ct->g2h_worker);
+}
+
+/**
+ * xe_guc_ct_disable - Set GuC to disabled state
+ * @ct: the &xe_guc_ct
+ *
+ * Set GuC CT to disabled state and stop g2h handler. No outstanding g2h expected
+ * in this transition.
+ */
 void xe_guc_ct_disable(struct xe_guc_ct *ct)
 {
-	mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */
-	spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */
-	ct->enabled = false; /* Finally disable CT communication */
-	spin_unlock_irq(&ct->fast_lock);
-	mutex_unlock(&ct->lock);
+	xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_DISABLED);
+	stop_g2h_handler(ct);
+}
 
-	xa_destroy(&ct->fence_lookup);
+/**
+ * xe_guc_ct_stop - Set GuC to stopped state
+ * @ct: the &xe_guc_ct
+ *
+ * Set GuC CT to stopped state, stop g2h handler, and clear any outstanding g2h
+ */
+void xe_guc_ct_stop(struct xe_guc_ct *ct)
+{
+	xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED);
+	stop_g2h_handler(ct);
 }
 
 static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len)
@@ -376,7 +433,7 @@ static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len)
 
 static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
 {
-	xe_assert(ct_to_xe(ct), g2h_len <= ct->ctbs.g2h.info.space);
+	xe_gt_assert(ct_to_gt(ct), g2h_len <= ct->ctbs.g2h.info.space);
 
 	if (g2h_len) {
 		lockdep_assert_held(&ct->fast_lock);
@@ -389,8 +446,8 @@ static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h)
 static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
 {
 	lockdep_assert_held(&ct->fast_lock);
-	xe_assert(ct_to_xe(ct), ct->ctbs.g2h.info.space + g2h_len <=
-		  ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space);
+	xe_gt_assert(ct_to_gt(ct), ct->ctbs.g2h.info.space + g2h_len <=
+		     ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space);
 
 	ct->ctbs.g2h.info.space += g2h_len;
 	--ct->g2h_outstanding;
@@ -409,6 +466,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
 		     u32 ct_fence_value, bool want_response)
 {
 	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_gt *gt = ct_to_gt(ct);
 	struct guc_ctb *h2g = &ct->ctbs.h2g;
 	u32 cmd[H2G_CT_HEADERS];
 	u32 tail = h2g->info.tail;
@@ -419,8 +477,8 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
 	full_len = len + GUC_CTB_HDR_LEN;
 
 	lockdep_assert_held(&ct->lock);
-	xe_assert(xe, full_len <= GUC_CTB_MSG_MAX_LEN);
-	xe_assert(xe, tail <= h2g->info.size);
+	xe_gt_assert(gt, full_len <= GUC_CTB_MSG_MAX_LEN);
+	xe_gt_assert(gt, tail <= h2g->info.size);
 
 	/* Command will wrap, zero fill (NOPs), return and check credits again */
 	if (tail + full_len > h2g->info.size) {
@@ -448,7 +506,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
 				   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
 	} else {
 		cmd[1] =
-			FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
+			FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_FAST_REQUEST) |
 			FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
 				   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
 	}
@@ -469,23 +527,44 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
 	/* Update descriptor */
 	desc_write(xe, h2g, tail, h2g->info.tail);
 
-	trace_xe_guc_ctb_h2g(ct_to_gt(ct)->info.id, *(action - 1), full_len,
+	trace_xe_guc_ctb_h2g(gt->info.id, *(action - 1), full_len,
 			     desc_read(xe, h2g, head), h2g->info.tail);
 
 	return 0;
 }
 
+/*
+ * The CT protocol accepts a 16 bits fence. This field is fully owned by the
+ * driver, the GuC will just copy it to the reply message. Since we need to
+ * be able to distinguish between replies to REQUEST and FAST_REQUEST messages,
+ * we use one bit of the seqno as an indicator for that and a rolling counter
+ * for the remaining 15 bits.
+ */
+#define CT_SEQNO_MASK GENMASK(14, 0)
+#define CT_SEQNO_UNTRACKED BIT(15)
+static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence)
+{
+	u32 seqno = ct->fence_seqno++ & CT_SEQNO_MASK;
+
+	if (!is_g2h_fence)
+		seqno |= CT_SEQNO_UNTRACKED;
+
+	return seqno;
+}
+
 static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
 				u32 len, u32 g2h_len, u32 num_g2h,
 				struct g2h_fence *g2h_fence)
 {
-	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_gt *gt __maybe_unused = ct_to_gt(ct);
+	u16 seqno;
 	int ret;
 
-	xe_assert(xe, !g2h_len || !g2h_fence);
-	xe_assert(xe, !num_g2h || !g2h_fence);
-	xe_assert(xe, !g2h_len || num_g2h);
-	xe_assert(xe, g2h_len || !num_g2h);
+	xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
+	xe_gt_assert(gt, !g2h_len || !g2h_fence);
+	xe_gt_assert(gt, !num_g2h || !g2h_fence);
+	xe_gt_assert(gt, !g2h_len || num_g2h);
+	xe_gt_assert(gt, g2h_len || !num_g2h);
 	lockdep_assert_held(&ct->lock);
 
 	if (unlikely(ct->ctbs.h2g.info.broken)) {
@@ -493,11 +572,18 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
 		goto out;
 	}
 
-	if (unlikely(!ct->enabled)) {
+	if (ct->state == XE_GUC_CT_STATE_DISABLED) {
 		ret = -ENODEV;
 		goto out;
 	}
 
+	if (ct->state == XE_GUC_CT_STATE_STOPPED) {
+		ret = -ECANCELED;
+		goto out;
+	}
+
+	xe_gt_assert(gt, xe_guc_ct_enabled(ct));
+
 	if (g2h_fence) {
 		g2h_len = GUC_CTB_HXG_MSG_MAX_LEN;
 		num_g2h = 1;
@@ -505,7 +591,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
 		if (g2h_fence_needs_alloc(g2h_fence)) {
 			void *ptr;
 
-			g2h_fence->seqno = (ct->fence_seqno++ & 0xffff);
+			g2h_fence->seqno = next_ct_seqno(ct, true);
 			ptr = xa_store(&ct->fence_lookup,
 				       g2h_fence->seqno,
 				       g2h_fence, GFP_ATOMIC);
@@ -514,6 +600,10 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
 				goto out;
 			}
 		}
+
+		seqno = g2h_fence->seqno;
+	} else {
+		seqno = next_ct_seqno(ct, false);
 	}
 
 	if (g2h_len)
@@ -523,8 +613,7 @@ retry:
 	if (unlikely(ret))
 		goto out_unlock;
 
-	ret = h2g_write(ct, action, len, g2h_fence ? g2h_fence->seqno : 0,
-			!!g2h_fence);
+	ret = h2g_write(ct, action, len, seqno, !!g2h_fence);
 	if (unlikely(ret)) {
 		if (ret == -EAGAIN)
 			goto retry;
@@ -551,12 +640,12 @@ static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
 			      u32 g2h_len, u32 num_g2h,
 			      struct g2h_fence *g2h_fence)
 {
-	struct drm_device *drm = &ct_to_xe(ct)->drm;
-	struct drm_printer p = drm_info_printer(drm->dev);
+	struct xe_gt *gt = ct_to_gt(ct);
+	struct drm_printer p = xe_gt_info_printer(gt);
 	unsigned int sleep_period_ms = 1;
 	int ret;
 
-	xe_assert(ct_to_xe(ct), !g2h_len || !g2h_fence);
+	xe_gt_assert(gt, !g2h_len || !g2h_fence);
 	lockdep_assert_held(&ct->lock);
 	xe_device_assert_mem_access(ct_to_xe(ct));
 
@@ -614,7 +703,7 @@ try_again:
 	return ret;
 
 broken:
-	drm_err(drm, "No forward process on H2G, reset required");
+	xe_gt_err(gt, "No forward process on H2G, reset required\n");
 	xe_guc_ct_print(ct, &p, true);
 	ct->ctbs.h2g.info.broken = true;
 
@@ -626,7 +715,7 @@ static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len,
 {
 	int ret;
 
-	xe_assert(ct_to_xe(ct), !g2h_len || !g2h_fence);
+	xe_gt_assert(ct_to_gt(ct), !g2h_len || !g2h_fence);
 
 	mutex_lock(&ct->lock);
 	ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence);
@@ -682,7 +771,8 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret)
 		return false;
 
 #define ct_alive(ct)	\
-	(ct->enabled && !ct->ctbs.h2g.info.broken && !ct->ctbs.g2h.info.broken)
+	(xe_guc_ct_enabled(ct) && !ct->ctbs.h2g.info.broken && \
+	 !ct->ctbs.g2h.info.broken)
 	if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct),  HZ * 5))
 		return false;
 #undef ct_alive
@@ -693,7 +783,7 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret)
 static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
 			    u32 *response_buffer, bool no_fail)
 {
-	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_gt *gt = ct_to_gt(ct);
 	struct g2h_fence g2h_fence;
 	int ret = 0;
 
@@ -735,29 +825,48 @@ retry_same_fence:
 
 	ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
 	if (!ret) {
-		drm_err(&xe->drm, "Timed out wait for G2H, fence %u, action %04x",
-			g2h_fence.seqno, action[0]);
+		xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x",
+			  g2h_fence.seqno, action[0]);
 		xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
 		return -ETIME;
 	}
 
 	if (g2h_fence.retry) {
-		drm_warn(&xe->drm, "Send retry, action 0x%04x, reason %d",
-			 action[0], g2h_fence.reason);
+		xe_gt_warn(gt, "H2G retry, action 0x%04x, reason %u",
+			   action[0], g2h_fence.reason);
 		goto retry;
 	}
 	if (g2h_fence.fail) {
-		drm_err(&xe->drm, "Send failed, action 0x%04x, error %d, hint %d",
-			action[0], g2h_fence.error, g2h_fence.hint);
+		xe_gt_err(gt, "H2G send failed, action 0x%04x, error %d, hint %u",
+			  action[0], g2h_fence.error, g2h_fence.hint);
 		ret = -EIO;
 	}
 
-	return ret > 0 ? 0 : ret;
+	return ret > 0 ? response_buffer ? g2h_fence.response_len : g2h_fence.response_data : ret;
 }
 
+/**
+ * xe_guc_ct_send_recv - Send and receive HXG to the GuC
+ * @ct: the &xe_guc_ct
+ * @action: the dword array with `HXG Request`_ message (can't be NULL)
+ * @len: length of the `HXG Request`_ message (in dwords, can't be 0)
+ * @response_buffer: placeholder for the `HXG Response`_ message (can be NULL)
+ *
+ * Send a `HXG Request`_ message to the GuC over CT communication channel and
+ * blocks until GuC replies with a `HXG Response`_ message.
+ *
+ * For non-blocking communication with GuC use xe_guc_ct_send().
+ *
+ * Note: The size of &response_buffer must be at least GUC_CTB_MAX_DWORDS_.
+ *
+ * Return: response length (in dwords) if &response_buffer was not NULL, or
+ *         DATA0 from `HXG Response`_ if &response_buffer was NULL, or
+ *         a negative error code on failure.
+ */
 int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
 			u32 *response_buffer)
 {
+	KUNIT_STATIC_STUB_REDIRECT(xe_guc_ct_send_recv, ct, action, len, response_buffer);
 	return guc_ct_send_recv(ct, action, len, response_buffer, false);
 }
 
@@ -767,9 +876,20 @@ int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action,
 	return guc_ct_send_recv(ct, action, len, response_buffer, true);
 }
 
+static u32 *msg_to_hxg(u32 *msg)
+{
+	return msg + GUC_CTB_MSG_MIN_LEN;
+}
+
+static u32 msg_len_to_hxg_len(u32 len)
+{
+	return len - GUC_CTB_MSG_MIN_LEN;
+}
+
 static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len)
 {
-	u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
+	u32 *hxg = msg_to_hxg(msg);
+	u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
 
 	lockdep_assert_held(&ct->lock);
 
@@ -786,38 +906,58 @@ static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len)
 
 static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
 {
-	struct xe_device *xe = ct_to_xe(ct);
-	u32 response_len = len - GUC_CTB_MSG_MIN_LEN;
+	struct xe_gt *gt =  ct_to_gt(ct);
+	u32 *hxg = msg_to_hxg(msg);
+	u32 hxg_len = msg_len_to_hxg_len(len);
 	u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, msg[0]);
-	u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]);
+	u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]);
 	struct g2h_fence *g2h_fence;
 
 	lockdep_assert_held(&ct->lock);
 
+	/*
+	 * Fences for FAST_REQUEST messages are not tracked in ct->fence_lookup.
+	 * Those messages should never fail, so if we do get an error back it
+	 * means we're likely doing an illegal operation and the GuC is
+	 * rejecting it. We have no way to inform the code that submitted the
+	 * H2G that the message was rejected, so we need to escalate the
+	 * failure to trigger a reset.
+	 */
+	if (fence & CT_SEQNO_UNTRACKED) {
+		if (type == GUC_HXG_TYPE_RESPONSE_FAILURE)
+			xe_gt_err(gt, "FAST_REQ H2G fence 0x%x failed! e=0x%x, h=%u\n",
+				  fence,
+				  FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, hxg[0]),
+				  FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, hxg[0]));
+		else
+			xe_gt_err(gt, "unexpected response %u for FAST_REQ H2G fence 0x%x!\n",
+				  type, fence);
+
+		return -EPROTO;
+	}
+
 	g2h_fence = xa_erase(&ct->fence_lookup, fence);
 	if (unlikely(!g2h_fence)) {
 		/* Don't tear down channel, as send could've timed out */
-		drm_warn(&xe->drm, "G2H fence (%u) not found!\n", fence);
+		xe_gt_warn(gt, "G2H fence (%u) not found!\n", fence);
 		g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
 		return 0;
 	}
 
-	xe_assert(xe, fence == g2h_fence->seqno);
+	xe_gt_assert(gt, fence == g2h_fence->seqno);
 
 	if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) {
 		g2h_fence->fail = true;
-		g2h_fence->error =
-			FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[1]);
-		g2h_fence->hint =
-			FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[1]);
+		g2h_fence->error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, hxg[0]);
+		g2h_fence->hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, hxg[0]);
 	} else if (type == GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
 		g2h_fence->retry = true;
-		g2h_fence->reason =
-			FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, msg[1]);
+		g2h_fence->reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, hxg[0]);
 	} else if (g2h_fence->response_buffer) {
-		g2h_fence->response_len = response_len;
-		memcpy(g2h_fence->response_buffer, msg + GUC_CTB_MSG_MIN_LEN,
-		       response_len * sizeof(u32));
+		g2h_fence->response_len = hxg_len;
+		memcpy(g2h_fence->response_buffer, hxg, hxg_len * sizeof(u32));
+	} else {
+		g2h_fence->response_data = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, hxg[0]);
 	}
 
 	g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
@@ -832,25 +972,23 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
 
 static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
 {
-	struct xe_device *xe = ct_to_xe(ct);
-	u32 hxg, origin, type;
+	struct xe_gt *gt = ct_to_gt(ct);
+	u32 *hxg = msg_to_hxg(msg);
+	u32 origin, type;
 	int ret;
 
 	lockdep_assert_held(&ct->lock);
 
-	hxg = msg[1];
-
-	origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg);
+	origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg[0]);
 	if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) {
-		drm_err(&xe->drm,
-			"G2H channel broken on read, origin=%d, reset required\n",
-			origin);
+		xe_gt_err(gt, "G2H channel broken on read, origin=%u, reset required\n",
+			  origin);
 		ct->ctbs.g2h.info.broken = true;
 
 		return -EPROTO;
 	}
 
-	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg);
+	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]);
 	switch (type) {
 	case GUC_HXG_TYPE_EVENT:
 		ret = parse_g2h_event(ct, msg, len);
@@ -861,9 +999,8 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
 		ret = parse_g2h_response(ct, msg, len);
 		break;
 	default:
-		drm_err(&xe->drm,
-			"G2H channel broken on read, type=%d, reset required\n",
-			type);
+		xe_gt_err(gt, "G2H channel broken on read, type=%u, reset required\n",
+			  type);
 		ct->ctbs.g2h.info.broken = true;
 
 		ret = -EOPNOTSUPP;
@@ -874,16 +1011,21 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
 
 static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
 {
-	struct xe_device *xe = ct_to_xe(ct);
 	struct xe_guc *guc = ct_to_guc(ct);
-	u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
-	u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN;
-	u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN;
+	struct xe_gt *gt = ct_to_gt(ct);
+	u32 hxg_len = msg_len_to_hxg_len(len);
+	u32 *hxg = msg_to_hxg(msg);
+	u32 action, adj_len;
+	u32 *payload;
 	int ret = 0;
 
-	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT)
+	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT)
 		return 0;
 
+	action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
+	payload = hxg + GUC_HXG_EVENT_MSG_MIN_LEN;
+	adj_len = hxg_len - GUC_HXG_EVENT_MSG_MIN_LEN;
+
 	switch (action) {
 	case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
 		ret = xe_guc_sched_done_handler(guc, payload, adj_len);
@@ -920,13 +1062,22 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
 		ret = xe_guc_access_counter_notify_handler(guc, payload,
 							   adj_len);
 		break;
+	case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF:
+		ret = xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len);
+		break;
+	case XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF:
+		ret = xe_guc_relay_process_guc2vf(&guc->relay, hxg, hxg_len);
+		break;
+	case GUC_ACTION_GUC2PF_VF_STATE_NOTIFY:
+		ret = xe_gt_sriov_pf_control_process_guc2pf(gt, hxg, hxg_len);
+		break;
 	default:
-		drm_err(&xe->drm, "unexpected action 0x%04x\n", action);
+		xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
 	}
 
 	if (ret)
-		drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n",
-			action, ret);
+		xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n",
+			  action, ERR_PTR(ret));
 
 	return 0;
 }
@@ -934,19 +1085,27 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
 static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
 {
 	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_gt *gt = ct_to_gt(ct);
 	struct guc_ctb *g2h = &ct->ctbs.g2h;
 	u32 tail, head, len;
 	s32 avail;
 	u32 action;
+	u32 *hxg;
 
+	xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
 	lockdep_assert_held(&ct->fast_lock);
 
-	if (!ct->enabled)
+	if (ct->state == XE_GUC_CT_STATE_DISABLED)
 		return -ENODEV;
 
+	if (ct->state == XE_GUC_CT_STATE_STOPPED)
+		return -ECANCELED;
+
 	if (g2h->info.broken)
 		return -EPIPE;
 
+	xe_gt_assert(gt, xe_guc_ct_enabled(ct));
+
 	/* Calculate DW available to read */
 	tail = desc_read(xe, g2h, tail);
 	avail = tail - g2h->info.head;
@@ -961,9 +1120,8 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
 			   sizeof(u32));
 	len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN;
 	if (len > avail) {
-		drm_err(&xe->drm,
-			"G2H channel broken on read, avail=%d, len=%d, reset required\n",
-			avail, len);
+		xe_gt_err(gt, "G2H channel broken on read, avail=%d, len=%d, reset required\n",
+			  avail, len);
 		g2h->info.broken = true;
 
 		return -EPROTO;
@@ -988,10 +1146,11 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
 				   avail * sizeof(u32));
 	}
 
-	action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
+	hxg = msg_to_hxg(msg);
+	action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
 
 	if (fast_path) {
-		if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT)
+		if (FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT)
 			return 0;
 
 		switch (action) {
@@ -1015,11 +1174,13 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
 
 static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
 {
-	struct xe_device *xe = ct_to_xe(ct);
+	struct xe_gt *gt = ct_to_gt(ct);
 	struct xe_guc *guc = ct_to_guc(ct);
-	u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
-	u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN;
-	u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN;
+	u32 hxg_len = msg_len_to_hxg_len(len);
+	u32 *hxg = msg_to_hxg(msg);
+	u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
+	u32 *payload = hxg + GUC_HXG_MSG_MIN_LEN;
+	u32 adj_len = hxg_len - GUC_HXG_MSG_MIN_LEN;
 	int ret = 0;
 
 	switch (action) {
@@ -1032,12 +1193,12 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
 							   adj_len);
 		break;
 	default:
-		drm_warn(&xe->drm, "NOT_POSSIBLE");
+		xe_gt_warn(gt, "NOT_POSSIBLE");
 	}
 
 	if (ret)
-		drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n",
-			action, ret);
+		xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n",
+			  action, ERR_PTR(ret));
 }
 
 /**
@@ -1054,7 +1215,7 @@ void xe_guc_ct_fast_path(struct xe_guc_ct *ct)
 	bool ongoing;
 	int len;
 
-	ongoing = xe_device_mem_access_get_if_ongoing(ct_to_xe(ct));
+	ongoing = xe_pm_runtime_get_if_active(ct_to_xe(ct));
 	if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL)
 		return;
 
@@ -1067,7 +1228,7 @@ void xe_guc_ct_fast_path(struct xe_guc_ct *ct)
 	spin_unlock(&ct->fast_lock);
 
 	if (ongoing)
-		xe_device_mem_access_put(xe);
+		xe_pm_runtime_put(xe);
 }
 
 /* Returns less than zero on error, 0 on done, 1 on more available */
@@ -1098,6 +1259,7 @@ static int dequeue_one_g2h(struct xe_guc_ct *ct)
 static void g2h_worker_func(struct work_struct *w)
 {
 	struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker);
+	struct xe_gt *gt = ct_to_gt(ct);
 	bool ongoing;
 	int ret;
 
@@ -1124,7 +1286,7 @@ static void g2h_worker_func(struct work_struct *w)
 	 * responses, if the worker here is blocked on those callbacks
 	 * completing, creating a deadlock.
 	 */
-	ongoing = xe_device_mem_access_get_if_ongoing(ct_to_xe(ct));
+	ongoing = xe_pm_runtime_get_if_active(ct_to_xe(ct));
 	if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL)
 		return;
 
@@ -1134,8 +1296,7 @@ static void g2h_worker_func(struct work_struct *w)
 		mutex_unlock(&ct->lock);
 
 		if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) {
-			struct drm_device *drm = &ct_to_xe(ct)->drm;
-			struct drm_printer p = drm_info_printer(drm->dev);
+			struct drm_printer p = xe_gt_info_printer(gt);
 
 			xe_guc_ct_print(ct, &p, false);
 			kick_reset(ct);
@@ -1143,7 +1304,7 @@ static void g2h_worker_func(struct work_struct *w)
 	} while (ret == 1);
 
 	if (ongoing)
-		xe_device_mem_access_put(ct_to_xe(ct));
+		xe_pm_runtime_put(ct_to_xe(ct));
 }
 
 static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb,
@@ -1245,7 +1406,7 @@ struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
 		return NULL;
 	}
 
-	if (ct->enabled) {
+	if (xe_guc_ct_enabled(ct) || ct->state == XE_GUC_CT_STATE_STOPPED) {
 		snapshot->ct_enabled = true;
 		snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding);
 		guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g,
@@ -1271,7 +1432,7 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
 		return;
 
 	if (snapshot->ct_enabled) {
-		drm_puts(p, "\nH2G CTB (all sizes in DW):\n");
+		drm_puts(p, "H2G CTB (all sizes in DW):\n");
 		guc_ctb_snapshot_print(&snapshot->h2g, p);
 
 		drm_puts(p, "\nG2H CTB (all sizes in DW):\n");
@@ -1280,7 +1441,7 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
 		drm_printf(p, "\tg2h outstanding: %d\n",
 			   snapshot->g2h_outstanding);
 	} else {
-		drm_puts(p, "\nCT disabled\n");
+		drm_puts(p, "CT disabled\n");
 	}
 }
 
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
index f15f8a4857..105bb8e99a 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -13,6 +13,7 @@ struct drm_printer;
 int xe_guc_ct_init(struct xe_guc_ct *ct);
 int xe_guc_ct_enable(struct xe_guc_ct *ct);
 void xe_guc_ct_disable(struct xe_guc_ct *ct);
+void xe_guc_ct_stop(struct xe_guc_ct *ct);
 void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
 
 struct xe_guc_ct_snapshot *
@@ -22,11 +23,18 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
 void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
 void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic);
 
+static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct)
+{
+	return ct->state == XE_GUC_CT_STATE_ENABLED;
+}
+
 static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct)
 {
+	if (!xe_guc_ct_enabled(ct))
+		return;
+
 	wake_up_all(&ct->wq);
-	if (ct->enabled)
-		queue_work(system_unbound_wq, &ct->g2h_worker);
+	queue_work(ct->g2h_wq, &ct->g2h_worker);
 	xe_guc_ct_fast_path(ct);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
index d814d4ee3f..fede4c6e93 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
@@ -73,6 +73,20 @@ struct xe_guc_ct_snapshot {
 };
 
 /**
+ * enum xe_guc_ct_state - CT state
+ * @XE_GUC_CT_STATE_NOT_INITIALIZED: CT not initialized, messages not expected in this state
+ * @XE_GUC_CT_STATE_DISABLED: CT disabled, messages not expected in this state
+ * @XE_GUC_CT_STATE_STOPPED: CT stopped, drop messages without errors
+ * @XE_GUC_CT_STATE_ENABLED: CT enabled, messages sent / received in this state
+ */
+enum xe_guc_ct_state {
+	XE_GUC_CT_STATE_NOT_INITIALIZED = 0,
+	XE_GUC_CT_STATE_DISABLED,
+	XE_GUC_CT_STATE_STOPPED,
+	XE_GUC_CT_STATE_ENABLED,
+};
+
+/**
  * struct xe_guc_ct - GuC command transport (CT) layer
  *
  * Includes a pair of CT buffers for bi-directional communication and tracking
@@ -87,17 +101,17 @@ struct xe_guc_ct {
 	spinlock_t fast_lock;
 	/** @ctbs: buffers for sending and receiving commands */
 	struct {
-		/** @send: Host to GuC (H2G, send) channel */
+		/** @ctbs.send: Host to GuC (H2G, send) channel */
 		struct guc_ctb h2g;
-		/** @recv: GuC to Host (G2H, receive) channel */
+		/** @ctbs.recv: GuC to Host (G2H, receive) channel */
 		struct guc_ctb g2h;
 	} ctbs;
 	/** @g2h_outstanding: number of outstanding G2H */
 	u32 g2h_outstanding;
 	/** @g2h_worker: worker to process G2H messages */
 	struct work_struct g2h_worker;
-	/** @enabled: CT enabled */
-	bool enabled;
+	/** @state: CT state */
+	enum xe_guc_ct_state state;
 	/** @fence_seqno: G2H fence seqno - 16 bits used by CT */
 	u32 fence_seqno;
 	/** @fence_lookup: G2H fence lookup */
@@ -106,6 +120,8 @@ struct xe_guc_ct {
 	wait_queue_head_t wq;
 	/** @g2h_fence_wq: wait queue used for G2H fencing */
 	wait_queue_head_t g2h_fence_wq;
+	/** @g2h_wq: used to process G2H */
+	struct workqueue_struct *g2h_wq;
 	/** @msg: Message buffer */
 	u32 msg[GUC_CTB_MSG_MAX_LEN];
 	/** @fast_msg: Message buffer */
diff --git a/drivers/gpu/drm/xe/xe_guc_db_mgr.c b/drivers/gpu/drm/xe/xe_guc_db_mgr.c
new file mode 100644
index 0000000000..8d9a0287df
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_db_mgr.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/bitmap.h>
+#include <linux/mutex.h>
+
+#include <drm/drm_managed.h>
+
+#include "regs/xe_guc_regs.h"
+
+#include "xe_assert.h"
+#include "xe_gt_printk.h"
+#include "xe_guc.h"
+#include "xe_guc_db_mgr.h"
+#include "xe_guc_types.h"
+
+/**
+ * DOC: GuC Doorbells
+ *
+ * The GFX doorbell solution provides a mechanism for submission of workload
+ * to the graphics hardware by a ring3 application without the penalty of
+ * ring transition for each workload submission.
+ *
+ * In SR-IOV mode, the doorbells are treated as shared resource and PF must
+ * be able to provision exclusive range of IDs across VFs, which may want to
+ * use this feature.
+ */
+
+static struct xe_guc *dbm_to_guc(struct xe_guc_db_mgr *dbm)
+{
+	return container_of(dbm, struct xe_guc, dbm);
+}
+
+static struct xe_gt *dbm_to_gt(struct xe_guc_db_mgr *dbm)
+{
+	return guc_to_gt(dbm_to_guc(dbm));
+}
+
+static struct xe_device *dbm_to_xe(struct xe_guc_db_mgr *dbm)
+{
+	return gt_to_xe(dbm_to_gt(dbm));
+}
+
+#define dbm_assert(_dbm, _cond)		xe_gt_assert(dbm_to_gt(_dbm), _cond)
+#define dbm_mutex(_dbm)			(&dbm_to_guc(_dbm)->submission_state.lock)
+
+static void dbm_print_locked(struct xe_guc_db_mgr *dbm, struct drm_printer *p, int indent);
+
+static void __fini_dbm(struct drm_device *drm, void *arg)
+{
+	struct xe_guc_db_mgr *dbm = arg;
+	unsigned int weight;
+
+	mutex_lock(dbm_mutex(dbm));
+
+	weight = bitmap_weight(dbm->bitmap, dbm->count);
+	if (weight) {
+		struct drm_printer p = xe_gt_info_printer(dbm_to_gt(dbm));
+
+		xe_gt_err(dbm_to_gt(dbm), "GuC doorbells manager unclean (%u/%u)\n",
+			  weight, dbm->count);
+		dbm_print_locked(dbm, &p, 1);
+	}
+
+	bitmap_free(dbm->bitmap);
+	dbm->bitmap = NULL;
+	dbm->count = 0;
+
+	mutex_unlock(dbm_mutex(dbm));
+}
+
+/**
+ * xe_guc_db_mgr_init() - Initialize GuC Doorbells Manager.
+ * @dbm: the &xe_guc_db_mgr to initialize
+ * @count: number of doorbells to manage
+ *
+ * The bare-metal or PF driver can pass ~0 as &count to indicate that all
+ * doorbells supported by the hardware are available for use.
+ *
+ * Only VF's drivers will have to provide explicit number of doorbells IDs
+ * that they can use.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_guc_db_mgr_init(struct xe_guc_db_mgr *dbm, unsigned int count)
+{
+	int ret;
+
+	if (count == ~0)
+		count = GUC_NUM_DOORBELLS;
+
+	dbm_assert(dbm, !dbm->bitmap);
+	dbm_assert(dbm, count <= GUC_NUM_DOORBELLS);
+
+	if (!count)
+		goto done;
+
+	dbm->bitmap = bitmap_zalloc(count, GFP_KERNEL);
+	if (!dbm->bitmap)
+		return -ENOMEM;
+	dbm->count = count;
+
+	ret = drmm_add_action_or_reset(&dbm_to_xe(dbm)->drm, __fini_dbm, dbm);
+	if (ret)
+		return ret;
+done:
+	xe_gt_dbg(dbm_to_gt(dbm), "using %u doorbell(s)\n", dbm->count);
+	return 0;
+}
+
+static int dbm_reserve_chunk_locked(struct xe_guc_db_mgr *dbm,
+				    unsigned int count, unsigned int spare)
+{
+	unsigned int used;
+	int index;
+
+	dbm_assert(dbm, count);
+	dbm_assert(dbm, count <= GUC_NUM_DOORBELLS);
+	dbm_assert(dbm, dbm->count <= GUC_NUM_DOORBELLS);
+	lockdep_assert_held(dbm_mutex(dbm));
+
+	if (!dbm->count)
+		return -ENODATA;
+
+	if (spare) {
+		used = bitmap_weight(dbm->bitmap, dbm->count);
+		if (used + count + spare > dbm->count)
+			return -EDQUOT;
+	}
+
+	index = bitmap_find_next_zero_area(dbm->bitmap, dbm->count, 0, count, 0);
+	if (index >= dbm->count)
+		return -ENOSPC;
+
+	bitmap_set(dbm->bitmap, index, count);
+
+	return index;
+}
+
+static void dbm_release_chunk_locked(struct xe_guc_db_mgr *dbm,
+				     unsigned int start, unsigned int count)
+{
+	dbm_assert(dbm, count);
+	dbm_assert(dbm, count <= GUC_NUM_DOORBELLS);
+	dbm_assert(dbm, dbm->count);
+	dbm_assert(dbm, dbm->count <= GUC_NUM_DOORBELLS);
+	lockdep_assert_held(dbm_mutex(dbm));
+
+	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+		unsigned int n;
+
+		for (n = 0; n < count; n++)
+			dbm_assert(dbm, test_bit(start + n, dbm->bitmap));
+	}
+	bitmap_clear(dbm->bitmap, start, count);
+}
+
+/**
+ * xe_guc_db_mgr_reserve_id_locked() - Reserve a single GuC Doorbell ID.
+ * @dbm: the &xe_guc_db_mgr
+ *
+ * This function expects that submission lock is already taken.
+ *
+ * Return: ID of the allocated GuC doorbell or a negative error code on failure.
+ */
+int xe_guc_db_mgr_reserve_id_locked(struct xe_guc_db_mgr *dbm)
+{
+	return dbm_reserve_chunk_locked(dbm, 1, 0);
+}
+
+/**
+ * xe_guc_db_mgr_release_id_locked() - Release a single GuC Doorbell ID.
+ * @dbm: the &xe_guc_db_mgr
+ * @id: the GuC Doorbell ID to release
+ *
+ * This function expects that submission lock is already taken.
+ */
+void xe_guc_db_mgr_release_id_locked(struct xe_guc_db_mgr *dbm, unsigned int id)
+{
+	return dbm_release_chunk_locked(dbm, id, 1);
+}
+
+/**
+ * xe_guc_db_mgr_reserve_range() - Reserve a range of GuC Doorbell IDs.
+ * @dbm: the &xe_guc_db_mgr
+ * @count: number of GuC doorbell IDs to reserve
+ * @spare: number of GuC doorbell IDs to keep available
+ *
+ * This function is dedicated for the for use by the PF which expects that
+ * allocated range for the VF will be contiguous and that there will be at
+ * least &spare IDs still available for the PF use after this reservation.
+ *
+ * Return: starting ID of the allocated GuC doorbell ID range or
+ *         a negative error code on failure.
+ */
+int xe_guc_db_mgr_reserve_range(struct xe_guc_db_mgr *dbm,
+				unsigned int count, unsigned int spare)
+{
+	int ret;
+
+	mutex_lock(dbm_mutex(dbm));
+	ret = dbm_reserve_chunk_locked(dbm, count, spare);
+	mutex_unlock(dbm_mutex(dbm));
+
+	return ret;
+}
+
+/**
+ * xe_guc_db_mgr_release_range() - Release a range of Doorbell IDs.
+ * @dbm: the &xe_guc_db_mgr
+ * @start: the starting ID of GuC doorbell ID range to release
+ * @count: number of GuC doorbell IDs to release
+ */
+void xe_guc_db_mgr_release_range(struct xe_guc_db_mgr *dbm,
+				 unsigned int start, unsigned int count)
+{
+	mutex_lock(dbm_mutex(dbm));
+	dbm_release_chunk_locked(dbm, start, count);
+	mutex_unlock(dbm_mutex(dbm));
+}
+
+static void dbm_print_locked(struct xe_guc_db_mgr *dbm, struct drm_printer *p, int indent)
+{
+	unsigned int rs, re;
+	unsigned int total;
+
+	drm_printf_indent(p, indent, "count: %u\n", dbm->count);
+	if (!dbm->bitmap)
+		return;
+
+	total = 0;
+	for_each_clear_bitrange(rs, re, dbm->bitmap, dbm->count) {
+		drm_printf_indent(p, indent, "available range: %u..%u (%u)\n",
+				  rs, re - 1, re - rs);
+		total += re - rs;
+	}
+	drm_printf_indent(p, indent, "available total: %u\n", total);
+
+	total = 0;
+	for_each_set_bitrange(rs, re, dbm->bitmap, dbm->count) {
+		drm_printf_indent(p, indent, "reserved range: %u..%u (%u)\n",
+				  rs, re - 1, re - rs);
+		total += re - rs;
+	}
+	drm_printf_indent(p, indent, "reserved total: %u\n", total);
+}
+
+/**
+ * xe_guc_db_mgr_print() - Print status of GuC Doorbells Manager.
+ * @dbm: the &xe_guc_db_mgr to print
+ * @p: the &drm_printer to print to
+ * @indent: tab indentation level
+ */
+void xe_guc_db_mgr_print(struct xe_guc_db_mgr *dbm,
+			 struct drm_printer *p, int indent)
+{
+	mutex_lock(dbm_mutex(dbm));
+	dbm_print_locked(dbm, p, indent);
+	mutex_unlock(dbm_mutex(dbm));
+}
+
+#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_guc_db_mgr_test.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_db_mgr.h b/drivers/gpu/drm/xe/xe_guc_db_mgr.h
new file mode 100644
index 0000000000..c250fa0ca9
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_db_mgr.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GUC_DB_MGR_H_
+#define _XE_GUC_DB_MGR_H_
+
+struct drm_printer;
+struct xe_guc_db_mgr;
+
+int xe_guc_db_mgr_init(struct xe_guc_db_mgr *dbm, unsigned int count);
+
+int xe_guc_db_mgr_reserve_id_locked(struct xe_guc_db_mgr *dbm);
+void xe_guc_db_mgr_release_id_locked(struct xe_guc_db_mgr *dbm, unsigned int id);
+
+int xe_guc_db_mgr_reserve_range(struct xe_guc_db_mgr *dbm, unsigned int count, unsigned int spare);
+void xe_guc_db_mgr_release_range(struct xe_guc_db_mgr *dbm, unsigned int start, unsigned int count);
+
+void xe_guc_db_mgr_print(struct xe_guc_db_mgr *dbm, struct drm_printer *p, int indent);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c
index ffd7d53bcc..d3822cbea2 100644
--- a/drivers/gpu/drm/xe/xe_guc_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c
@@ -14,6 +14,7 @@
 #include "xe_guc_ct.h"
 #include "xe_guc_log.h"
 #include "xe_macros.h"
+#include "xe_pm.h"
 
 static struct xe_guc *node_to_guc(struct drm_info_node *node)
 {
@@ -26,9 +27,9 @@ static int guc_info(struct seq_file *m, void *data)
 	struct xe_device *xe = guc_to_xe(guc);
 	struct drm_printer p = drm_seq_file_printer(m);
 
-	xe_device_mem_access_get(xe);
+	xe_pm_runtime_get(xe);
 	xe_guc_print_info(guc, &p);
-	xe_device_mem_access_put(xe);
+	xe_pm_runtime_put(xe);
 
 	return 0;
 }
@@ -39,9 +40,9 @@ static int guc_log(struct seq_file *m, void *data)
 	struct xe_device *xe = guc_to_xe(guc);
 	struct drm_printer p = drm_seq_file_printer(m);
 
-	xe_device_mem_access_get(xe);
+	xe_pm_runtime_get(xe);
 	xe_guc_log_print(&guc->log, &p);
-	xe_device_mem_access_put(xe);
+	xe_pm_runtime_put(xe);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index 4dd5a88a78..19ee71aeaf 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -14,6 +14,8 @@
 #define G2H_LEN_DW_DEREGISTER_CONTEXT		3
 #define G2H_LEN_DW_TLB_INVALIDATE		3
 
+#define GUC_ID_MAX			65535
+
 #define GUC_CONTEXT_DISABLE		0
 #define GUC_CONTEXT_ENABLE		1
 
@@ -97,6 +99,7 @@ struct guc_update_exec_queue_policy {
 #define   GUC_WA_POLLCS			BIT(18)
 #define   GUC_WA_RENDER_RST_RC6_EXIT	BIT(19)
 #define   GUC_WA_RCS_REGS_IN_CCS_REGS_LIST	BIT(21)
+#define   GUC_WA_ENABLE_TSC_CHECK_ON_RC6	BIT(22)
 
 #define GUC_CTL_FEATURE			2
 #define   GUC_CTL_ENABLE_SLPC		BIT(2)
@@ -206,7 +209,10 @@ struct guc_ads {
 	u32 capture_instance[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
 	u32 capture_class[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
 	u32 capture_global[GUC_CAPTURE_LIST_INDEX_MAX];
-	u32 reserved[14];
+	u32 wa_klv_addr_lo;
+	u32 wa_klv_addr_hi;
+	u32 wa_klv_size;
+	u32 reserved[11];
 } __packed;
 
 /* Engine usage stats */
diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
index 2a13a00917..d9b570a154 100644
--- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c
+++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
@@ -14,7 +14,7 @@
 #include "xe_guc.h"
 #include "xe_map.h"
 
-static int send_get_hwconfig(struct xe_guc *guc, u32 ggtt_addr, u32 size)
+static int send_get_hwconfig(struct xe_guc *guc, u64 ggtt_addr, u32 size)
 {
 	u32 action[] = {
 		XE_GUC_ACTION_GET_HWCONFIG,
@@ -78,8 +78,9 @@ int xe_guc_hwconfig_init(struct xe_guc *guc)
 		return -EINVAL;
 
 	bo = xe_managed_bo_create_pin_map(xe, tile, PAGE_ALIGN(size),
-					  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
-					  XE_BO_CREATE_GGTT_BIT);
+					  XE_BO_FLAG_SYSTEM |
+					  XE_BO_FLAG_GGTT |
+					  XE_BO_FLAG_GGTT_INVALIDATE);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 	guc->hwconfig.bo = bo;
diff --git a/drivers/gpu/drm/xe/xe_guc_hxg_helpers.h b/drivers/gpu/drm/xe/xe_guc_hxg_helpers.h
new file mode 100644
index 0000000000..aeeb573c68
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_hxg_helpers.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GUC_HXG_HELPERS_H_
+#define _XE_GUC_HXG_HELPERS_H_
+
+#include <linux/bitfield.h>
+#include <linux/types.h>
+
+#include "abi/guc_messages_abi.h"
+
+/**
+ * hxg_sizeof - Queries size of the object or type (in HXG units).
+ * @T: the object or type
+ *
+ * Force a compilation error if actual size is not aligned to HXG unit (u32).
+ *
+ * Return: size in dwords (u32).
+ */
+#define hxg_sizeof(T)	(sizeof(T) / sizeof(u32) + BUILD_BUG_ON_ZERO(sizeof(T) % sizeof(u32)))
+
+static inline const char *guc_hxg_type_to_string(unsigned int type)
+{
+	switch (type) {
+	case GUC_HXG_TYPE_REQUEST:
+		return "request";
+	case GUC_HXG_TYPE_FAST_REQUEST:
+		return "fast-request";
+	case GUC_HXG_TYPE_EVENT:
+		return "event";
+	case GUC_HXG_TYPE_NO_RESPONSE_BUSY:
+		return "busy";
+	case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
+		return "retry";
+	case GUC_HXG_TYPE_RESPONSE_FAILURE:
+		return "failure";
+	case GUC_HXG_TYPE_RESPONSE_SUCCESS:
+		return "response";
+	default:
+		return "<invalid>";
+	}
+}
+
+static inline bool guc_hxg_type_is_action(unsigned int type)
+{
+	switch (type) {
+	case GUC_HXG_TYPE_REQUEST:
+	case GUC_HXG_TYPE_FAST_REQUEST:
+	case GUC_HXG_TYPE_EVENT:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static inline bool guc_hxg_type_is_reply(unsigned int type)
+{
+	switch (type) {
+	case GUC_HXG_TYPE_NO_RESPONSE_BUSY:
+	case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
+	case GUC_HXG_TYPE_RESPONSE_FAILURE:
+	case GUC_HXG_TYPE_RESPONSE_SUCCESS:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static inline u32 guc_hxg_msg_encode_success(u32 *msg, u32 data0)
+{
+	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS) |
+		 FIELD_PREP(GUC_HXG_RESPONSE_MSG_0_DATA0, data0);
+
+	return GUC_HXG_RESPONSE_MSG_MIN_LEN;
+}
+
+static inline u32 guc_hxg_msg_encode_failure(u32 *msg, u32 error, u32 hint)
+{
+	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_FAILURE) |
+		 FIELD_PREP(GUC_HXG_FAILURE_MSG_0_HINT, hint) |
+		 FIELD_PREP(GUC_HXG_FAILURE_MSG_0_ERROR, error);
+
+	return GUC_HXG_FAILURE_MSG_LEN;
+}
+
+static inline u32 guc_hxg_msg_encode_busy(u32 *msg, u32 counter)
+{
+	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_NO_RESPONSE_BUSY) |
+		 FIELD_PREP(GUC_HXG_BUSY_MSG_0_COUNTER, counter);
+
+	return GUC_HXG_BUSY_MSG_LEN;
+}
+
+static inline u32 guc_hxg_msg_encode_retry(u32 *msg, u32 reason)
+{
+	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_NO_RESPONSE_RETRY) |
+		 FIELD_PREP(GUC_HXG_RETRY_MSG_0_REASON, reason);
+
+	return GUC_HXG_RETRY_MSG_LEN;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_id_mgr.c b/drivers/gpu/drm/xe/xe_guc_id_mgr.c
new file mode 100644
index 0000000000..0fb7c6b78c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_id_mgr.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include <linux/bitmap.h>
+#include <linux/mutex.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_assert.h"
+#include "xe_gt_printk.h"
+#include "xe_guc.h"
+#include "xe_guc_id_mgr.h"
+#include "xe_guc_types.h"
+
+static struct xe_guc *idm_to_guc(struct xe_guc_id_mgr *idm)
+{
+	return container_of(idm, struct xe_guc, submission_state.idm);
+}
+
+static struct xe_gt *idm_to_gt(struct xe_guc_id_mgr *idm)
+{
+	return guc_to_gt(idm_to_guc(idm));
+}
+
+static struct xe_device *idm_to_xe(struct xe_guc_id_mgr *idm)
+{
+	return gt_to_xe(idm_to_gt(idm));
+}
+
+#define idm_assert(idm, cond)		xe_gt_assert(idm_to_gt(idm), cond)
+#define idm_mutex(idm)			(&idm_to_guc(idm)->submission_state.lock)
+
+static void idm_print_locked(struct xe_guc_id_mgr *idm, struct drm_printer *p, int indent);
+
+static void __fini_idm(struct drm_device *drm, void *arg)
+{
+	struct xe_guc_id_mgr *idm = arg;
+
+	mutex_lock(idm_mutex(idm));
+
+	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+		unsigned int weight = bitmap_weight(idm->bitmap, idm->total);
+
+		if (weight) {
+			struct drm_printer p = xe_gt_info_printer(idm_to_gt(idm));
+
+			xe_gt_err(idm_to_gt(idm), "GUC ID manager unclean (%u/%u)\n",
+				  weight, idm->total);
+			idm_print_locked(idm, &p, 1);
+		}
+	}
+
+	bitmap_free(idm->bitmap);
+	idm->bitmap = NULL;
+	idm->total = 0;
+	idm->used = 0;
+
+	mutex_unlock(idm_mutex(idm));
+}
+
+/**
+ * xe_guc_id_mgr_init() - Initialize GuC context ID Manager.
+ * @idm: the &xe_guc_id_mgr to initialize
+ * @limit: number of IDs to manage
+ *
+ * The bare-metal or PF driver can pass ~0 as &limit to indicate that all
+ * context IDs supported by the GuC firmware are available for use.
+ *
+ * Only VF drivers will have to provide explicit number of context IDs
+ * that they can use.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_guc_id_mgr_init(struct xe_guc_id_mgr *idm, unsigned int limit)
+{
+	int ret;
+
+	idm_assert(idm, !idm->bitmap);
+	idm_assert(idm, !idm->total);
+	idm_assert(idm, !idm->used);
+
+	if (limit == ~0)
+		limit = GUC_ID_MAX;
+	else if (limit > GUC_ID_MAX)
+		return -ERANGE;
+	else if (!limit)
+		return -EINVAL;
+
+	idm->bitmap = bitmap_zalloc(limit, GFP_KERNEL);
+	if (!idm->bitmap)
+		return -ENOMEM;
+	idm->total = limit;
+
+	ret = drmm_add_action_or_reset(&idm_to_xe(idm)->drm, __fini_idm, idm);
+	if (ret)
+		return ret;
+
+	xe_gt_info(idm_to_gt(idm), "using %u GUC ID(s)\n", idm->total);
+	return 0;
+}
+
+static unsigned int find_last_zero_area(unsigned long *bitmap,
+					unsigned int total,
+					unsigned int count)
+{
+	unsigned int found = total;
+	unsigned int rs, re, range;
+
+	for_each_clear_bitrange(rs, re, bitmap, total) {
+		range = re - rs;
+		if (range < count)
+			continue;
+		found = rs + (range - count);
+	}
+	return found;
+}
+
+static int idm_reserve_chunk_locked(struct xe_guc_id_mgr *idm,
+				    unsigned int count, unsigned int retain)
+{
+	int id;
+
+	idm_assert(idm, count);
+	lockdep_assert_held(idm_mutex(idm));
+
+	if (!idm->total)
+		return -ENODATA;
+
+	if (retain) {
+		/*
+		 * For IDs reservations (used on PF for VFs) we want to make
+		 * sure there will be at least 'retain' available for the PF
+		 */
+		if (idm->used + count + retain > idm->total)
+			return -EDQUOT;
+		/*
+		 * ... and we want to reserve highest IDs close to the end.
+		 */
+		id = find_last_zero_area(idm->bitmap, idm->total, count);
+	} else {
+		/*
+		 * For regular IDs reservations (used by submission code)
+		 * we start searching from the lower range of IDs.
+		 */
+		id = bitmap_find_next_zero_area(idm->bitmap, idm->total, 0, count, 0);
+	}
+	if (id >= idm->total)
+		return -ENOSPC;
+
+	bitmap_set(idm->bitmap, id, count);
+	idm->used += count;
+
+	return id;
+}
+
+static void idm_release_chunk_locked(struct xe_guc_id_mgr *idm,
+				     unsigned int start, unsigned int count)
+{
+	idm_assert(idm, count);
+	idm_assert(idm, count <= idm->used);
+	idm_assert(idm, start < idm->total);
+	idm_assert(idm, start + count - 1 < idm->total);
+	lockdep_assert_held(idm_mutex(idm));
+
+	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+		unsigned int n;
+
+		for (n = 0; n < count; n++)
+			idm_assert(idm, test_bit(start + n, idm->bitmap));
+	}
+	bitmap_clear(idm->bitmap, start, count);
+	idm->used -= count;
+}
+
+/**
+ * xe_guc_id_mgr_reserve_locked() - Reserve one or more GuC context IDs.
+ * @idm: the &xe_guc_id_mgr
+ * @count: number of IDs to allocate (can't be 0)
+ *
+ * This function is dedicated for the use by the GuC submission code,
+ * where submission lock is already taken.
+ *
+ * Return: ID of allocated GuC context or a negative error code on failure.
+ */
+int xe_guc_id_mgr_reserve_locked(struct xe_guc_id_mgr *idm, unsigned int count)
+{
+	return idm_reserve_chunk_locked(idm, count, 0);
+}
+
+/**
+ * xe_guc_id_mgr_release_locked() - Release one or more GuC context IDs.
+ * @idm: the &xe_guc_id_mgr
+ * @id: the GuC context ID to release
+ * @count: number of IDs to release (can't be 0)
+ *
+ * This function is dedicated for the use by the GuC submission code,
+ * where submission lock is already taken.
+ */
+void xe_guc_id_mgr_release_locked(struct xe_guc_id_mgr *idm, unsigned int id,
+				  unsigned int count)
+{
+	return idm_release_chunk_locked(idm, id, count);
+}
+
+/**
+ * xe_guc_id_mgr_reserve() - Reserve a range of GuC context IDs.
+ * @idm: the &xe_guc_id_mgr
+ * @count: number of GuC context IDs to reserve (can't be 0)
+ * @retain: number of GuC context IDs to keep available (can't be 0)
+ *
+ * This function is dedicated for the use by the PF driver which expects that
+ * reserved range of IDs will be contiguous and that there will be at least
+ * &retain IDs still available for the PF after this reservation.
+ *
+ * Return: starting ID of the allocated GuC context ID range or
+ *         a negative error code on failure.
+ */
+int xe_guc_id_mgr_reserve(struct xe_guc_id_mgr *idm,
+			  unsigned int count, unsigned int retain)
+{
+	int ret;
+
+	idm_assert(idm, count);
+	idm_assert(idm, retain);
+
+	mutex_lock(idm_mutex(idm));
+	ret = idm_reserve_chunk_locked(idm, count, retain);
+	mutex_unlock(idm_mutex(idm));
+
+	return ret;
+}
+
+/**
+ * xe_guc_id_mgr_release() - Release a range of GuC context IDs.
+ * @idm: the &xe_guc_id_mgr
+ * @start: the starting ID of GuC context range to release
+ * @count: number of GuC context IDs to release
+ */
+void xe_guc_id_mgr_release(struct xe_guc_id_mgr *idm,
+			   unsigned int start, unsigned int count)
+{
+	mutex_lock(idm_mutex(idm));
+	idm_release_chunk_locked(idm, start, count);
+	mutex_unlock(idm_mutex(idm));
+}
+
+static void idm_print_locked(struct xe_guc_id_mgr *idm, struct drm_printer *p, int indent)
+{
+	unsigned int rs, re;
+
+	lockdep_assert_held(idm_mutex(idm));
+
+	drm_printf_indent(p, indent, "total %u\n", idm->total);
+	if (!idm->bitmap)
+		return;
+
+	drm_printf_indent(p, indent, "used %u\n", idm->used);
+	for_each_set_bitrange(rs, re, idm->bitmap, idm->total)
+		drm_printf_indent(p, indent, "range %u..%u (%u)\n", rs, re - 1, re - rs);
+}
+
+/**
+ * xe_guc_id_mgr_print() - Print status of GuC ID Manager.
+ * @idm: the &xe_guc_id_mgr to print
+ * @p: the &drm_printer to print to
+ * @indent: tab indentation level
+ */
+void xe_guc_id_mgr_print(struct xe_guc_id_mgr *idm, struct drm_printer *p, int indent)
+{
+	mutex_lock(idm_mutex(idm));
+	idm_print_locked(idm, p, indent);
+	mutex_unlock(idm_mutex(idm));
+}
+
+#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_guc_id_mgr_test.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_id_mgr.h b/drivers/gpu/drm/xe/xe_guc_id_mgr.h
new file mode 100644
index 0000000000..368f8c80e4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_id_mgr.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_GUC_ID_MGR_H_
+#define _XE_GUC_ID_MGR_H_
+
+struct drm_printer;
+struct xe_guc_id_mgr;
+
+int xe_guc_id_mgr_init(struct xe_guc_id_mgr *idm, unsigned int count);
+
+int xe_guc_id_mgr_reserve_locked(struct xe_guc_id_mgr *idm, unsigned int count);
+void xe_guc_id_mgr_release_locked(struct xe_guc_id_mgr *idm, unsigned int id, unsigned int count);
+
+int xe_guc_id_mgr_reserve(struct xe_guc_id_mgr *idm, unsigned int count, unsigned int retain);
+void xe_guc_id_mgr_release(struct xe_guc_id_mgr *idm, unsigned int start, unsigned int count);
+
+void xe_guc_id_mgr_print(struct xe_guc_id_mgr *idm, struct drm_printer *p, int indent);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c
new file mode 100644
index 0000000000..ceca949932
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <drm/drm_print.h>
+
+#include "abi/guc_klvs_abi.h"
+#include "xe_guc_klv_helpers.h"
+
+#define make_u64(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo)))
+
+/**
+ * xe_guc_klv_key_to_string - Convert KLV key into friendly name.
+ * @key: the `GuC KLV`_ key
+ *
+ * Return: name of the KLV key.
+ */
+const char *xe_guc_klv_key_to_string(u16 key)
+{
+	switch (key) {
+	/* VGT POLICY keys */
+	case GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY:
+		return "sched_if_idle";
+	case GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY:
+		return "sample_period";
+	case GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY:
+		return "reset_engine";
+	/* VF CFG keys */
+	case GUC_KLV_VF_CFG_GGTT_START_KEY:
+		return "ggtt_start";
+	case GUC_KLV_VF_CFG_GGTT_SIZE_KEY:
+		return "ggtt_size";
+	case GUC_KLV_VF_CFG_LMEM_SIZE_KEY:
+		return "lmem_size";
+	case GUC_KLV_VF_CFG_NUM_CONTEXTS_KEY:
+		return "num_contexts";
+	case GUC_KLV_VF_CFG_TILE_MASK_KEY:
+		return "tile_mask";
+	case GUC_KLV_VF_CFG_NUM_DOORBELLS_KEY:
+		return "num_doorbells";
+	case GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY:
+		return "exec_quantum";
+	case GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY:
+		return "preempt_timeout";
+	case GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID_KEY:
+		return "begin_db_id";
+	case GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY:
+		return "begin_ctx_id";
+	default:
+		return "(unknown)";
+	}
+}
+
+/**
+ * xe_guc_klv_print - Print content of the buffer with `GuC KLV`_.
+ * @klvs: the buffer with KLVs
+ * @num_dwords: number of dwords (u32) available in the buffer
+ * @p: the &drm_printer
+ *
+ * The buffer may contain more than one KLV.
+ */
+void xe_guc_klv_print(const u32 *klvs, u32 num_dwords, struct drm_printer *p)
+{
+	while (num_dwords >= GUC_KLV_LEN_MIN) {
+		u32 key = FIELD_GET(GUC_KLV_0_KEY, klvs[0]);
+		u32 len = FIELD_GET(GUC_KLV_0_LEN, klvs[0]);
+
+		klvs += GUC_KLV_LEN_MIN;
+		num_dwords -= GUC_KLV_LEN_MIN;
+
+		if (num_dwords < len) {
+			drm_printf(p, "{ key %#06x : truncated %zu of %zu bytes %*ph } # %s\n",
+				   key, num_dwords * sizeof(u32), len * sizeof(u32),
+				   (int)(num_dwords * sizeof(u32)), klvs,
+				   xe_guc_klv_key_to_string(key));
+			return;
+		}
+
+		switch (len) {
+		case 0:
+			drm_printf(p, "{ key %#06x : no value } # %s\n",
+				   key, xe_guc_klv_key_to_string(key));
+			break;
+		case 1:
+			drm_printf(p, "{ key %#06x : 32b value %u } # %s\n",
+				   key, klvs[0], xe_guc_klv_key_to_string(key));
+			break;
+		case 2:
+			drm_printf(p, "{ key %#06x : 64b value %#llx } # %s\n",
+				   key, make_u64(klvs[1], klvs[0]),
+				   xe_guc_klv_key_to_string(key));
+			break;
+		default:
+			drm_printf(p, "{ key %#06x : %zu bytes %*ph } # %s\n",
+				   key, len * sizeof(u32), (int)(len * sizeof(u32)),
+				   klvs, xe_guc_klv_key_to_string(key));
+			break;
+		}
+
+		klvs += len;
+		num_dwords -= len;
+	}
+
+	/* we don't expect any leftovers, fix if KLV header is ever changed */
+	BUILD_BUG_ON(GUC_KLV_LEN_MIN > 1);
+}
+
+/**
+ * xe_guc_klv_count - Count KLVs present in the buffer.
+ * @klvs: the buffer with KLVs
+ * @num_dwords: number of dwords (u32) in the buffer
+ *
+ * Return: number of recognized KLVs or
+ *         a negative error code if KLV buffer is truncated.
+ */
+int xe_guc_klv_count(const u32 *klvs, u32 num_dwords)
+{
+	int num_klvs = 0;
+
+	while (num_dwords >= GUC_KLV_LEN_MIN) {
+		u32 len = FIELD_GET(GUC_KLV_0_LEN, klvs[0]);
+
+		if (num_dwords < len + GUC_KLV_LEN_MIN)
+			break;
+
+		klvs += GUC_KLV_LEN_MIN + len;
+		num_dwords -= GUC_KLV_LEN_MIN + len;
+		num_klvs++;
+	}
+
+	return num_dwords ? -ENODATA : num_klvs;
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_klv_helpers.h b/drivers/gpu/drm/xe/xe_guc_klv_helpers.h
new file mode 100644
index 0000000000..b835e0ebe6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_klv_helpers.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_GUC_KLV_HELPERS_H_
+#define _XE_GUC_KLV_HELPERS_H_
+
+#include <linux/types.h>
+
+struct drm_printer;
+
+const char *xe_guc_klv_key_to_string(u16 key);
+
+void xe_guc_klv_print(const u32 *klvs, u32 num_dwords, struct drm_printer *p);
+int xe_guc_klv_count(const u32 *klvs, u32 num_dwords);
+
+/**
+ * PREP_GUC_KLV - Prepare KLV header value based on provided key and len.
+ * @key: KLV key
+ * @len: KLV length
+ *
+ * Return: value of the KLV header (u32).
+ */
+#define PREP_GUC_KLV(key, len) \
+	(FIELD_PREP(GUC_KLV_0_KEY, (key)) | \
+	 FIELD_PREP(GUC_KLV_0_LEN, (len)))
+
+/**
+ * PREP_GUC_KLV_CONST - Prepare KLV header value based on const key and len.
+ * @key: const KLV key
+ * @len: const KLV length
+ *
+ * Return: value of the KLV header (u32).
+ */
+#define PREP_GUC_KLV_CONST(key, len) \
+	(FIELD_PREP_CONST(GUC_KLV_0_KEY, (key)) | \
+	 FIELD_PREP_CONST(GUC_KLV_0_LEN, (len)))
+
+/**
+ * PREP_GUC_KLV_TAG - Prepare KLV header value based on unique KLV definition tag.
+ * @TAG: unique tag of the KLV definition
+ *
+ * Combine separate KEY and LEN definitions of the KLV identified by the TAG.
+ *
+ * Return: value of the KLV header (u32).
+ */
+#define PREP_GUC_KLV_TAG(TAG) \
+	PREP_GUC_KLV_CONST(GUC_KLV_##TAG##_KEY, GUC_KLV_##TAG##_LEN)
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
index bcd2f4d340..a37ee34194 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.c
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -84,8 +84,9 @@ int xe_guc_log_init(struct xe_guc_log *log)
 	struct xe_bo *bo;
 
 	bo = xe_managed_bo_create_pin_map(xe, tile, guc_log_size(),
-					  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
-					  XE_BO_CREATE_GGTT_BIT);
+					  XE_BO_FLAG_SYSTEM |
+					  XE_BO_FLAG_GGTT |
+					  XE_BO_FLAG_GGTT_INVALIDATE);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index d917025925..23382ced4e 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -145,25 +145,6 @@ static int pc_action_reset(struct xe_guc_pc *pc)
 	return ret;
 }
 
-static int pc_action_shutdown(struct xe_guc_pc *pc)
-{
-	struct  xe_guc_ct *ct = &pc_to_guc(pc)->ct;
-	int ret;
-	u32 action[] = {
-		GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
-		SLPC_EVENT(SLPC_EVENT_SHUTDOWN, 2),
-		xe_bo_ggtt_addr(pc->bo),
-		0,
-	};
-
-	ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
-	if (ret)
-		drm_err(&pc_to_xe(pc)->drm, "GuC PC shutdown %pe",
-			ERR_PTR(ret));
-
-	return ret;
-}
-
 static int pc_action_query_task_state(struct xe_guc_pc *pc)
 {
 	struct xe_guc_ct *ct = &pc_to_guc(pc)->ct;
@@ -381,8 +362,6 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc)
 	struct xe_device *xe = gt_to_xe(gt);
 	u32 freq;
 
-	xe_device_mem_access_get(gt_to_xe(gt));
-
 	/* When in RC6, actual frequency reported will be 0. */
 	if (GRAPHICS_VERx100(xe) >= 1270) {
 		freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1);
@@ -394,8 +373,6 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc)
 
 	freq = decode_freq(freq);
 
-	xe_device_mem_access_put(gt_to_xe(gt));
-
 	return freq;
 }
 
@@ -412,14 +389,13 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq)
 	struct xe_gt *gt = pc_to_gt(pc);
 	int ret;
 
-	xe_device_mem_access_get(gt_to_xe(gt));
 	/*
 	 * GuC SLPC plays with cur freq request when GuCRC is enabled
 	 * Block RC6 for a more reliable read.
 	 */
 	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	if (ret)
-		goto out;
+		return ret;
 
 	*freq = xe_mmio_read32(gt, RPNSWREQ);
 
@@ -427,9 +403,7 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq)
 	*freq = decode_freq(*freq);
 
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
-out:
-	xe_device_mem_access_put(gt_to_xe(gt));
-	return ret;
+	return 0;
 }
 
 /**
@@ -451,12 +425,7 @@ u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc)
  */
 u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc)
 {
-	struct xe_gt *gt = pc_to_gt(pc);
-	struct xe_device *xe = gt_to_xe(gt);
-
-	xe_device_mem_access_get(xe);
 	pc_update_rp_values(pc);
-	xe_device_mem_access_put(xe);
 
 	return pc->rpe_freq;
 }
@@ -485,7 +454,6 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
 	struct xe_gt *gt = pc_to_gt(pc);
 	int ret;
 
-	xe_device_mem_access_get(pc_to_xe(pc));
 	mutex_lock(&pc->freq_lock);
 	if (!pc->freq_ready) {
 		/* Might be in the middle of a gt reset */
@@ -511,7 +479,6 @@ fw:
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
 out:
 	mutex_unlock(&pc->freq_lock);
-	xe_device_mem_access_put(pc_to_xe(pc));
 	return ret;
 }
 
@@ -528,7 +495,6 @@ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
 {
 	int ret;
 
-	xe_device_mem_access_get(pc_to_xe(pc));
 	mutex_lock(&pc->freq_lock);
 	if (!pc->freq_ready) {
 		/* Might be in the middle of a gt reset */
@@ -544,8 +510,6 @@ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
 
 out:
 	mutex_unlock(&pc->freq_lock);
-	xe_device_mem_access_put(pc_to_xe(pc));
-
 	return ret;
 }
 
@@ -561,7 +525,6 @@ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq)
 {
 	int ret;
 
-	xe_device_mem_access_get(pc_to_xe(pc));
 	mutex_lock(&pc->freq_lock);
 	if (!pc->freq_ready) {
 		/* Might be in the middle of a gt reset */
@@ -577,7 +540,6 @@ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq)
 
 out:
 	mutex_unlock(&pc->freq_lock);
-	xe_device_mem_access_put(pc_to_xe(pc));
 	return ret;
 }
 
@@ -594,7 +556,6 @@ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
 {
 	int ret;
 
-	xe_device_mem_access_get(pc_to_xe(pc));
 	mutex_lock(&pc->freq_lock);
 	if (!pc->freq_ready) {
 		/* Might be in the middle of a gt reset */
@@ -610,7 +571,6 @@ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
 
 out:
 	mutex_unlock(&pc->freq_lock);
-	xe_device_mem_access_put(pc_to_xe(pc));
 	return ret;
 }
 
@@ -623,8 +583,6 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc)
 	struct xe_gt *gt = pc_to_gt(pc);
 	u32 reg, gt_c_state;
 
-	xe_device_mem_access_get(gt_to_xe(gt));
-
 	if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
 		reg = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1);
 		gt_c_state = REG_FIELD_GET(MTL_CC_MASK, reg);
@@ -633,8 +591,6 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc)
 		gt_c_state = REG_FIELD_GET(RCN_MASK, reg);
 	}
 
-	xe_device_mem_access_put(gt_to_xe(gt));
-
 	switch (gt_c_state) {
 	case GT_C6:
 		return GT_IDLE_C6;
@@ -654,9 +610,7 @@ u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc)
 	struct xe_gt *gt = pc_to_gt(pc);
 	u32 reg;
 
-	xe_device_mem_access_get(gt_to_xe(gt));
 	reg = xe_mmio_read32(gt, GT_GFX_RC6);
-	xe_device_mem_access_put(gt_to_xe(gt));
 
 	return reg;
 }
@@ -670,9 +624,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc)
 	struct xe_gt *gt = pc_to_gt(pc);
 	u64 reg;
 
-	xe_device_mem_access_get(gt_to_xe(gt));
 	reg = xe_mmio_read32(gt, MTL_MEDIA_MC6);
-	xe_device_mem_access_put(gt_to_xe(gt));
 
 	return reg;
 }
@@ -743,24 +695,28 @@ static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
 
 	ret = pc_action_query_task_state(pc);
 	if (ret)
-		return ret;
+		goto out;
 
 	/*
 	 * GuC defaults to some RPmax that is not actually achievable without
 	 * overclocking. Let's adjust it to the Hardware RP0, which is the
 	 * regular maximum
 	 */
-	if (pc_get_max_freq(pc) > pc->rp0_freq)
-		pc_set_max_freq(pc, pc->rp0_freq);
+	if (pc_get_max_freq(pc) > pc->rp0_freq) {
+		ret = pc_set_max_freq(pc, pc->rp0_freq);
+		if (ret)
+			goto out;
+	}
 
 	/*
 	 * Same thing happens for Server platforms where min is listed as
 	 * RPMax
 	 */
 	if (pc_get_min_freq(pc) > pc->rp0_freq)
-		pc_set_min_freq(pc, pc->rp0_freq);
+		ret = pc_set_min_freq(pc, pc->rp0_freq);
 
-	return 0;
+out:
+	return ret;
 }
 
 static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
@@ -801,23 +757,19 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc)
 	if (xe->info.skip_guc_pc)
 		return 0;
 
-	xe_device_mem_access_get(pc_to_xe(pc));
-
 	ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL);
 	if (ret)
-		goto out;
+		return ret;
 
 	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	if (ret)
-		goto out;
+		return ret;
 
 	xe_gt_idle_disable_c6(gt);
 
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
 
-out:
-	xe_device_mem_access_put(pc_to_xe(pc));
-	return ret;
+	return 0;
 }
 
 static void pc_init_pcode_freq(struct xe_guc_pc *pc)
@@ -870,11 +822,9 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
 
 	xe_gt_assert(gt, xe_device_uc_enabled(xe));
 
-	xe_device_mem_access_get(pc_to_xe(pc));
-
 	ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	if (ret)
-		goto out_fail_force_wake;
+		return ret;
 
 	if (xe->info.skip_guc_pc) {
 		if (xe->info.platform != XE_PVC)
@@ -914,8 +864,6 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
 
 out:
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
-out_fail_force_wake:
-	xe_device_mem_access_put(pc_to_xe(pc));
 	return ret;
 }
 
@@ -926,52 +874,32 @@ out_fail_force_wake:
 int xe_guc_pc_stop(struct xe_guc_pc *pc)
 {
 	struct xe_device *xe = pc_to_xe(pc);
-	int ret;
-
-	xe_device_mem_access_get(pc_to_xe(pc));
 
 	if (xe->info.skip_guc_pc) {
 		xe_gt_idle_disable_c6(pc_to_gt(pc));
-		ret = 0;
-		goto out;
+		return 0;
 	}
 
 	mutex_lock(&pc->freq_lock);
 	pc->freq_ready = false;
 	mutex_unlock(&pc->freq_lock);
 
-	ret = pc_action_shutdown(pc);
-	if (ret)
-		goto out;
-
-	if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_NOT_RUNNING)) {
-		drm_err(&pc_to_xe(pc)->drm, "GuC PC Shutdown failed\n");
-		ret = -EIO;
-	}
-
-out:
-	xe_device_mem_access_put(pc_to_xe(pc));
-	return ret;
+	return 0;
 }
 
 /**
  * xe_guc_pc_fini - Finalize GuC's Power Conservation component
- * @pc: Xe_GuC_PC instance
+ * @drm: DRM device
+ * @arg: opaque pointer that should point to Xe_GuC_PC instance
  */
-void xe_guc_pc_fini(struct xe_guc_pc *pc)
+static void xe_guc_pc_fini(struct drm_device *drm, void *arg)
 {
-	struct xe_device *xe = pc_to_xe(pc);
-
-	if (xe->info.skip_guc_pc) {
-		xe_device_mem_access_get(xe);
-		xe_gt_idle_disable_c6(pc_to_gt(pc));
-		xe_device_mem_access_put(xe);
-		return;
-	}
+	struct xe_guc_pc *pc = arg;
 
+	XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
 	XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
 	XE_WARN_ON(xe_guc_pc_stop(pc));
-	mutex_destroy(&pc->freq_lock);
+	xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
 }
 
 /**
@@ -985,18 +913,23 @@ int xe_guc_pc_init(struct xe_guc_pc *pc)
 	struct xe_device *xe = gt_to_xe(gt);
 	struct xe_bo *bo;
 	u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
+	int err;
 
 	if (xe->info.skip_guc_pc)
 		return 0;
 
-	mutex_init(&pc->freq_lock);
+	err = drmm_mutex_init(&xe->drm, &pc->freq_lock);
+	if (err)
+		return err;
 
 	bo = xe_managed_bo_create_pin_map(xe, tile, size,
-					  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
-					  XE_BO_CREATE_GGTT_BIT);
+					  XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+					  XE_BO_FLAG_GGTT |
+					  XE_BO_FLAG_GGTT_INVALIDATE);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
 	pc->bo = bo;
-	return 0;
+
+	return drmm_add_action_or_reset(&xe->drm, xe_guc_pc_fini, pc);
 }
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
index cecad8e930..d3680d8949 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -9,7 +9,6 @@
 #include "xe_guc_pc_types.h"
 
 int xe_guc_pc_init(struct xe_guc_pc *pc);
-void xe_guc_pc_fini(struct xe_guc_pc *pc);
 int xe_guc_pc_start(struct xe_guc_pc *pc);
 int xe_guc_pc_stop(struct xe_guc_pc *pc);
 int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc);
diff --git a/drivers/gpu/drm/xe/xe_guc_relay.c b/drivers/gpu/drm/xe/xe_guc_relay.c
new file mode 100644
index 0000000000..c0a2d8d5d3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_relay.c
@@ -0,0 +1,941 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+
+#include <drm/drm_managed.h>
+
+#include <kunit/static_stub.h>
+#include <kunit/test-bug.h>
+
+#include "abi/guc_actions_sriov_abi.h"
+#include "abi/guc_relay_actions_abi.h"
+#include "abi/guc_relay_communication_abi.h"
+
+#include "xe_assert.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_sriov_printk.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_hxg_helpers.h"
+#include "xe_guc_relay.h"
+#include "xe_guc_relay_types.h"
+#include "xe_sriov.h"
+
+/*
+ * How long should we wait for the response?
+ * XXX this value is subject for the profiling.
+ */
+#define RELAY_TIMEOUT_MSEC	(2500)
+
+static void relays_worker_fn(struct work_struct *w);
+
+static struct xe_guc *relay_to_guc(struct xe_guc_relay *relay)
+{
+	return container_of(relay, struct xe_guc, relay);
+}
+
+static struct xe_guc_ct *relay_to_ct(struct xe_guc_relay *relay)
+{
+	return &relay_to_guc(relay)->ct;
+}
+
+static struct xe_gt *relay_to_gt(struct xe_guc_relay *relay)
+{
+	return guc_to_gt(relay_to_guc(relay));
+}
+
+static struct xe_device *relay_to_xe(struct xe_guc_relay *relay)
+{
+	return gt_to_xe(relay_to_gt(relay));
+}
+
+#define relay_assert(relay, condition)	xe_gt_assert(relay_to_gt(relay), condition)
+#define relay_notice(relay, msg...)	xe_gt_sriov_notice(relay_to_gt(relay), "relay: " msg)
+#define relay_debug(relay, msg...)	xe_gt_sriov_dbg_verbose(relay_to_gt(relay), "relay: " msg)
+
+static int relay_get_totalvfs(struct xe_guc_relay *relay)
+{
+	struct xe_device *xe = relay_to_xe(relay);
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+
+	KUNIT_STATIC_STUB_REDIRECT(relay_get_totalvfs, relay);
+	return IS_SRIOV_VF(xe) ? 0 : pci_sriov_get_totalvfs(pdev);
+}
+
+static bool relay_is_ready(struct xe_guc_relay *relay)
+{
+	return mempool_initialized(&relay->pool);
+}
+
+static u32 relay_get_next_rid(struct xe_guc_relay *relay)
+{
+	u32 rid;
+
+	spin_lock(&relay->lock);
+	rid = ++relay->last_rid;
+	spin_unlock(&relay->lock);
+
+	return rid;
+}
+
+/**
+ * struct relay_transaction - internal data used to handle transactions
+ *
+ * Relation between struct relay_transaction members::
+ *
+ *                 <-------------------- GUC_CTB_MAX_DWORDS -------------->
+ *                                  <-------- GUC_RELAY_MSG_MAX_LEN --->
+ *                 <--- offset ---> <--- request_len ------->
+ *                +----------------+-------------------------+----------+--+
+ *                |                |                         |          |  |
+ *                +----------------+-------------------------+----------+--+
+ *                ^                ^
+ *               /                /
+ *    request_buf          request
+ *
+ *                 <-------------------- GUC_CTB_MAX_DWORDS -------------->
+ *                                  <-------- GUC_RELAY_MSG_MAX_LEN --->
+ *                 <--- offset ---> <--- response_len --->
+ *                +----------------+----------------------+-------------+--+
+ *                |                |                      |             |  |
+ *                +----------------+----------------------+-------------+--+
+ *                ^                ^
+ *               /                /
+ *   response_buf         response
+ */
+struct relay_transaction {
+	/**
+	 * @incoming: indicates whether this transaction represents an incoming
+	 *            request from the remote VF/PF or this transaction
+	 *            represents outgoing request to the remote VF/PF.
+	 */
+	bool incoming;
+
+	/**
+	 * @remote: PF/VF identifier of the origin (or target) of the relay
+	 *          request message.
+	 */
+	u32 remote;
+
+	/** @rid: identifier of the VF/PF relay message. */
+	u32 rid;
+
+	/**
+	 * @request: points to the inner VF/PF request message, copied to the
+	 *           #response_buf starting at #offset.
+	 */
+	u32 *request;
+
+	/** @request_len: length of the inner VF/PF request message. */
+	u32 request_len;
+
+	/**
+	 * @response: points to the placeholder buffer where inner VF/PF
+	 *            response will be located, for outgoing transaction
+	 *            this could be caller's buffer (if provided) otherwise
+	 *            it points to the #response_buf starting at #offset.
+	 */
+	u32 *response;
+
+	/**
+	 * @response_len: length of the inner VF/PF response message (only
+	 *                if #status is 0), initially set to the size of the
+	 *                placeholder buffer where response message will be
+	 *                copied.
+	 */
+	u32 response_len;
+
+	/**
+	 * @offset: offset to the start of the inner VF/PF relay message inside
+	 *          buffers; this offset is equal the length of the outer GuC
+	 *          relay header message.
+	 */
+	u32 offset;
+
+	/**
+	 * @request_buf: buffer with VF/PF request message including outer
+	 *               transport message.
+	 */
+	u32 request_buf[GUC_CTB_MAX_DWORDS];
+
+	/**
+	 * @response_buf: buffer with VF/PF response message including outer
+	 *                transport message.
+	 */
+	u32 response_buf[GUC_CTB_MAX_DWORDS];
+
+	/**
+	 * @reply: status of the reply, 0 means that data pointed by the
+	 *         #response is valid.
+	 */
+	int reply;
+
+	/** @done: completion of the outgoing transaction. */
+	struct completion done;
+
+	/** @link: transaction list link */
+	struct list_head link;
+};
+
+static u32 prepare_pf2guc(u32 *msg, u32 target, u32 rid)
+{
+	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+		 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, XE_GUC_ACTION_PF2GUC_RELAY_TO_VF);
+	msg[1] = FIELD_PREP(PF2GUC_RELAY_TO_VF_REQUEST_MSG_1_VFID, target);
+	msg[2] = FIELD_PREP(PF2GUC_RELAY_TO_VF_REQUEST_MSG_2_RELAY_ID, rid);
+
+	return PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN;
+}
+
+static u32 prepare_vf2guc(u32 *msg, u32 rid)
+{
+	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+		 FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, XE_GUC_ACTION_VF2GUC_RELAY_TO_PF);
+	msg[1] = FIELD_PREP(VF2GUC_RELAY_TO_PF_REQUEST_MSG_1_RELAY_ID, rid);
+
+	return VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN;
+}
+
+static struct relay_transaction *
+__relay_get_transaction(struct xe_guc_relay *relay, bool incoming, u32 remote, u32 rid,
+			const u32 *action, u32 action_len, u32 *resp, u32 resp_size)
+{
+	struct relay_transaction *txn;
+
+	relay_assert(relay, action_len >= GUC_RELAY_MSG_MIN_LEN);
+	relay_assert(relay, action_len <= GUC_RELAY_MSG_MAX_LEN);
+	relay_assert(relay, !(!!resp ^ !!resp_size));
+	relay_assert(relay, resp_size <= GUC_RELAY_MSG_MAX_LEN);
+	relay_assert(relay, resp_size == 0 || resp_size >= GUC_RELAY_MSG_MIN_LEN);
+
+	if (unlikely(!relay_is_ready(relay)))
+		return ERR_PTR(-ENODEV);
+
+	/*
+	 * For incoming requests we can't use GFP_KERNEL as those are delivered
+	 * with CTB lock held which is marked as used in the reclaim path.
+	 * Btw, that's one of the reason why we use mempool here!
+	 */
+	txn = mempool_alloc(&relay->pool, incoming ? GFP_ATOMIC : GFP_KERNEL);
+	if (!txn)
+		return ERR_PTR(-ENOMEM);
+
+	txn->incoming = incoming;
+	txn->remote = remote;
+	txn->rid = rid;
+	txn->offset = remote ?
+		prepare_pf2guc(incoming ? txn->response_buf : txn->request_buf, remote, rid) :
+		prepare_vf2guc(incoming ? txn->response_buf : txn->request_buf, rid);
+
+	relay_assert(relay, txn->offset);
+	relay_assert(relay, txn->offset + GUC_RELAY_MSG_MAX_LEN <= ARRAY_SIZE(txn->request_buf));
+	relay_assert(relay, txn->offset + GUC_RELAY_MSG_MAX_LEN <= ARRAY_SIZE(txn->response_buf));
+
+	txn->request = txn->request_buf + txn->offset;
+	memcpy(&txn->request_buf[txn->offset], action, sizeof(u32) * action_len);
+	txn->request_len = action_len;
+
+	txn->response = resp ?: txn->response_buf + txn->offset;
+	txn->response_len = resp_size ?: GUC_RELAY_MSG_MAX_LEN;
+	txn->reply = -ENOMSG;
+	INIT_LIST_HEAD(&txn->link);
+	init_completion(&txn->done);
+
+	return txn;
+}
+
+static struct relay_transaction *
+relay_new_transaction(struct xe_guc_relay *relay, u32 target, const u32 *action, u32 len,
+		      u32 *resp, u32 resp_size)
+{
+	u32 rid = relay_get_next_rid(relay);
+
+	return __relay_get_transaction(relay, false, target, rid, action, len, resp, resp_size);
+}
+
+static struct relay_transaction *
+relay_new_incoming_transaction(struct xe_guc_relay *relay, u32 origin, u32 rid,
+			       const u32 *action, u32 len)
+{
+	return __relay_get_transaction(relay, true, origin, rid, action, len, NULL, 0);
+}
+
+static void relay_release_transaction(struct xe_guc_relay *relay, struct relay_transaction *txn)
+{
+	relay_assert(relay, list_empty(&txn->link));
+
+	txn->offset = 0;
+	txn->response = NULL;
+	txn->reply = -ESTALE;
+	mempool_free(txn, &relay->pool);
+}
+
+static int relay_send_transaction(struct xe_guc_relay *relay, struct relay_transaction *txn)
+{
+	u32 len = txn->incoming ? txn->response_len : txn->request_len;
+	u32 *buf = txn->incoming ? txn->response_buf : txn->request_buf;
+	u32 *msg = buf + txn->offset;
+	int ret;
+
+	relay_assert(relay, txn->offset);
+	relay_assert(relay, txn->offset + len <= GUC_CTB_MAX_DWORDS);
+	relay_assert(relay, len >= GUC_RELAY_MSG_MIN_LEN);
+	relay_assert(relay, len <= GUC_RELAY_MSG_MAX_LEN);
+
+	relay_debug(relay, "sending %s.%u to %u = %*ph\n",
+		    guc_hxg_type_to_string(FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])),
+		    txn->rid, txn->remote, (int)sizeof(u32) * len, msg);
+
+	ret = xe_guc_ct_send_block(relay_to_ct(relay), buf, len + txn->offset);
+
+	if (unlikely(ret > 0)) {
+		relay_notice(relay, "Unexpected data=%d from GuC, wrong ABI?\n", ret);
+		ret = -EPROTO;
+	}
+	if (unlikely(ret < 0)) {
+		relay_notice(relay, "Failed to send %s.%x to GuC (%pe) %*ph ...\n",
+			     guc_hxg_type_to_string(FIELD_GET(GUC_HXG_MSG_0_TYPE, buf[0])),
+			     FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, buf[0]),
+			     ERR_PTR(ret), (int)sizeof(u32) * txn->offset, buf);
+		relay_notice(relay, "Failed to send %s.%u to %u (%pe) %*ph\n",
+			     guc_hxg_type_to_string(FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])),
+			     txn->rid, txn->remote, ERR_PTR(ret), (int)sizeof(u32) * len, msg);
+	}
+
+	return ret;
+}
+
+static void __fini_relay(struct drm_device *drm, void *arg)
+{
+	struct xe_guc_relay *relay = arg;
+
+	mempool_exit(&relay->pool);
+}
+
+/**
+ * xe_guc_relay_init - Initialize a &xe_guc_relay
+ * @relay: the &xe_guc_relay to initialize
+ *
+ * Initialize remaining members of &xe_guc_relay that may depend
+ * on the SR-IOV mode.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_guc_relay_init(struct xe_guc_relay *relay)
+{
+	const int XE_RELAY_MEMPOOL_MIN_NUM = 1;
+	struct xe_device *xe = relay_to_xe(relay);
+	int err;
+
+	relay_assert(relay, !relay_is_ready(relay));
+
+	if (!IS_SRIOV(xe))
+		return 0;
+
+	spin_lock_init(&relay->lock);
+	INIT_WORK(&relay->worker, relays_worker_fn);
+	INIT_LIST_HEAD(&relay->pending_relays);
+	INIT_LIST_HEAD(&relay->incoming_actions);
+
+	err = mempool_init_kmalloc_pool(&relay->pool, XE_RELAY_MEMPOOL_MIN_NUM +
+					relay_get_totalvfs(relay),
+					sizeof(struct relay_transaction));
+	if (err)
+		return err;
+
+	relay_debug(relay, "using mempool with %d elements\n", relay->pool.min_nr);
+
+	return drmm_add_action_or_reset(&xe->drm, __fini_relay, relay);
+}
+
+static u32 to_relay_error(int err)
+{
+	/* XXX: assume that relay errors match errno codes */
+	return err < 0 ? -err : GUC_RELAY_ERROR_UNDISCLOSED;
+}
+
+static int from_relay_error(u32 error)
+{
+	/* XXX: assume that relay errors match errno codes */
+	return error ? -error : -ENODATA;
+}
+
+static u32 sanitize_relay_error(u32 error)
+{
+	/* XXX TBD if generic error codes will be allowed */
+	if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG))
+		error = GUC_RELAY_ERROR_UNDISCLOSED;
+	return error;
+}
+
+static u32 sanitize_relay_error_hint(u32 hint)
+{
+	/* XXX TBD if generic error codes will be allowed */
+	if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG))
+		hint = 0;
+	return hint;
+}
+
+static u32 prepare_error_reply(u32 *msg, u32 error, u32 hint)
+{
+	msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		 FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_FAILURE) |
+		 FIELD_PREP(GUC_HXG_FAILURE_MSG_0_HINT, hint) |
+		 FIELD_PREP(GUC_HXG_FAILURE_MSG_0_ERROR, error);
+
+	XE_WARN_ON(!FIELD_FIT(GUC_HXG_FAILURE_MSG_0_ERROR, error));
+	XE_WARN_ON(!FIELD_FIT(GUC_HXG_FAILURE_MSG_0_HINT, hint));
+
+	return GUC_HXG_FAILURE_MSG_LEN;
+}
+
+static void relay_testonly_nop(struct xe_guc_relay *relay)
+{
+	KUNIT_STATIC_STUB_REDIRECT(relay_testonly_nop, relay);
+}
+
+static int relay_send_message_and_wait(struct xe_guc_relay *relay,
+				       struct relay_transaction *txn,
+				       u32 *buf, u32 buf_size)
+{
+	unsigned long timeout = msecs_to_jiffies(RELAY_TIMEOUT_MSEC);
+	u32 *msg = &txn->request_buf[txn->offset];
+	u32 len = txn->request_len;
+	u32 type, action, data0;
+	int ret;
+	long n;
+
+	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);
+	action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]);
+	data0 = FIELD_GET(GUC_HXG_REQUEST_MSG_0_DATA0, msg[0]);
+
+	relay_debug(relay, "%s.%u to %u action %#x:%u\n",
+		    guc_hxg_type_to_string(type),
+		    txn->rid, txn->remote, action, data0);
+
+	/* list ordering does not need to match RID ordering */
+	spin_lock(&relay->lock);
+	list_add_tail(&txn->link, &relay->pending_relays);
+	spin_unlock(&relay->lock);
+
+resend:
+	ret = relay_send_transaction(relay, txn);
+	if (unlikely(ret < 0))
+		goto unlink;
+
+wait:
+	n = wait_for_completion_timeout(&txn->done, timeout);
+	if (unlikely(n == 0 && txn->reply)) {
+		ret = -ETIME;
+		goto unlink;
+	}
+
+	relay_debug(relay, "%u.%u reply %d after %u msec\n",
+		    txn->remote, txn->rid, txn->reply, jiffies_to_msecs(timeout - n));
+	if (unlikely(txn->reply)) {
+		reinit_completion(&txn->done);
+		if (txn->reply == -EAGAIN)
+			goto resend;
+		if (txn->reply == -EBUSY) {
+			relay_testonly_nop(relay);
+			goto wait;
+		}
+		if (txn->reply > 0)
+			ret = from_relay_error(txn->reply);
+		else
+			ret = txn->reply;
+		goto unlink;
+	}
+
+	relay_debug(relay, "%u.%u response %*ph\n", txn->remote, txn->rid,
+		    (int)sizeof(u32) * txn->response_len, txn->response);
+	relay_assert(relay, txn->response_len >= GUC_RELAY_MSG_MIN_LEN);
+	ret = txn->response_len;
+
+unlink:
+	spin_lock(&relay->lock);
+	list_del_init(&txn->link);
+	spin_unlock(&relay->lock);
+
+	if (unlikely(ret < 0)) {
+		relay_notice(relay, "Unsuccessful %s.%u %#x:%u to %u (%pe) %*ph\n",
+			     guc_hxg_type_to_string(type), txn->rid,
+			     action, data0, txn->remote, ERR_PTR(ret),
+			     (int)sizeof(u32) * len, msg);
+	}
+
+	return ret;
+}
+
+static int relay_send_to(struct xe_guc_relay *relay, u32 target,
+			 const u32 *msg, u32 len, u32 *buf, u32 buf_size)
+{
+	struct relay_transaction *txn;
+	int ret;
+
+	relay_assert(relay, len >= GUC_RELAY_MSG_MIN_LEN);
+	relay_assert(relay, len <= GUC_RELAY_MSG_MAX_LEN);
+	relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_HOST);
+	relay_assert(relay, guc_hxg_type_is_action(FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])));
+
+	if (unlikely(!relay_is_ready(relay)))
+		return -ENODEV;
+
+	txn = relay_new_transaction(relay, target, msg, len, buf, buf_size);
+	if (IS_ERR(txn))
+		return PTR_ERR(txn);
+
+	switch (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])) {
+	case GUC_HXG_TYPE_REQUEST:
+		ret = relay_send_message_and_wait(relay, txn, buf, buf_size);
+		break;
+	case GUC_HXG_TYPE_FAST_REQUEST:
+		relay_assert(relay, !GUC_HXG_TYPE_FAST_REQUEST);
+		fallthrough;
+	case GUC_HXG_TYPE_EVENT:
+		ret = relay_send_transaction(relay, txn);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	relay_release_transaction(relay, txn);
+	return ret;
+}
+
+#ifdef CONFIG_PCI_IOV
+/**
+ * xe_guc_relay_send_to_vf - Send a message to the VF.
+ * @relay: the &xe_guc_relay which will send the message
+ * @target: target VF number
+ * @msg: request message to be sent
+ * @len: length of the request message (in dwords, can't be 0)
+ * @buf: placeholder for the response message
+ * @buf_size: size of the response message placeholder (in dwords)
+ *
+ * This function can only be used by the driver running in the SR-IOV PF mode.
+ *
+ * Return: Non-negative response length (in dwords) or
+ *         a negative error code on failure.
+ */
+int xe_guc_relay_send_to_vf(struct xe_guc_relay *relay, u32 target,
+			    const u32 *msg, u32 len, u32 *buf, u32 buf_size)
+{
+	relay_assert(relay, IS_SRIOV_PF(relay_to_xe(relay)));
+
+	return relay_send_to(relay, target, msg, len, buf, buf_size);
+}
+#endif
+
+/**
+ * xe_guc_relay_send_to_pf - Send a message to the PF.
+ * @relay: the &xe_guc_relay which will send the message
+ * @msg: request message to be sent
+ * @len: length of the message (in dwords, can't be 0)
+ * @buf: placeholder for the response message
+ * @buf_size: size of the response message placeholder (in dwords)
+ *
+ * This function can only be used by driver running in SR-IOV VF mode.
+ *
+ * Return: Non-negative response length (in dwords) or
+ *         a negative error code on failure.
+ */
+int xe_guc_relay_send_to_pf(struct xe_guc_relay *relay,
+			    const u32 *msg, u32 len, u32 *buf, u32 buf_size)
+{
+	relay_assert(relay, IS_SRIOV_VF(relay_to_xe(relay)));
+
+	return relay_send_to(relay, PFID, msg, len, buf, buf_size);
+}
+
+static int relay_handle_reply(struct xe_guc_relay *relay, u32 origin,
+			      u32 rid, int reply, const u32 *msg, u32 len)
+{
+	struct relay_transaction *pending;
+	int err = -ESRCH;
+
+	spin_lock(&relay->lock);
+	list_for_each_entry(pending, &relay->pending_relays, link) {
+		if (pending->remote != origin || pending->rid != rid) {
+			relay_debug(relay, "%u.%u still awaits response\n",
+				    pending->remote, pending->rid);
+			continue;
+		}
+		err = 0; /* found! */
+		if (reply == 0) {
+			if (len > pending->response_len) {
+				reply = -ENOBUFS;
+				err = -ENOBUFS;
+			} else {
+				memcpy(pending->response, msg, 4 * len);
+				pending->response_len = len;
+			}
+		}
+		pending->reply = reply;
+		complete_all(&pending->done);
+		break;
+	}
+	spin_unlock(&relay->lock);
+
+	return err;
+}
+
+static int relay_handle_failure(struct xe_guc_relay *relay, u32 origin,
+				u32 rid, const u32 *msg, u32 len)
+{
+	int error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[0]);
+	u32 hint __maybe_unused = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[0]);
+
+	relay_assert(relay, len);
+	relay_debug(relay, "%u.%u error %#x (%pe) hint %u debug %*ph\n",
+		    origin, rid, error, ERR_PTR(-error), hint, 4 * (len - 1), msg + 1);
+
+	return relay_handle_reply(relay, origin, rid, error ?: -EREMOTEIO, NULL, 0);
+}
+
+static int relay_testloop_action_handler(struct xe_guc_relay *relay, u32 origin,
+					 const u32 *msg, u32 len, u32 *response, u32 size)
+{
+	static ktime_t last_reply = 0;
+	u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);
+	u32 action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]);
+	u32 opcode = FIELD_GET(GUC_HXG_REQUEST_MSG_0_DATA0, msg[0]);
+	ktime_t now = ktime_get();
+	bool busy;
+	int ret;
+
+	relay_assert(relay, guc_hxg_type_is_action(type));
+	relay_assert(relay, action == GUC_RELAY_ACTION_VFXPF_TESTLOOP);
+
+	if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV))
+		return -ECONNREFUSED;
+
+	if (!last_reply)
+		last_reply = now;
+	busy = ktime_before(now, ktime_add_ms(last_reply, 2 * RELAY_TIMEOUT_MSEC));
+	if (!busy)
+		last_reply = now;
+
+	switch (opcode) {
+	case VFXPF_TESTLOOP_OPCODE_NOP:
+		if (type == GUC_HXG_TYPE_EVENT)
+			return 0;
+		return guc_hxg_msg_encode_success(response, 0);
+	case VFXPF_TESTLOOP_OPCODE_BUSY:
+		if (type == GUC_HXG_TYPE_EVENT)
+			return -EPROTO;
+		msleep(RELAY_TIMEOUT_MSEC / 8);
+		if (busy)
+			return -EINPROGRESS;
+		return guc_hxg_msg_encode_success(response, 0);
+	case VFXPF_TESTLOOP_OPCODE_RETRY:
+		if (type == GUC_HXG_TYPE_EVENT)
+			return -EPROTO;
+		msleep(RELAY_TIMEOUT_MSEC / 8);
+		if (busy)
+			return guc_hxg_msg_encode_retry(response, 0);
+		return guc_hxg_msg_encode_success(response, 0);
+	case VFXPF_TESTLOOP_OPCODE_ECHO:
+		if (type == GUC_HXG_TYPE_EVENT)
+			return -EPROTO;
+		if (size < len)
+			return -ENOBUFS;
+		ret = guc_hxg_msg_encode_success(response, len);
+		memcpy(response + ret, msg + ret, (len - ret) * sizeof(u32));
+		return len;
+	case VFXPF_TESTLOOP_OPCODE_FAIL:
+		return -EHWPOISON;
+	default:
+		break;
+	}
+
+	relay_notice(relay, "Unexpected action %#x opcode %#x\n", action, opcode);
+	return -EBADRQC;
+}
+
+static int relay_action_handler(struct xe_guc_relay *relay, u32 origin,
+				const u32 *msg, u32 len, u32 *response, u32 size)
+{
+	u32 type;
+	int ret;
+
+	relay_assert(relay, len >= GUC_HXG_MSG_MIN_LEN);
+
+	if (FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]) == GUC_RELAY_ACTION_VFXPF_TESTLOOP)
+		return relay_testloop_action_handler(relay, origin, msg, len, response, size);
+
+	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);
+
+	/* XXX: PF services will be added later */
+	ret = -EOPNOTSUPP;
+
+	if (type == GUC_HXG_TYPE_EVENT)
+		relay_assert(relay, ret <= 0);
+
+	return ret;
+}
+
+static struct relay_transaction *relay_dequeue_transaction(struct xe_guc_relay *relay)
+{
+	struct relay_transaction *txn;
+
+	spin_lock(&relay->lock);
+	txn = list_first_entry_or_null(&relay->incoming_actions, struct relay_transaction, link);
+	if (txn)
+		list_del_init(&txn->link);
+	spin_unlock(&relay->lock);
+
+	return txn;
+}
+
+static void relay_process_incoming_action(struct xe_guc_relay *relay)
+{
+	struct relay_transaction *txn;
+	bool again = false;
+	u32 type;
+	int ret;
+
+	txn = relay_dequeue_transaction(relay);
+	if (!txn)
+		return;
+
+	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, txn->request_buf[txn->offset]);
+
+	ret = relay_action_handler(relay, txn->remote,
+				   txn->request_buf + txn->offset, txn->request_len,
+				   txn->response_buf + txn->offset,
+				   ARRAY_SIZE(txn->response_buf) - txn->offset);
+
+	if (ret == -EINPROGRESS) {
+		again = true;
+		ret = guc_hxg_msg_encode_busy(txn->response_buf + txn->offset, 0);
+	}
+
+	if (ret > 0) {
+		txn->response_len = ret;
+		ret = relay_send_transaction(relay, txn);
+	}
+
+	if (ret < 0) {
+		u32 error = to_relay_error(ret);
+
+		relay_notice(relay, "Failed to handle %s.%u from %u (%pe) %*ph\n",
+			     guc_hxg_type_to_string(type), txn->rid, txn->remote,
+			     ERR_PTR(ret), 4 * txn->request_len, txn->request_buf + txn->offset);
+
+		txn->response_len = prepare_error_reply(txn->response_buf + txn->offset,
+							txn->remote ?
+							sanitize_relay_error(error) : error,
+							txn->remote ?
+							sanitize_relay_error_hint(-ret) : -ret);
+		ret = relay_send_transaction(relay, txn);
+		again = false;
+	}
+
+	if (again) {
+		spin_lock(&relay->lock);
+		list_add(&txn->link, &relay->incoming_actions);
+		spin_unlock(&relay->lock);
+		return;
+	}
+
+	if (unlikely(ret < 0))
+		relay_notice(relay, "Failed to process action.%u (%pe) %*ph\n",
+			     txn->rid, ERR_PTR(ret), 4 * txn->request_len,
+			     txn->request_buf + txn->offset);
+
+	relay_release_transaction(relay, txn);
+}
+
+static bool relay_needs_worker(struct xe_guc_relay *relay)
+{
+	return !list_empty(&relay->incoming_actions);
+}
+
+static void relay_kick_worker(struct xe_guc_relay *relay)
+{
+	KUNIT_STATIC_STUB_REDIRECT(relay_kick_worker, relay);
+	queue_work(relay_to_xe(relay)->sriov.wq, &relay->worker);
+}
+
+static void relays_worker_fn(struct work_struct *w)
+{
+	struct xe_guc_relay *relay = container_of(w, struct xe_guc_relay, worker);
+
+	relay_process_incoming_action(relay);
+
+	if (relay_needs_worker(relay))
+		relay_kick_worker(relay);
+}
+
+static int relay_queue_action_msg(struct xe_guc_relay *relay, u32 origin, u32 rid,
+				  const u32 *msg, u32 len)
+{
+	struct relay_transaction *txn;
+
+	txn = relay_new_incoming_transaction(relay, origin, rid, msg, len);
+	if (IS_ERR(txn))
+		return PTR_ERR(txn);
+
+	spin_lock(&relay->lock);
+	list_add_tail(&txn->link, &relay->incoming_actions);
+	spin_unlock(&relay->lock);
+
+	relay_kick_worker(relay);
+	return 0;
+}
+
+static int relay_process_msg(struct xe_guc_relay *relay, u32 origin, u32 rid,
+			     const u32 *msg, u32 len)
+{
+	u32 type;
+	int err;
+
+	if (unlikely(len < GUC_HXG_MSG_MIN_LEN))
+		return -EPROTO;
+
+	if (FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) != GUC_HXG_ORIGIN_HOST)
+		return -EPROTO;
+
+	type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);
+	relay_debug(relay, "received %s.%u from %u = %*ph\n",
+		    guc_hxg_type_to_string(type), rid, origin, 4 * len, msg);
+
+	switch (type) {
+	case GUC_HXG_TYPE_REQUEST:
+	case GUC_HXG_TYPE_FAST_REQUEST:
+	case GUC_HXG_TYPE_EVENT:
+		err = relay_queue_action_msg(relay, origin, rid, msg, len);
+		break;
+	case GUC_HXG_TYPE_RESPONSE_SUCCESS:
+		err = relay_handle_reply(relay, origin, rid, 0, msg, len);
+		break;
+	case GUC_HXG_TYPE_NO_RESPONSE_BUSY:
+		err = relay_handle_reply(relay, origin, rid, -EBUSY, NULL, 0);
+		break;
+	case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
+		err = relay_handle_reply(relay, origin, rid, -EAGAIN, NULL, 0);
+		break;
+	case GUC_HXG_TYPE_RESPONSE_FAILURE:
+		err = relay_handle_failure(relay, origin, rid, msg, len);
+		break;
+	default:
+		err = -EBADRQC;
+	}
+
+	if (unlikely(err))
+		relay_notice(relay, "Failed to process %s.%u from %u (%pe) %*ph\n",
+			     guc_hxg_type_to_string(type), rid, origin,
+			     ERR_PTR(err), 4 * len, msg);
+
+	return err;
+}
+
+/**
+ * xe_guc_relay_process_guc2vf - Handle relay notification message from the GuC.
+ * @relay: the &xe_guc_relay which will handle the message
+ * @msg: message to be handled
+ * @len: length of the message (in dwords)
+ *
+ * This function will handle relay messages received from the GuC.
+ *
+ * This function is can only be used if driver is running in SR-IOV mode.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_guc_relay_process_guc2vf(struct xe_guc_relay *relay, const u32 *msg, u32 len)
+{
+	u32 rid;
+
+	relay_assert(relay, len >= GUC_HXG_MSG_MIN_LEN);
+	relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
+	relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
+	relay_assert(relay, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
+		     XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF);
+
+	if (unlikely(!IS_SRIOV_VF(relay_to_xe(relay)) && !kunit_get_current_test()))
+		return -EPERM;
+
+	if (unlikely(!relay_is_ready(relay)))
+		return -ENODEV;
+
+	if (unlikely(len < GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN))
+		return -EPROTO;
+
+	if (unlikely(len > GUC2VF_RELAY_FROM_PF_EVENT_MSG_MAX_LEN))
+		return -EMSGSIZE;
+
+	if (unlikely(FIELD_GET(GUC_HXG_EVENT_MSG_0_DATA0, msg[0])))
+		return -EPFNOSUPPORT;
+
+	rid = FIELD_GET(GUC2VF_RELAY_FROM_PF_EVENT_MSG_1_RELAY_ID, msg[1]);
+
+	return relay_process_msg(relay, PFID, rid,
+				 msg + GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN,
+				 len - GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN);
+}
+
+#ifdef CONFIG_PCI_IOV
+/**
+ * xe_guc_relay_process_guc2pf - Handle relay notification message from the GuC.
+ * @relay: the &xe_guc_relay which will handle the message
+ * @msg: message to be handled
+ * @len: length of the message (in dwords)
+ *
+ * This function will handle relay messages received from the GuC.
+ *
+ * This function can only be used if driver is running in SR-IOV PF mode.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_guc_relay_process_guc2pf(struct xe_guc_relay *relay, const u32 *msg, u32 len)
+{
+	u32 origin, rid;
+	int err;
+
+	relay_assert(relay, len >= GUC_HXG_EVENT_MSG_MIN_LEN);
+	relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
+	relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
+	relay_assert(relay, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
+		     XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF);
+
+	if (unlikely(!IS_SRIOV_PF(relay_to_xe(relay)) && !kunit_get_current_test()))
+		return -EPERM;
+
+	if (unlikely(!relay_is_ready(relay)))
+		return -ENODEV;
+
+	if (unlikely(len < GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN))
+		return -EPROTO;
+
+	if (unlikely(len > GUC2PF_RELAY_FROM_VF_EVENT_MSG_MAX_LEN))
+		return -EMSGSIZE;
+
+	if (unlikely(FIELD_GET(GUC_HXG_EVENT_MSG_0_DATA0, msg[0])))
+		return -EPFNOSUPPORT;
+
+	origin = FIELD_GET(GUC2PF_RELAY_FROM_VF_EVENT_MSG_1_VFID, msg[1]);
+	rid = FIELD_GET(GUC2PF_RELAY_FROM_VF_EVENT_MSG_2_RELAY_ID, msg[2]);
+
+	if (unlikely(origin > relay_get_totalvfs(relay)))
+		return -ENOENT;
+
+	err = relay_process_msg(relay, origin, rid,
+				msg + GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN,
+				len - GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN);
+
+	return err;
+}
+#endif
+
+#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_guc_relay_test.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_relay.h b/drivers/gpu/drm/xe/xe_guc_relay.h
new file mode 100644
index 0000000000..385429aa18
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_relay.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GUC_RELAY_H_
+#define _XE_GUC_RELAY_H_
+
+#include <linux/types.h>
+#include <linux/errno.h>
+
+struct xe_guc_relay;
+
+int xe_guc_relay_init(struct xe_guc_relay *relay);
+
+int xe_guc_relay_send_to_pf(struct xe_guc_relay *relay,
+			    const u32 *msg, u32 len, u32 *buf, u32 buf_size);
+
+int xe_guc_relay_process_guc2vf(struct xe_guc_relay *relay, const u32 *msg, u32 len);
+
+#ifdef CONFIG_PCI_IOV
+int xe_guc_relay_send_to_vf(struct xe_guc_relay *relay, u32 target,
+			    const u32 *msg, u32 len, u32 *buf, u32 buf_size);
+int xe_guc_relay_process_guc2pf(struct xe_guc_relay *relay, const u32 *msg, u32 len);
+#else
+static inline int xe_guc_relay_send_to_vf(struct xe_guc_relay *relay, u32 target,
+					  const u32 *msg, u32 len, u32 *buf, u32 buf_size)
+{
+	return -ENODEV;
+}
+static inline int xe_guc_relay_process_guc2pf(struct xe_guc_relay *relay, const u32 *msg, u32 len)
+{
+	return -ENODEV;
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_relay_types.h b/drivers/gpu/drm/xe/xe_guc_relay_types.h
new file mode 100644
index 0000000000..5999fcb77e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_relay_types.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GUC_RELAY_TYPES_H_
+#define _XE_GUC_RELAY_TYPES_H_
+
+#include <linux/mempool.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+/**
+ * struct xe_guc_relay - Data used by the VF-PF Relay Communication over GuC.
+ */
+struct xe_guc_relay {
+	/**@lock: protects all internal data. */
+	spinlock_t lock;
+
+	/** @worker: dispatches incoming action messages. */
+	struct work_struct worker;
+
+	/** @pending_relays: list of sent requests that await a response. */
+	struct list_head pending_relays;
+
+	/** @incoming_actions: list of incoming relay action messages to process. */
+	struct list_head incoming_actions;
+
+	/** @pool: pool of the relay message buffers. */
+	mempool_t pool;
+
+	/** @last_rid: last Relay-ID used while sending a message. */
+	u32 last_rid;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index bd1079aa88..0f42971ff0 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -23,9 +23,11 @@
 #include "xe_force_wake.h"
 #include "xe_gpu_scheduler.h"
 #include "xe_gt.h"
+#include "xe_gt_printk.h"
 #include "xe_guc.h"
 #include "xe_guc_ct.h"
 #include "xe_guc_exec_queue_types.h"
+#include "xe_guc_id_mgr.h"
 #include "xe_guc_submit_types.h"
 #include "xe_hw_engine.h"
 #include "xe_hw_fence.h"
@@ -235,17 +237,9 @@ static void guc_submit_fini(struct drm_device *drm, void *arg)
 	struct xe_guc *guc = arg;
 
 	xa_destroy(&guc->submission_state.exec_queue_lookup);
-	ida_destroy(&guc->submission_state.guc_ids);
-	bitmap_free(guc->submission_state.guc_ids_bitmap);
 	free_submit_wq(guc);
-	mutex_destroy(&guc->submission_state.lock);
 }
 
-#define GUC_ID_MAX		65535
-#define GUC_ID_NUMBER_MLRC	4096
-#define GUC_ID_NUMBER_SLRC	(GUC_ID_MAX - GUC_ID_NUMBER_MLRC)
-#define GUC_ID_START_MLRC	GUC_ID_NUMBER_SLRC
-
 static const struct xe_exec_queue_ops guc_exec_queue_ops;
 
 static void primelockdep(struct xe_guc *guc)
@@ -268,33 +262,28 @@ int xe_guc_submit_init(struct xe_guc *guc)
 	struct xe_gt *gt = guc_to_gt(guc);
 	int err;
 
-	guc->submission_state.guc_ids_bitmap =
-		bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL);
-	if (!guc->submission_state.guc_ids_bitmap)
-		return -ENOMEM;
+	err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
+	if (err)
+		return err;
+
+	err = xe_guc_id_mgr_init(&guc->submission_state.idm, ~0);
+	if (err)
+		return err;
 
 	err = alloc_submit_wq(guc);
-	if (err) {
-		bitmap_free(guc->submission_state.guc_ids_bitmap);
+	if (err)
 		return err;
-	}
 
 	gt->exec_queue_ops = &guc_exec_queue_ops;
 
-	mutex_init(&guc->submission_state.lock);
 	xa_init(&guc->submission_state.exec_queue_lookup);
-	ida_init(&guc->submission_state.guc_ids);
 
 	spin_lock_init(&guc->submission_state.suspend.lock);
 	guc->submission_state.suspend.context = dma_fence_context_alloc(1);
 
 	primelockdep(guc);
 
-	err = drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
-	if (err)
-		return err;
-
-	return 0;
+	return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
 }
 
 static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
@@ -306,12 +295,8 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa
 	for (i = 0; i < xa_count; ++i)
 		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);
 
-	if (xe_exec_queue_is_parallel(q))
-		bitmap_release_region(guc->submission_state.guc_ids_bitmap,
-				      q->guc->id - GUC_ID_START_MLRC,
-				      order_base_2(q->width));
-	else
-		ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id);
+	xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
+				     q->guc->id, q->width);
 }
 
 static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
@@ -329,21 +314,12 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
 	 */
 	lockdep_assert_held(&guc->submission_state.lock);
 
-	if (xe_exec_queue_is_parallel(q)) {
-		void *bitmap = guc->submission_state.guc_ids_bitmap;
-
-		ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC,
-					      order_base_2(q->width));
-	} else {
-		ret = ida_simple_get(&guc->submission_state.guc_ids, 0,
-				     GUC_ID_NUMBER_SLRC, GFP_NOWAIT);
-	}
+	ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm,
+					   q->width);
 	if (ret < 0)
 		return ret;
 
 	q->guc->id = ret;
-	if (xe_exec_queue_is_parallel(q))
-		q->guc->id += GUC_ID_START_MLRC;
 
 	for (i = 0; i < q->width; ++i) {
 		ptr = xa_store(&guc->submission_state.exec_queue_lookup,
@@ -532,7 +508,7 @@ static void register_engine(struct xe_exec_queue *q)
 	info.flags = CONTEXT_REGISTRATION_FLAG_KMD;
 
 	if (xe_exec_queue_is_parallel(q)) {
-		u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
+		u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
 		struct iosys_map map = xe_lrc_parallel_map(lrc);
 
 		info.wq_desc_lo = lower_32_bits(ggtt_addr +
@@ -811,7 +787,8 @@ static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
 static void simple_error_capture(struct xe_exec_queue *q)
 {
 	struct xe_guc *guc = exec_queue_to_guc(q);
-	struct drm_printer p = drm_err_printer("");
+	struct xe_device *xe = guc_to_xe(guc);
+	struct drm_printer p = drm_err_printer(&xe->drm, NULL);
 	struct xe_hw_engine *hwe;
 	enum xe_hw_engine_id id;
 	u32 adj_logical_mask = q->logical_mask;
@@ -831,7 +808,9 @@ static void simple_error_capture(struct xe_exec_queue *q)
 			}
 		}
 
-		xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
+		if (xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL))
+			xe_gt_info(guc_to_gt(guc),
+				   "failed to get forcewake for error capture");
 		xe_guc_ct_print(&guc->ct, &p, true);
 		guc_exec_queue_print(q, &p);
 		for_each_hw_engine(hwe, guc_to_gt(guc), id) {
@@ -927,18 +906,26 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
 	int err = -ETIME;
 	int i = 0;
 
-	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
-		xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
-		xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)));
+	/*
+	 * TDR has fired before free job worker. Common if exec queue
+	 * immediately closed after last fence signaled.
+	 */
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
+		guc_exec_queue_free_job(drm_job);
 
-		drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
-			   xe_sched_job_seqno(job), q->guc->id, q->flags);
-		simple_error_capture(q);
-		xe_devcoredump(q);
-	} else {
-		drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
-			 xe_sched_job_seqno(job), q->guc->id, q->flags);
+		return DRM_GPU_SCHED_STAT_NOMINAL;
 	}
+
+	drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
+		   xe_sched_job_seqno(job), q->guc->id, q->flags);
+	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
+		   "Kernel-submitted job timed out\n");
+	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
+		   "VM job timed out on non-killed execqueue\n");
+
+	simple_error_capture(q);
+	xe_devcoredump(job);
+
 	trace_xe_sched_job_timedout(job);
 
 	/* Kill the run_job entry point */
@@ -1253,6 +1240,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
 	return 0;
 
 err_entity:
+	mutex_unlock(&guc->submission_state.lock);
 	xe_sched_entity_fini(&ge->entity);
 err_sched:
 	xe_sched_fini(&ge->sched);
@@ -1348,21 +1336,6 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
 	return 0;
 }
 
-static int guc_exec_queue_set_job_timeout(struct xe_exec_queue *q, u32 job_timeout_ms)
-{
-	struct xe_gpu_scheduler *sched = &q->guc->sched;
-	struct xe_guc *guc = exec_queue_to_guc(q);
-	struct xe_device *xe = guc_to_xe(guc);
-
-	xe_assert(xe, !exec_queue_registered(q));
-	xe_assert(xe, !exec_queue_banned(q));
-	xe_assert(xe, !exec_queue_killed(q));
-
-	sched->base.timeout = job_timeout_ms;
-
-	return 0;
-}
-
 static int guc_exec_queue_suspend(struct xe_exec_queue *q)
 {
 	struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
@@ -1413,7 +1386,6 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = {
 	.set_priority = guc_exec_queue_set_priority,
 	.set_timeslice = guc_exec_queue_set_timeslice,
 	.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
-	.set_job_timeout = guc_exec_queue_set_job_timeout,
 	.suspend = guc_exec_queue_suspend,
 	.suspend_wait = guc_exec_queue_suspend_wait,
 	.resume = guc_exec_queue_resume,
@@ -1457,8 +1429,8 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
 			    !xe_sched_job_completed(job)) ||
 			    xe_sched_invalidate_job(job, 2)) {
 				trace_xe_sched_job_ban(job);
-				xe_sched_tdr_queue_imm(&q->guc->sched);
 				set_exec_queue_banned(q);
+				xe_sched_tdr_queue_imm(&q->guc->sched);
 			}
 		}
 	}
@@ -1580,28 +1552,8 @@ static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
 	xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action));
 }
 
-int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q)
 {
-	struct xe_device *xe = guc_to_xe(guc);
-	struct xe_exec_queue *q;
-	u32 guc_id = msg[0];
-
-	if (unlikely(len < 2)) {
-		drm_err(&xe->drm, "Invalid length %u", len);
-		return -EPROTO;
-	}
-
-	q = g2h_exec_queue_lookup(guc, guc_id);
-	if (unlikely(!q))
-		return -EPROTO;
-
-	if (unlikely(!exec_queue_pending_enable(q) &&
-		     !exec_queue_pending_disable(q))) {
-		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
-			atomic_read(&q->guc->state));
-		return -EPROTO;
-	}
-
 	trace_xe_exec_queue_scheduling_done(q);
 
 	if (exec_queue_pending_enable(q)) {
@@ -1621,17 +1573,15 @@ int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 			deregister_exec_queue(guc, q);
 		}
 	}
-
-	return 0;
 }
 
-int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 {
 	struct xe_device *xe = guc_to_xe(guc);
 	struct xe_exec_queue *q;
 	u32 guc_id = msg[0];
 
-	if (unlikely(len < 1)) {
+	if (unlikely(len < 2)) {
 		drm_err(&xe->drm, "Invalid length %u", len);
 		return -EPROTO;
 	}
@@ -1640,13 +1590,20 @@ int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 	if (unlikely(!q))
 		return -EPROTO;
 
-	if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
-	    exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
+	if (unlikely(!exec_queue_pending_enable(q) &&
+		     !exec_queue_pending_disable(q))) {
 		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
 			atomic_read(&q->guc->state));
 		return -EPROTO;
 	}
 
+	handle_sched_done(guc, q);
+
+	return 0;
+}
+
+static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
+{
 	trace_xe_exec_queue_deregister_done(q);
 
 	clear_exec_queue_registered(q);
@@ -1655,6 +1612,31 @@ int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
 		xe_exec_queue_put(q);
 	else
 		__guc_exec_queue_fini(guc, q);
+}
+
+int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+	struct xe_device *xe = guc_to_xe(guc);
+	struct xe_exec_queue *q;
+	u32 guc_id = msg[0];
+
+	if (unlikely(len < 1)) {
+		drm_err(&xe->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	q = g2h_exec_queue_lookup(guc, guc_id);
+	if (unlikely(!q))
+		return -EPROTO;
+
+	if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) ||
+	    exec_queue_pending_enable(q) || exec_queue_enabled(q)) {
+		drm_err(&xe->drm, "Unexpected engine state 0x%04x",
+			atomic_read(&q->guc->state));
+		return -EPROTO;
+	}
+
+	handle_deregister_done(guc, q);
 
 	return 0;
 }
@@ -1794,7 +1776,7 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
 
 /**
  * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
- * @q: Xe exec queue.
+ * @q: faulty exec queue
  *
  * This can be printed out in a later stage like during dev_coredump
  * analysis.
@@ -1805,19 +1787,14 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
 struct xe_guc_submit_exec_queue_snapshot *
 xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
 {
-	struct xe_guc *guc = exec_queue_to_guc(q);
-	struct xe_device *xe = guc_to_xe(guc);
 	struct xe_gpu_scheduler *sched = &q->guc->sched;
-	struct xe_sched_job *job;
 	struct xe_guc_submit_exec_queue_snapshot *snapshot;
 	int i;
 
 	snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
 
-	if (!snapshot) {
-		drm_err(&xe->drm, "Skipping GuC Engine snapshot entirely.\n");
+	if (!snapshot)
 		return NULL;
-	}
 
 	snapshot->guc.id = q->guc->id;
 	memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
@@ -1830,23 +1807,14 @@ xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
 	snapshot->sched_props.preempt_timeout_us =
 		q->sched_props.preempt_timeout_us;
 
-	snapshot->lrc = kmalloc_array(q->width, sizeof(struct lrc_snapshot),
+	snapshot->lrc = kmalloc_array(q->width, sizeof(struct xe_lrc_snapshot *),
 				      GFP_ATOMIC);
 
-	if (!snapshot->lrc) {
-		drm_err(&xe->drm, "Skipping GuC Engine LRC snapshot.\n");
-	} else {
+	if (snapshot->lrc) {
 		for (i = 0; i < q->width; ++i) {
 			struct xe_lrc *lrc = q->lrc + i;
 
-			snapshot->lrc[i].context_desc =
-				lower_32_bits(xe_lrc_ggtt_addr(lrc));
-			snapshot->lrc[i].head = xe_lrc_ring_head(lrc);
-			snapshot->lrc[i].tail.internal = lrc->ring.tail;
-			snapshot->lrc[i].tail.memory =
-				xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL);
-			snapshot->lrc[i].start_seqno = xe_lrc_start_seqno(lrc);
-			snapshot->lrc[i].seqno = xe_lrc_seqno(lrc);
+			snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc);
 		}
 	}
 
@@ -1863,17 +1831,17 @@ xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
 					       sizeof(struct pending_list_snapshot),
 					       GFP_ATOMIC);
 
-	if (!snapshot->pending_list) {
-		drm_err(&xe->drm, "Skipping GuC Engine pending_list snapshot.\n");
-	} else {
+	if (snapshot->pending_list) {
+		struct xe_sched_job *job_iter;
+
 		i = 0;
-		list_for_each_entry(job, &sched->base.pending_list, drm.list) {
+		list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) {
 			snapshot->pending_list[i].seqno =
-				xe_sched_job_seqno(job);
+				xe_sched_job_seqno(job_iter);
 			snapshot->pending_list[i].fence =
-				dma_fence_is_signaled(job->fence) ? 1 : 0;
+				dma_fence_is_signaled(job_iter->fence) ? 1 : 0;
 			snapshot->pending_list[i].finished =
-				dma_fence_is_signaled(&job->drm.s_fence->finished)
+				dma_fence_is_signaled(&job_iter->drm.s_fence->finished)
 				? 1 : 0;
 			i++;
 		}
@@ -1885,6 +1853,24 @@ xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
 }
 
 /**
+ * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine.
+ * @snapshot: Previously captured snapshot of job.
+ *
+ * This captures some data that requires taking some locks, so it cannot be done in signaling path.
+ */
+void
+xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot)
+{
+	int i;
+
+	if (!snapshot || !snapshot->lrc)
+		return;
+
+	for (i = 0; i < snapshot->width; ++i)
+		xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]);
+}
+
+/**
  * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot.
  * @snapshot: GuC Submit Engine snapshot object.
  * @p: drm_printer where it will be printed out.
@@ -1912,18 +1898,9 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
 	drm_printf(p, "\tPreempt timeout: %u (us)\n",
 		   snapshot->sched_props.preempt_timeout_us);
 
-	for (i = 0; snapshot->lrc && i < snapshot->width; ++i) {
-		drm_printf(p, "\tHW Context Desc: 0x%08x\n",
-			   snapshot->lrc[i].context_desc);
-		drm_printf(p, "\tLRC Head: (memory) %u\n",
-			   snapshot->lrc[i].head);
-		drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
-			   snapshot->lrc[i].tail.internal,
-			   snapshot->lrc[i].tail.memory);
-		drm_printf(p, "\tStart seqno: (memory) %d\n",
-			   snapshot->lrc[i].start_seqno);
-		drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->lrc[i].seqno);
-	}
+	for (i = 0; snapshot->lrc && i < snapshot->width; ++i)
+		xe_lrc_snapshot_print(snapshot->lrc[i], p);
+
 	drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state);
 	drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags);
 
@@ -1948,10 +1925,16 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
  */
 void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot)
 {
+	int i;
+
 	if (!snapshot)
 		return;
 
-	kfree(snapshot->lrc);
+	if (snapshot->lrc) {
+		for (i = 0; i < snapshot->width; i++)
+			xe_lrc_snapshot_free(snapshot->lrc[i]);
+		kfree(snapshot->lrc);
+	}
 	kfree(snapshot->pending_list);
 	kfree(snapshot);
 }
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
index fc97869c5b..fad0421ead 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -29,6 +29,8 @@ int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 le
 struct xe_guc_submit_exec_queue_snapshot *
 xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q);
 void
+xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot);
+void
 xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
 				 struct drm_printer *p);
 void
diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h
index 649b0a8526..dc7456c345 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h
@@ -61,17 +61,6 @@ struct guc_submit_parallel_scratch {
 	u32 wq[WQ_SIZE / sizeof(u32)];
 };
 
-struct lrc_snapshot {
-	u32 context_desc;
-	u32 head;
-	struct {
-		u32 internal;
-		u32 memory;
-	} tail;
-	u32 start_seqno;
-	u32 seqno;
-};
-
 struct pending_list_snapshot {
 	u32 seqno;
 	bool fence;
@@ -102,14 +91,14 @@ struct xe_guc_submit_exec_queue_snapshot {
 
 	/** @sched_props: scheduling properties */
 	struct {
-		/** @timeslice_us: timeslice period in micro-seconds */
+		/** @sched_props.timeslice_us: timeslice period in micro-seconds */
 		u32 timeslice_us;
-		/** @preempt_timeout_us: preemption timeout in micro-seconds */
+		/** @sched_props.preempt_timeout_us: preemption timeout in micro-seconds */
 		u32 preempt_timeout_us;
 	} sched_props;
 
 	/** @lrc: LRC Snapshot */
-	struct lrc_snapshot *lrc;
+	struct xe_lrc_snapshot **lrc;
 
 	/** @schedule_state: Schedule State at the moment of Crash */
 	u32 schedule_state;
@@ -118,11 +107,11 @@ struct xe_guc_submit_exec_queue_snapshot {
 
 	/** @guc: GuC Engine Snapshot */
 	struct {
-		/** @wqi_head: work queue item head */
+		/** @guc.wqi_head: work queue item head */
 		u32 wqi_head;
-		/** @wqi_tail: work queue item tail */
+		/** @guc.wqi_tail: work queue item tail */
 		u32 wqi_tail;
-		/** @id: GuC id for this exec_queue */
+		/** @guc.id: GuC id for this exec_queue */
 		u16 id;
 	} guc;
 
@@ -133,13 +122,13 @@ struct xe_guc_submit_exec_queue_snapshot {
 	bool parallel_execution;
 	/** @parallel: snapshot of the useful parallel scratch */
 	struct {
-		/** @wq_desc: Workqueue description */
+		/** @parallel.wq_desc: Workqueue description */
 		struct {
-			/** @head: Workqueue Head */
+			/** @parallel.wq_desc.head: Workqueue Head */
 			u32 head;
-			/** @tail: Workqueue Tail */
+			/** @parallel.wq_desc.tail: Workqueue Tail */
 			u32 tail;
-			/** @status: Workqueue Status */
+			/** @parallel.wq_desc.status: Workqueue Status */
 			u32 status;
 		} wq_desc;
 		/** @wq: Workqueue Items */
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index cd80802e89..82bd93f786 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -15,9 +15,38 @@
 #include "xe_guc_fwif.h"
 #include "xe_guc_log_types.h"
 #include "xe_guc_pc_types.h"
+#include "xe_guc_relay_types.h"
 #include "xe_uc_fw_types.h"
 
 /**
+ * struct xe_guc_db_mgr - GuC Doorbells Manager.
+ *
+ * Note: GuC Doorbells Manager is relying on &xe_guc::submission_state.lock
+ * to protect its members.
+ */
+struct xe_guc_db_mgr {
+	/** @count: number of doorbells to manage */
+	unsigned int count;
+	/** @bitmap: bitmap to track allocated doorbells */
+	unsigned long *bitmap;
+};
+
+/**
+ * struct xe_guc_id_mgr - GuC context ID Manager.
+ *
+ * Note: GuC context ID Manager is relying on &xe_guc::submission_state.lock
+ * to protect its members.
+ */
+struct xe_guc_id_mgr {
+	/** @bitmap: bitmap to track allocated IDs */
+	unsigned long *bitmap;
+	/** @total: total number of IDs being managed */
+	unsigned int total;
+	/** @used: number of IDs currently in use */
+	unsigned int used;
+};
+
+/**
  * struct xe_guc - Graphic micro controller
  */
 struct xe_guc {
@@ -31,45 +60,48 @@ struct xe_guc {
 	struct xe_guc_ct ct;
 	/** @pc: GuC Power Conservation */
 	struct xe_guc_pc pc;
+	/** @dbm: GuC Doorbell Manager */
+	struct xe_guc_db_mgr dbm;
 	/** @submission_state: GuC submission state */
 	struct {
-		/** @exec_queue_lookup: Lookup an xe_engine from guc_id */
+		/** @submission_state.idm: GuC context ID Manager */
+		struct xe_guc_id_mgr idm;
+		/** @submission_state.exec_queue_lookup: Lookup an xe_engine from guc_id */
 		struct xarray exec_queue_lookup;
-		/** @guc_ids: used to allocate new guc_ids, single-lrc */
-		struct ida guc_ids;
-		/** @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc */
-		unsigned long *guc_ids_bitmap;
-		/** @stopped: submissions are stopped */
+		/** @submission_state.stopped: submissions are stopped */
 		atomic_t stopped;
-		/** @lock: protects submission state */
+		/** @submission_state.lock: protects submission state */
 		struct mutex lock;
-		/** @suspend: suspend fence state */
+		/** @submission_state.suspend: suspend fence state */
 		struct {
-			/** @lock: suspend fences lock */
+			/** @submission_state.suspend.lock: suspend fences lock */
 			spinlock_t lock;
-			/** @context: suspend fences context */
+			/** @submission_state.suspend.context: suspend fences context */
 			u64 context;
-			/** @seqno: suspend fences seqno */
+			/** @submission_state.suspend.seqno: suspend fences seqno */
 			u32 seqno;
 		} suspend;
 #ifdef CONFIG_PROVE_LOCKING
 #define NUM_SUBMIT_WQ	256
-		/** @submit_wq_pool: submission ordered workqueues pool */
+		/** @submission_state.submit_wq_pool: submission ordered workqueues pool */
 		struct workqueue_struct *submit_wq_pool[NUM_SUBMIT_WQ];
-		/** @submit_wq_idx: submission ordered workqueue index */
+		/** @submission_state.submit_wq_idx: submission ordered workqueue index */
 		int submit_wq_idx;
 #endif
-		/** @enabled: submission is enabled */
+		/** @submission_state.enabled: submission is enabled */
 		bool enabled;
 	} submission_state;
 	/** @hwconfig: Hardware config state */
 	struct {
-		/** @bo: buffer object of the hardware config */
+		/** @hwconfig.bo: buffer object of the hardware config */
 		struct xe_bo *bo;
-		/** @size: size of the hardware config */
+		/** @hwconfig.size: size of the hardware config */
 		u32 size;
 	} hwconfig;
 
+	/** @relay: GuC Relay Communication used in SR-IOV */
+	struct xe_guc_relay relay;
+
 	/**
 	 * @notify_reg: Register which is written to notify GuC of H2G messages
 	 */
diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c
index bfdd33b9b2..1c9d38b6f5 100644
--- a/drivers/gpu/drm/xe/xe_heci_gsc.c
+++ b/drivers/gpu/drm/xe/xe_heci_gsc.c
@@ -29,7 +29,7 @@ static void heci_gsc_irq_unmask(struct irq_data *d)
 	/* generic irq handling */
 }
 
-static struct irq_chip heci_gsc_irq_chip = {
+static const struct irq_chip heci_gsc_irq_chip = {
 	.name = "gsc_irq_chip",
 	.irq_mask = heci_gsc_irq_mask,
 	.irq_unmask = heci_gsc_irq_unmask,
diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c
new file mode 100644
index 0000000000..2c32dc46f7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hmm.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include <linux/scatterlist.h>
+#include <linux/mmu_notifier.h>
+#include <linux/dma-mapping.h>
+#include <linux/memremap.h>
+#include <linux/swap.h>
+#include <linux/hmm.h>
+#include <linux/mm.h>
+#include "xe_hmm.h"
+#include "xe_vm.h"
+#include "xe_bo.h"
+
+static u64 xe_npages_in_range(unsigned long start, unsigned long end)
+{
+	return (end - start) >> PAGE_SHIFT;
+}
+
+/*
+ * xe_mark_range_accessed() - mark a range is accessed, so core mm
+ * have such information for memory eviction or write back to
+ * hard disk
+ *
+ * @range: the range to mark
+ * @write: if write to this range, we mark pages in this range
+ * as dirty
+ */
+static void xe_mark_range_accessed(struct hmm_range *range, bool write)
+{
+	struct page *page;
+	u64 i, npages;
+
+	npages = xe_npages_in_range(range->start, range->end);
+	for (i = 0; i < npages; i++) {
+		page = hmm_pfn_to_page(range->hmm_pfns[i]);
+		if (write)
+			set_page_dirty_lock(page);
+
+		mark_page_accessed(page);
+	}
+}
+
+/*
+ * xe_build_sg() - build a scatter gather table for all the physical pages/pfn
+ * in a hmm_range. dma-map pages if necessary. dma-address is save in sg table
+ * and will be used to program GPU page table later.
+ *
+ * @xe: the xe device who will access the dma-address in sg table
+ * @range: the hmm range that we build the sg table from. range->hmm_pfns[]
+ * has the pfn numbers of pages that back up this hmm address range.
+ * @st: pointer to the sg table.
+ * @write: whether we write to this range. This decides dma map direction
+ * for system pages. If write we map it bi-diretional; otherwise
+ * DMA_TO_DEVICE
+ *
+ * All the contiguous pfns will be collapsed into one entry in
+ * the scatter gather table. This is for the purpose of efficiently
+ * programming GPU page table.
+ *
+ * The dma_address in the sg table will later be used by GPU to
+ * access memory. So if the memory is system memory, we need to
+ * do a dma-mapping so it can be accessed by GPU/DMA.
+ *
+ * FIXME: This function currently only support pages in system
+ * memory. If the memory is GPU local memory (of the GPU who
+ * is going to access memory), we need gpu dpa (device physical
+ * address), and there is no need of dma-mapping. This is TBD.
+ *
+ * FIXME: dma-mapping for peer gpu device to access remote gpu's
+ * memory. Add this when you support p2p
+ *
+ * This function allocates the storage of the sg table. It is
+ * caller's responsibility to free it calling sg_free_table.
+ *
+ * Returns 0 if successful; -ENOMEM if fails to allocate memory
+ */
+static int xe_build_sg(struct xe_device *xe, struct hmm_range *range,
+		       struct sg_table *st, bool write)
+{
+	struct device *dev = xe->drm.dev;
+	struct page **pages;
+	u64 i, npages;
+	int ret;
+
+	npages = xe_npages_in_range(range->start, range->end);
+	pages = kvmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	for (i = 0; i < npages; i++) {
+		pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]);
+		xe_assert(xe, !is_device_private_page(pages[i]));
+	}
+
+	ret = sg_alloc_table_from_pages_segment(st, pages, npages, 0, npages << PAGE_SHIFT,
+						xe_sg_segment_size(dev), GFP_KERNEL);
+	if (ret)
+		goto free_pages;
+
+	ret = dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE,
+			      DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING);
+	if (ret) {
+		sg_free_table(st);
+		st = NULL;
+	}
+
+free_pages:
+	kvfree(pages);
+	return ret;
+}
+
+/*
+ * xe_hmm_userptr_free_sg() - Free the scatter gather table of userptr
+ *
+ * @uvma: the userptr vma which hold the scatter gather table
+ *
+ * With function xe_userptr_populate_range, we allocate storage of
+ * the userptr sg table. This is a helper function to free this
+ * sg table, and dma unmap the address in the table.
+ */
+void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma)
+{
+	struct xe_userptr *userptr = &uvma->userptr;
+	struct xe_vma *vma = &uvma->vma;
+	bool write = !xe_vma_read_only(vma);
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct xe_device *xe = vm->xe;
+	struct device *dev = xe->drm.dev;
+
+	xe_assert(xe, userptr->sg);
+	dma_unmap_sgtable(dev, userptr->sg,
+			  write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0);
+
+	sg_free_table(userptr->sg);
+	userptr->sg = NULL;
+}
+
+/**
+ * xe_hmm_userptr_populate_range() - Populate physical pages of a virtual
+ * address range
+ *
+ * @uvma: userptr vma which has information of the range to populate.
+ * @is_mm_mmap_locked: True if mmap_read_lock is already acquired by caller.
+ *
+ * This function populate the physical pages of a virtual
+ * address range. The populated physical pages is saved in
+ * userptr's sg table. It is similar to get_user_pages but call
+ * hmm_range_fault.
+ *
+ * This function also read mmu notifier sequence # (
+ * mmu_interval_read_begin), for the purpose of later
+ * comparison (through mmu_interval_read_retry).
+ *
+ * This must be called with mmap read or write lock held.
+ *
+ * This function allocates the storage of the userptr sg table.
+ * It is caller's responsibility to free it calling sg_free_table.
+ *
+ * returns: 0 for succuss; negative error no on failure
+ */
+int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma,
+				  bool is_mm_mmap_locked)
+{
+	unsigned long timeout =
+		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+	unsigned long *pfns, flags = HMM_PFN_REQ_FAULT;
+	struct xe_userptr *userptr;
+	struct xe_vma *vma = &uvma->vma;
+	u64 userptr_start = xe_vma_userptr(vma);
+	u64 userptr_end = userptr_start + xe_vma_size(vma);
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct hmm_range hmm_range;
+	bool write = !xe_vma_read_only(vma);
+	unsigned long notifier_seq;
+	u64 npages;
+	int ret;
+
+	userptr = &uvma->userptr;
+
+	if (is_mm_mmap_locked)
+		mmap_assert_locked(userptr->notifier.mm);
+
+	if (vma->gpuva.flags & XE_VMA_DESTROYED)
+		return 0;
+
+	notifier_seq = mmu_interval_read_begin(&userptr->notifier);
+	if (notifier_seq == userptr->notifier_seq)
+		return 0;
+
+	if (userptr->sg)
+		xe_hmm_userptr_free_sg(uvma);
+
+	npages = xe_npages_in_range(userptr_start, userptr_end);
+	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
+	if (unlikely(!pfns))
+		return -ENOMEM;
+
+	if (write)
+		flags |= HMM_PFN_REQ_WRITE;
+
+	if (!mmget_not_zero(userptr->notifier.mm)) {
+		ret = -EFAULT;
+		goto free_pfns;
+	}
+
+	hmm_range.default_flags = flags;
+	hmm_range.hmm_pfns = pfns;
+	hmm_range.notifier = &userptr->notifier;
+	hmm_range.start = userptr_start;
+	hmm_range.end = userptr_end;
+	hmm_range.dev_private_owner = vm->xe;
+
+	while (true) {
+		hmm_range.notifier_seq = mmu_interval_read_begin(&userptr->notifier);
+
+		if (!is_mm_mmap_locked)
+			mmap_read_lock(userptr->notifier.mm);
+
+		ret = hmm_range_fault(&hmm_range);
+
+		if (!is_mm_mmap_locked)
+			mmap_read_unlock(userptr->notifier.mm);
+
+		if (ret == -EBUSY) {
+			if (time_after(jiffies, timeout))
+				break;
+
+			continue;
+		}
+		break;
+	}
+
+	mmput(userptr->notifier.mm);
+
+	if (ret)
+		goto free_pfns;
+
+	ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt, write);
+	if (ret)
+		goto free_pfns;
+
+	xe_mark_range_accessed(&hmm_range, write);
+	userptr->sg = &userptr->sgt;
+	userptr->notifier_seq = hmm_range.notifier_seq;
+
+free_pfns:
+	kvfree(pfns);
+	return ret;
+}
+
diff --git a/drivers/gpu/drm/xe/xe_hmm.h b/drivers/gpu/drm/xe/xe_hmm.h
new file mode 100644
index 0000000000..909dc2bdcd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hmm.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include <linux/types.h>
+
+struct xe_userptr_vma;
+
+int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked);
+void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma);
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index 01b2d0bd26..39a484a575 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -58,8 +58,8 @@ static int huc_alloc_gsc_pkt(struct xe_huc *huc)
 	bo = xe_bo_create_pin_map(xe, gt_to_tile(gt), NULL,
 				  PXP43_HUC_AUTH_INOUT_SIZE * 2,
 				  ttm_bo_type_kernel,
-				  XE_BO_CREATE_SYSTEM_BIT |
-				  XE_BO_CREATE_GGTT_BIT);
+				  XE_BO_FLAG_SYSTEM |
+				  XE_BO_FLAG_GGTT);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
@@ -105,6 +105,25 @@ out:
 	return ret;
 }
 
+int xe_huc_init_post_hwconfig(struct xe_huc *huc)
+{
+	struct xe_tile *tile = gt_to_tile(huc_to_gt(huc));
+	struct xe_device *xe = huc_to_xe(huc);
+	int ret;
+
+	if (!IS_DGFX(huc_to_xe(huc)))
+		return 0;
+
+	if (!xe_uc_fw_is_loadable(&huc->fw))
+		return 0;
+
+	ret = xe_managed_bo_reinit_in_vram(xe, tile, &huc->fw.bo);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
 int xe_huc_upload(struct xe_huc *huc)
 {
 	if (!xe_uc_fw_is_loadable(&huc->fw))
diff --git a/drivers/gpu/drm/xe/xe_huc.h b/drivers/gpu/drm/xe/xe_huc.h
index 5320172302..3ab56cc14b 100644
--- a/drivers/gpu/drm/xe/xe_huc.h
+++ b/drivers/gpu/drm/xe/xe_huc.h
@@ -17,6 +17,7 @@ enum xe_huc_auth_types {
 };
 
 int xe_huc_init(struct xe_huc *huc);
+int xe_huc_init_post_hwconfig(struct xe_huc *huc);
 int xe_huc_upload(struct xe_huc *huc);
 int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type);
 bool xe_huc_is_authenticated(struct xe_huc *huc, enum xe_huc_auth_types type);
diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.c b/drivers/gpu/drm/xe/xe_huc_debugfs.c
index 18585a7eeb..3a888a4018 100644
--- a/drivers/gpu/drm/xe/xe_huc_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_huc_debugfs.c
@@ -12,6 +12,7 @@
 #include "xe_gt.h"
 #include "xe_huc.h"
 #include "xe_macros.h"
+#include "xe_pm.h"
 
 static struct xe_gt *
 huc_to_gt(struct xe_huc *huc)
@@ -36,9 +37,9 @@ static int huc_info(struct seq_file *m, void *data)
 	struct xe_device *xe = huc_to_xe(huc);
 	struct drm_printer p = drm_seq_file_printer(m);
 
-	xe_device_mem_access_get(xe);
+	xe_pm_runtime_get(xe);
 	xe_huc_print_info(huc, &p);
-	xe_device_mem_access_put(xe);
+	xe_pm_runtime_put(xe);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 1fa5cf5eea..455f375c1c 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -14,8 +14,10 @@
 #include "xe_device.h"
 #include "xe_execlist.h"
 #include "xe_force_wake.h"
+#include "xe_gsc.h"
 #include "xe_gt.h"
 #include "xe_gt_ccs_mode.h"
+#include "xe_gt_printk.h"
 #include "xe_gt_topology.h"
 #include "xe_hw_fence.h"
 #include "xe_irq.h"
@@ -25,6 +27,7 @@
 #include "xe_reg_sr.h"
 #include "xe_rtp.h"
 #include "xe_sched_job.h"
+#include "xe_sriov.h"
 #include "xe_tuning.h"
 #include "xe_uc_fw.h"
 #include "xe_wa.h"
@@ -34,6 +37,7 @@ struct engine_info {
 	const char *name;
 	unsigned int class : 8;
 	unsigned int instance : 8;
+	unsigned int irq_offset : 8;
 	enum xe_force_wake_domains domain;
 	u32 mmio_base;
 };
@@ -43,6 +47,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "rcs0",
 		.class = XE_ENGINE_CLASS_RENDER,
 		.instance = 0,
+		.irq_offset = ilog2(INTR_RCS0),
 		.domain = XE_FW_RENDER,
 		.mmio_base = RENDER_RING_BASE,
 	},
@@ -50,6 +55,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "bcs0",
 		.class = XE_ENGINE_CLASS_COPY,
 		.instance = 0,
+		.irq_offset = ilog2(INTR_BCS(0)),
 		.domain = XE_FW_RENDER,
 		.mmio_base = BLT_RING_BASE,
 	},
@@ -57,6 +63,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "bcs1",
 		.class = XE_ENGINE_CLASS_COPY,
 		.instance = 1,
+		.irq_offset = ilog2(INTR_BCS(1)),
 		.domain = XE_FW_RENDER,
 		.mmio_base = XEHPC_BCS1_RING_BASE,
 	},
@@ -64,6 +71,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "bcs2",
 		.class = XE_ENGINE_CLASS_COPY,
 		.instance = 2,
+		.irq_offset = ilog2(INTR_BCS(2)),
 		.domain = XE_FW_RENDER,
 		.mmio_base = XEHPC_BCS2_RING_BASE,
 	},
@@ -71,6 +79,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "bcs3",
 		.class = XE_ENGINE_CLASS_COPY,
 		.instance = 3,
+		.irq_offset = ilog2(INTR_BCS(3)),
 		.domain = XE_FW_RENDER,
 		.mmio_base = XEHPC_BCS3_RING_BASE,
 	},
@@ -78,6 +87,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "bcs4",
 		.class = XE_ENGINE_CLASS_COPY,
 		.instance = 4,
+		.irq_offset = ilog2(INTR_BCS(4)),
 		.domain = XE_FW_RENDER,
 		.mmio_base = XEHPC_BCS4_RING_BASE,
 	},
@@ -85,6 +95,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "bcs5",
 		.class = XE_ENGINE_CLASS_COPY,
 		.instance = 5,
+		.irq_offset = ilog2(INTR_BCS(5)),
 		.domain = XE_FW_RENDER,
 		.mmio_base = XEHPC_BCS5_RING_BASE,
 	},
@@ -92,12 +103,14 @@ static const struct engine_info engine_infos[] = {
 		.name = "bcs6",
 		.class = XE_ENGINE_CLASS_COPY,
 		.instance = 6,
+		.irq_offset = ilog2(INTR_BCS(6)),
 		.domain = XE_FW_RENDER,
 		.mmio_base = XEHPC_BCS6_RING_BASE,
 	},
 	[XE_HW_ENGINE_BCS7] = {
 		.name = "bcs7",
 		.class = XE_ENGINE_CLASS_COPY,
+		.irq_offset = ilog2(INTR_BCS(7)),
 		.instance = 7,
 		.domain = XE_FW_RENDER,
 		.mmio_base = XEHPC_BCS7_RING_BASE,
@@ -106,6 +119,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "bcs8",
 		.class = XE_ENGINE_CLASS_COPY,
 		.instance = 8,
+		.irq_offset = ilog2(INTR_BCS8),
 		.domain = XE_FW_RENDER,
 		.mmio_base = XEHPC_BCS8_RING_BASE,
 	},
@@ -114,6 +128,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "vcs0",
 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 		.instance = 0,
+		.irq_offset = 32 + ilog2(INTR_VCS(0)),
 		.domain = XE_FW_MEDIA_VDBOX0,
 		.mmio_base = BSD_RING_BASE,
 	},
@@ -121,6 +136,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "vcs1",
 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 		.instance = 1,
+		.irq_offset = 32 + ilog2(INTR_VCS(1)),
 		.domain = XE_FW_MEDIA_VDBOX1,
 		.mmio_base = BSD2_RING_BASE,
 	},
@@ -128,6 +144,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "vcs2",
 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 		.instance = 2,
+		.irq_offset = 32 + ilog2(INTR_VCS(2)),
 		.domain = XE_FW_MEDIA_VDBOX2,
 		.mmio_base = BSD3_RING_BASE,
 	},
@@ -135,6 +152,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "vcs3",
 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 		.instance = 3,
+		.irq_offset = 32 + ilog2(INTR_VCS(3)),
 		.domain = XE_FW_MEDIA_VDBOX3,
 		.mmio_base = BSD4_RING_BASE,
 	},
@@ -142,6 +160,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "vcs4",
 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 		.instance = 4,
+		.irq_offset = 32 + ilog2(INTR_VCS(4)),
 		.domain = XE_FW_MEDIA_VDBOX4,
 		.mmio_base = XEHP_BSD5_RING_BASE,
 	},
@@ -149,6 +168,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "vcs5",
 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 		.instance = 5,
+		.irq_offset = 32 + ilog2(INTR_VCS(5)),
 		.domain = XE_FW_MEDIA_VDBOX5,
 		.mmio_base = XEHP_BSD6_RING_BASE,
 	},
@@ -156,6 +176,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "vcs6",
 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 		.instance = 6,
+		.irq_offset = 32 + ilog2(INTR_VCS(6)),
 		.domain = XE_FW_MEDIA_VDBOX6,
 		.mmio_base = XEHP_BSD7_RING_BASE,
 	},
@@ -163,6 +184,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "vcs7",
 		.class = XE_ENGINE_CLASS_VIDEO_DECODE,
 		.instance = 7,
+		.irq_offset = 32 + ilog2(INTR_VCS(7)),
 		.domain = XE_FW_MEDIA_VDBOX7,
 		.mmio_base = XEHP_BSD8_RING_BASE,
 	},
@@ -170,6 +192,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "vecs0",
 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
 		.instance = 0,
+		.irq_offset = 32 + ilog2(INTR_VECS(0)),
 		.domain = XE_FW_MEDIA_VEBOX0,
 		.mmio_base = VEBOX_RING_BASE,
 	},
@@ -177,6 +200,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "vecs1",
 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
 		.instance = 1,
+		.irq_offset = 32 + ilog2(INTR_VECS(1)),
 		.domain = XE_FW_MEDIA_VEBOX1,
 		.mmio_base = VEBOX2_RING_BASE,
 	},
@@ -184,6 +208,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "vecs2",
 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
 		.instance = 2,
+		.irq_offset = 32 + ilog2(INTR_VECS(2)),
 		.domain = XE_FW_MEDIA_VEBOX2,
 		.mmio_base = XEHP_VEBOX3_RING_BASE,
 	},
@@ -191,6 +216,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "vecs3",
 		.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
 		.instance = 3,
+		.irq_offset = 32 + ilog2(INTR_VECS(3)),
 		.domain = XE_FW_MEDIA_VEBOX3,
 		.mmio_base = XEHP_VEBOX4_RING_BASE,
 	},
@@ -198,6 +224,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "ccs0",
 		.class = XE_ENGINE_CLASS_COMPUTE,
 		.instance = 0,
+		.irq_offset = ilog2(INTR_CCS(0)),
 		.domain = XE_FW_RENDER,
 		.mmio_base = COMPUTE0_RING_BASE,
 	},
@@ -205,6 +232,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "ccs1",
 		.class = XE_ENGINE_CLASS_COMPUTE,
 		.instance = 1,
+		.irq_offset = ilog2(INTR_CCS(1)),
 		.domain = XE_FW_RENDER,
 		.mmio_base = COMPUTE1_RING_BASE,
 	},
@@ -212,6 +240,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "ccs2",
 		.class = XE_ENGINE_CLASS_COMPUTE,
 		.instance = 2,
+		.irq_offset = ilog2(INTR_CCS(2)),
 		.domain = XE_FW_RENDER,
 		.mmio_base = COMPUTE2_RING_BASE,
 	},
@@ -219,6 +248,7 @@ static const struct engine_info engine_infos[] = {
 		.name = "ccs3",
 		.class = XE_ENGINE_CLASS_COMPUTE,
 		.instance = 3,
+		.irq_offset = ilog2(INTR_CCS(3)),
 		.domain = XE_FW_RENDER,
 		.mmio_base = COMPUTE3_RING_BASE,
 	},
@@ -289,6 +319,19 @@ static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt,
 	       xe_rtp_match_first_render_or_compute(gt, hwe);
 }
 
+static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt,
+				      const struct xe_hw_engine *hwe)
+{
+	if (GRAPHICS_VER(gt_to_xe(gt)) < 20)
+		return false;
+
+	if (hwe->class != XE_ENGINE_CLASS_COMPUTE &&
+	    hwe->class != XE_ENGINE_CLASS_RENDER)
+		return false;
+
+	return xe_mmio_read32(hwe->gt, XEHP_FUSE4) & CFEG_WMTP_DISABLE;
+}
+
 void
 xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
 {
@@ -319,6 +362,14 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
 		  XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
 					   RCU_MODE_FIXED_SLICE_CCS_MODE))
 		},
+		/* Disable WMTP if HW doesn't support it */
+		{ XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"),
+		  XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)),
+		  XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0),
+					   PREEMPT_GPGPU_LEVEL_MASK,
+					   PREEMPT_GPGPU_THREAD_GROUP_LEVEL)),
+		  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE)
+		},
 		{}
 	};
 
@@ -397,6 +448,7 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
 	hwe->class = info->class;
 	hwe->instance = info->instance;
 	hwe->mmio_base = info->mmio_base;
+	hwe->irq_offset = info->irq_offset;
 	hwe->domain = info->domain;
 	hwe->name = info->name;
 	hwe->fence_irq = &gt->fence_irq[info->class];
@@ -413,6 +465,32 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
 		hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT;
 		hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
 		hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
+
+		/*
+		 * The GSC engine can accept submissions while the GSC shim is
+		 * being reset, during which time the submission is stalled. In
+		 * the worst case, the shim reset can take up to the maximum GSC
+		 * command execution time (250ms), so the request start can be
+		 * delayed by that much; the request itself can take that long
+		 * without being preemptible, which means worst case it can
+		 * theoretically take up to 500ms for a preemption to go through
+		 * on the GSC engine. Adding to that an extra 100ms as a safety
+		 * margin, we get a minimum recommended timeout of 600ms.
+		 * The preempt_timeout value can't be tuned for OTHER_CLASS
+		 * because the class is reserved for kernel usage, so we just
+		 * need to make sure that the starting value is above that
+		 * threshold; since our default value (640ms) is greater than
+		 * 600ms, the only way we can go below is via a kconfig setting.
+		 * If that happens, log it in dmesg and update the value.
+		 */
+		if (hwe->class == XE_ENGINE_CLASS_OTHER) {
+			const u32 min_preempt_timeout = 600 * 1000;
+			if (hwe->eclass->sched_props.preempt_timeout_us < min_preempt_timeout) {
+				hwe->eclass->sched_props.preempt_timeout_us = min_preempt_timeout;
+				xe_gt_notice(gt, "Increasing preempt_timeout for GSC to 600ms\n");
+			}
+		}
+
 		/* Record default props */
 		hwe->eclass->defaults = hwe->eclass->sched_props;
 	}
@@ -440,8 +518,9 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
 	xe_reg_sr_apply_whitelist(hwe);
 
 	hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K,
-						 XE_BO_CREATE_VRAM_IF_DGFX(tile) |
-						 XE_BO_CREATE_GGTT_BIT);
+						 XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+						 XE_BO_FLAG_GGTT |
+						 XE_BO_FLAG_GGTT_INVALIDATE);
 	if (IS_ERR(hwe->hwsp)) {
 		err = PTR_ERR(hwe->hwsp);
 		goto err_name;
@@ -459,18 +538,19 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
 		}
 	}
 
-	if (xe_device_uc_enabled(xe))
+	if (xe_device_uc_enabled(xe)) {
+		/* GSCCS has a special interrupt for reset */
+		if (hwe->class == XE_ENGINE_CLASS_OTHER)
+			hwe->irq_handler = xe_gsc_hwe_irq_handler;
+
 		xe_hw_engine_enable_ring(hwe);
+	}
 
 	/* We reserve the highest BCS instance for USM */
 	if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
 		gt->usm.reserved_bcs_instance = hwe->instance;
 
-	err = drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe);
-	if (err)
-		return err;
-
-	return 0;
+	return drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe);
 
 err_kernel_lrc:
 	xe_lrc_finish(&hwe->kernel_lrc);
@@ -700,7 +780,7 @@ struct xe_hw_engine_snapshot *
 xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
 {
 	struct xe_hw_engine_snapshot *snapshot;
-	int len;
+	u64 val;
 
 	if (!xe_hw_engine_is_valid(hwe))
 		return NULL;
@@ -710,11 +790,7 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
 	if (!snapshot)
 		return NULL;
 
-	len = strlen(hwe->name) + 1;
-	snapshot->name = kzalloc(len, GFP_ATOMIC);
-	if (snapshot->name)
-		strscpy(snapshot->name, hwe->name, len);
-
+	snapshot->name = kstrdup(hwe->name, GFP_ATOMIC);
 	snapshot->class = hwe->class;
 	snapshot->logical_instance = hwe->logical_instance;
 	snapshot->forcewake.domain = hwe->domain;
@@ -722,19 +798,35 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
 						    hwe->domain);
 	snapshot->mmio_base = hwe->mmio_base;
 
-	snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
-	snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe,
-							   RING_HWS_PGA(0));
-	snapshot->reg.ring_execlist_status_lo =
+	/* no more VF accessible data below this point */
+	if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
+		return snapshot;
+
+	snapshot->reg.ring_execlist_status =
 		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0));
-	snapshot->reg.ring_execlist_status_hi =
-		hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
-	snapshot->reg.ring_execlist_sq_contents_lo =
-		hw_engine_mmio_read32(hwe,
-				      RING_EXECLIST_SQ_CONTENTS_LO(0));
-	snapshot->reg.ring_execlist_sq_contents_hi =
-		hw_engine_mmio_read32(hwe,
-				      RING_EXECLIST_SQ_CONTENTS_HI(0));
+	val = hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
+	snapshot->reg.ring_execlist_status |= val << 32;
+
+	snapshot->reg.ring_execlist_sq_contents =
+		hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0));
+	val = hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0));
+	snapshot->reg.ring_execlist_sq_contents |= val << 32;
+
+	snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0));
+	val = hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
+	snapshot->reg.ring_acthd |= val << 32;
+
+	snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0));
+	val = hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
+	snapshot->reg.ring_bbaddr |= val << 32;
+
+	snapshot->reg.ring_dma_fadd =
+		hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
+	val = hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
+	snapshot->reg.ring_dma_fadd |= val << 32;
+
+	snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
+	snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0));
 	snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0));
 	snapshot->reg.ring_head =
 		hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
@@ -748,16 +840,6 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
 	snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0));
 	snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0));
 	snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0));
-	snapshot->reg.ring_acthd_udw =
-		hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
-	snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0));
-	snapshot->reg.ring_bbaddr_udw =
-		hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
-	snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0));
-	snapshot->reg.ring_dma_fadd_udw =
-		hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
-	snapshot->reg.ring_dma_fadd =
-		hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
 	snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0));
 
 	if (snapshot->class == XE_ENGINE_CLASS_COMPUTE)
@@ -786,33 +868,25 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
 		   snapshot->forcewake.domain, snapshot->forcewake.ref);
 	drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam);
 	drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga);
-	drm_printf(p, "\tRING_EXECLIST_STATUS_LO: 0x%08x\n",
-		   snapshot->reg.ring_execlist_status_lo);
-	drm_printf(p, "\tRING_EXECLIST_STATUS_HI: 0x%08x\n",
-		   snapshot->reg.ring_execlist_status_hi);
-	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n",
-		   snapshot->reg.ring_execlist_sq_contents_lo);
-	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n",
-		   snapshot->reg.ring_execlist_sq_contents_hi);
+	drm_printf(p, "\tRING_EXECLIST_STATUS: 0x%016llx\n",
+		   snapshot->reg.ring_execlist_status);
+	drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n",
+		   snapshot->reg.ring_execlist_sq_contents);
 	drm_printf(p, "\tRING_START: 0x%08x\n", snapshot->reg.ring_start);
-	drm_printf(p, "\tRING_HEAD:  0x%08x\n", snapshot->reg.ring_head);
-	drm_printf(p, "\tRING_TAIL:  0x%08x\n", snapshot->reg.ring_tail);
+	drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head);
+	drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail);
 	drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl);
 	drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode);
 	drm_printf(p, "\tRING_MODE: 0x%08x\n",
 		   snapshot->reg.ring_mode);
-	drm_printf(p, "\tRING_IMR:   0x%08x\n", snapshot->reg.ring_imr);
-	drm_printf(p, "\tRING_ESR:   0x%08x\n", snapshot->reg.ring_esr);
-	drm_printf(p, "\tRING_EMR:   0x%08x\n", snapshot->reg.ring_emr);
-	drm_printf(p, "\tRING_EIR:   0x%08x\n", snapshot->reg.ring_eir);
-	drm_printf(p, "\tACTHD:  0x%08x_%08x\n", snapshot->reg.ring_acthd_udw,
-		   snapshot->reg.ring_acthd);
-	drm_printf(p, "\tBBADDR: 0x%08x_%08x\n", snapshot->reg.ring_bbaddr_udw,
-		   snapshot->reg.ring_bbaddr);
-	drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n",
-		   snapshot->reg.ring_dma_fadd_udw,
-		   snapshot->reg.ring_dma_fadd);
-	drm_printf(p, "\tIPEHR: 0x%08x\n\n", snapshot->reg.ipehr);
+	drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr);
+	drm_printf(p, "\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr);
+	drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr);
+	drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir);
+	drm_printf(p, "\tACTHD: 0x%016llx\n", snapshot->reg.ring_acthd);
+	drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr);
+	drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd);
+	drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr);
 	if (snapshot->class == XE_ENGINE_CLASS_COMPUTE)
 		drm_printf(p, "\tRCU_MODE: 0x%08x\n",
 			   snapshot->reg.rcu_mode);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
index e49bc14f0e..844ec68cbb 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
@@ -7,8 +7,10 @@
 #include <linux/kobject.h>
 #include <linux/sysfs.h>
 
+#include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_hw_engine_class_sysfs.h"
+#include "xe_pm.h"
 
 #define MAX_ENGINE_CLASS_NAME_LEN    16
 static int xe_add_hw_engine_class_defaults(struct xe_device *xe,
@@ -70,10 +72,10 @@ static ssize_t job_timeout_max_show(struct kobject *kobj,
 {
 	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
 
-	return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_max);
+	return sysfs_emit(buf, "%u\n", eclass->sched_props.job_timeout_max);
 }
 
-static struct kobj_attribute job_timeout_max_attr =
+static const struct kobj_attribute job_timeout_max_attr =
 __ATTR(job_timeout_max, 0644, job_timeout_max_show, job_timeout_max_store);
 
 static ssize_t job_timeout_min_store(struct kobject *kobj,
@@ -106,10 +108,10 @@ static ssize_t job_timeout_min_show(struct kobject *kobj,
 {
 	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
 
-	return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_min);
+	return sysfs_emit(buf, "%u\n", eclass->sched_props.job_timeout_min);
 }
 
-static struct kobj_attribute job_timeout_min_attr =
+static const struct kobj_attribute job_timeout_min_attr =
 __ATTR(job_timeout_min, 0644, job_timeout_min_show, job_timeout_min_store);
 
 static ssize_t job_timeout_store(struct kobject *kobj,
@@ -139,10 +141,10 @@ static ssize_t job_timeout_show(struct kobject *kobj,
 {
 	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
 
-	return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_ms);
+	return sysfs_emit(buf, "%u\n", eclass->sched_props.job_timeout_ms);
 }
 
-static struct kobj_attribute job_timeout_attr =
+static const struct kobj_attribute job_timeout_attr =
 __ATTR(job_timeout_ms, 0644, job_timeout_show, job_timeout_store);
 
 static ssize_t job_timeout_default(struct kobject *kobj,
@@ -150,10 +152,10 @@ static ssize_t job_timeout_default(struct kobject *kobj,
 {
 	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
 
-	return sprintf(buf, "%u\n", eclass->defaults.job_timeout_ms);
+	return sysfs_emit(buf, "%u\n", eclass->defaults.job_timeout_ms);
 }
 
-static struct kobj_attribute job_timeout_def =
+static const struct kobj_attribute job_timeout_def =
 __ATTR(job_timeout_ms, 0444, job_timeout_default, NULL);
 
 static ssize_t job_timeout_min_default(struct kobject *kobj,
@@ -161,10 +163,10 @@ static ssize_t job_timeout_min_default(struct kobject *kobj,
 {
 	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
 
-	return sprintf(buf, "%u\n", eclass->defaults.job_timeout_min);
+	return sysfs_emit(buf, "%u\n", eclass->defaults.job_timeout_min);
 }
 
-static struct kobj_attribute job_timeout_min_def =
+static const struct kobj_attribute job_timeout_min_def =
 __ATTR(job_timeout_min, 0444, job_timeout_min_default, NULL);
 
 static ssize_t job_timeout_max_default(struct kobject *kobj,
@@ -172,10 +174,10 @@ static ssize_t job_timeout_max_default(struct kobject *kobj,
 {
 	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
 
-	return sprintf(buf, "%u\n", eclass->defaults.job_timeout_max);
+	return sysfs_emit(buf, "%u\n", eclass->defaults.job_timeout_max);
 }
 
-static struct kobj_attribute job_timeout_max_def =
+static const struct kobj_attribute job_timeout_max_def =
 __ATTR(job_timeout_max, 0444, job_timeout_max_default, NULL);
 
 static ssize_t timeslice_duration_store(struct kobject *kobj,
@@ -231,10 +233,10 @@ static ssize_t timeslice_duration_max_show(struct kobject *kobj,
 {
 	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
 
-	return sprintf(buf, "%u\n", eclass->sched_props.timeslice_max);
+	return sysfs_emit(buf, "%u\n", eclass->sched_props.timeslice_max);
 }
 
-static struct kobj_attribute timeslice_duration_max_attr =
+static const struct kobj_attribute timeslice_duration_max_attr =
 	__ATTR(timeslice_duration_max, 0644, timeslice_duration_max_show,
 	       timeslice_duration_max_store);
 
@@ -269,10 +271,10 @@ static ssize_t timeslice_duration_min_show(struct kobject *kobj,
 {
 	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
 
-	return sprintf(buf, "%u\n", eclass->sched_props.timeslice_min);
+	return sysfs_emit(buf, "%u\n", eclass->sched_props.timeslice_min);
 }
 
-static struct kobj_attribute timeslice_duration_min_attr =
+static const struct kobj_attribute timeslice_duration_min_attr =
 	__ATTR(timeslice_duration_min, 0644, timeslice_duration_min_show,
 	       timeslice_duration_min_store);
 
@@ -281,10 +283,10 @@ static ssize_t timeslice_duration_show(struct kobject *kobj,
 {
 	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
 
-	return sprintf(buf, "%u\n", eclass->sched_props.timeslice_us);
+	return sysfs_emit(buf, "%u\n", eclass->sched_props.timeslice_us);
 }
 
-static struct kobj_attribute timeslice_duration_attr =
+static const struct kobj_attribute timeslice_duration_attr =
 	__ATTR(timeslice_duration_us, 0644, timeslice_duration_show,
 	       timeslice_duration_store);
 
@@ -293,10 +295,10 @@ static ssize_t timeslice_default(struct kobject *kobj,
 {
 	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
 
-	return sprintf(buf, "%u\n", eclass->defaults.timeslice_us);
+	return sysfs_emit(buf, "%u\n", eclass->defaults.timeslice_us);
 }
 
-static struct kobj_attribute timeslice_duration_def =
+static const struct kobj_attribute timeslice_duration_def =
 __ATTR(timeslice_duration_us, 0444, timeslice_default, NULL);
 
 static ssize_t timeslice_min_default(struct kobject *kobj,
@@ -304,10 +306,10 @@ static ssize_t timeslice_min_default(struct kobject *kobj,
 {
 	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
 
-	return sprintf(buf, "%u\n", eclass->defaults.timeslice_min);
+	return sysfs_emit(buf, "%u\n", eclass->defaults.timeslice_min);
 }
 
-static struct kobj_attribute timeslice_duration_min_def =
+static const struct kobj_attribute timeslice_duration_min_def =
 __ATTR(timeslice_duration_min, 0444, timeslice_min_default, NULL);
 
 static ssize_t timeslice_max_default(struct kobject *kobj,
@@ -315,10 +317,10 @@ static ssize_t timeslice_max_default(struct kobject *kobj,
 {
 	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
 
-	return sprintf(buf, "%u\n", eclass->defaults.timeslice_max);
+	return sysfs_emit(buf, "%u\n", eclass->defaults.timeslice_max);
 }
 
-static struct kobj_attribute timeslice_duration_max_def =
+static const struct kobj_attribute timeslice_duration_max_def =
 __ATTR(timeslice_duration_max, 0444, timeslice_max_default, NULL);
 
 static ssize_t preempt_timeout_store(struct kobject *kobj,
@@ -348,10 +350,10 @@ static ssize_t preempt_timeout_show(struct kobject *kobj,
 {
 	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
 
-	return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_us);
+	return sysfs_emit(buf, "%u\n", eclass->sched_props.preempt_timeout_us);
 }
 
-static struct kobj_attribute preempt_timeout_attr =
+static const struct kobj_attribute preempt_timeout_attr =
 __ATTR(preempt_timeout_us, 0644, preempt_timeout_show, preempt_timeout_store);
 
 static ssize_t preempt_timeout_default(struct kobject *kobj,
@@ -360,10 +362,10 @@ static ssize_t preempt_timeout_default(struct kobject *kobj,
 {
 	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
 
-	return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_us);
+	return sysfs_emit(buf, "%u\n", eclass->defaults.preempt_timeout_us);
 }
 
-static struct kobj_attribute preempt_timeout_def =
+static const struct kobj_attribute preempt_timeout_def =
 __ATTR(preempt_timeout_us, 0444, preempt_timeout_default, NULL);
 
 static ssize_t preempt_timeout_min_default(struct kobject *kobj,
@@ -372,10 +374,10 @@ static ssize_t preempt_timeout_min_default(struct kobject *kobj,
 {
 	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
 
-	return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_min);
+	return sysfs_emit(buf, "%u\n", eclass->defaults.preempt_timeout_min);
 }
 
-static struct kobj_attribute preempt_timeout_min_def =
+static const struct kobj_attribute preempt_timeout_min_def =
 __ATTR(preempt_timeout_min, 0444, preempt_timeout_min_default, NULL);
 
 static ssize_t preempt_timeout_max_default(struct kobject *kobj,
@@ -384,10 +386,10 @@ static ssize_t preempt_timeout_max_default(struct kobject *kobj,
 {
 	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent);
 
-	return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_max);
+	return sysfs_emit(buf, "%u\n", eclass->defaults.preempt_timeout_max);
 }
 
-static struct kobj_attribute preempt_timeout_max_def =
+static const struct kobj_attribute preempt_timeout_max_def =
 __ATTR(preempt_timeout_max, 0444, preempt_timeout_max_default, NULL);
 
 static ssize_t preempt_timeout_max_store(struct kobject *kobj,
@@ -420,10 +422,10 @@ static ssize_t preempt_timeout_max_show(struct kobject *kobj,
 {
 	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
 
-	return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_max);
+	return sysfs_emit(buf, "%u\n", eclass->sched_props.preempt_timeout_max);
 }
 
-static struct kobj_attribute preempt_timeout_max_attr =
+static const struct kobj_attribute preempt_timeout_max_attr =
 	__ATTR(preempt_timeout_max, 0644, preempt_timeout_max_show,
 	       preempt_timeout_max_store);
 
@@ -457,10 +459,10 @@ static ssize_t preempt_timeout_min_show(struct kobject *kobj,
 {
 	struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj);
 
-	return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_min);
+	return sysfs_emit(buf, "%u\n", eclass->sched_props.preempt_timeout_min);
 }
 
-static struct kobj_attribute preempt_timeout_min_attr =
+static const struct kobj_attribute preempt_timeout_min_attr =
 	__ATTR(preempt_timeout_min, 0644, preempt_timeout_min_show,
 	       preempt_timeout_min_store);
 
@@ -477,7 +479,7 @@ static const struct attribute *defaults[] = {
 	NULL
 };
 
-static const struct attribute *files[] = {
+static const struct attribute * const files[] = {
 	&job_timeout_attr.attr,
 	&job_timeout_min_attr.attr,
 	&job_timeout_max_attr.attr,
@@ -498,8 +500,8 @@ static void kobj_xe_hw_engine_class_fini(struct drm_device *drm, void *arg)
 	kobject_put(kobj);
 }
 
-	static struct kobj_eclass *
-kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, char *name)
+static struct kobj_eclass *
+kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, const char *name)
 {
 	struct kobj_eclass *keclass;
 	int err = 0;
@@ -513,13 +515,13 @@ kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, char *name
 		kobject_put(&keclass->base);
 		return NULL;
 	}
+	keclass->xe = xe;
 
 	err = drmm_add_action_or_reset(&xe->drm, kobj_xe_hw_engine_class_fini,
 				       &keclass->base);
 	if (err)
-		drm_warn(&xe->drm,
-			 "%s: drmm_add_action_or_reset failed, err: %d\n",
-			 __func__, err);
+		return NULL;
+
 	return keclass;
 }
 
@@ -550,13 +552,8 @@ static int xe_add_hw_engine_class_defaults(struct xe_device *xe,
 	if (err)
 		goto err_object;
 
-	err = drmm_add_action_or_reset(&xe->drm, hw_engine_class_defaults_fini,
-				       kobj);
-	if (err)
-		drm_warn(&xe->drm,
-			 "%s: drmm_add_action_or_reset failed, err: %d\n",
-			 __func__, err);
-	return err;
+	return drmm_add_action_or_reset(&xe->drm, hw_engine_class_defaults_fini, kobj);
+
 err_object:
 	kobject_put(kobj);
 	return err;
@@ -567,9 +564,51 @@ static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj)
 	kfree(kobj);
 }
 
+static ssize_t xe_hw_engine_class_sysfs_attr_show(struct kobject *kobj,
+						  struct attribute *attr,
+						  char *buf)
+{
+	struct xe_device *xe = kobj_to_xe(kobj);
+	struct kobj_attribute *kattr;
+	ssize_t ret = -EIO;
+
+	kattr = container_of(attr, struct kobj_attribute, attr);
+	if (kattr->show) {
+		xe_pm_runtime_get(xe);
+		ret = kattr->show(kobj, kattr, buf);
+		xe_pm_runtime_put(xe);
+	}
+
+	return ret;
+}
+
+static ssize_t xe_hw_engine_class_sysfs_attr_store(struct kobject *kobj,
+						   struct attribute *attr,
+						   const char *buf,
+						   size_t count)
+{
+	struct xe_device *xe = kobj_to_xe(kobj);
+	struct kobj_attribute *kattr;
+	ssize_t ret = -EIO;
+
+	kattr = container_of(attr, struct kobj_attribute, attr);
+	if (kattr->store) {
+		xe_pm_runtime_get(xe);
+		ret = kattr->store(kobj, kattr, buf, count);
+		xe_pm_runtime_put(xe);
+	}
+
+	return ret;
+}
+
+static const struct sysfs_ops xe_hw_engine_class_sysfs_ops = {
+	.show = xe_hw_engine_class_sysfs_attr_show,
+	.store = xe_hw_engine_class_sysfs_attr_store,
+};
+
 static const struct kobj_type xe_hw_engine_sysfs_kobj_type = {
 	.release = xe_hw_engine_sysfs_kobj_release,
-	.sysfs_ops = &kobj_sysfs_ops,
+	.sysfs_ops = &xe_hw_engine_class_sysfs_ops,
 };
 
 static void hw_engine_class_sysfs_fini(struct drm_device *drm, void *arg)
@@ -579,6 +618,24 @@ static void hw_engine_class_sysfs_fini(struct drm_device *drm, void *arg)
 	kobject_put(kobj);
 }
 
+static const char *xe_hw_engine_class_to_str(enum xe_engine_class class)
+{
+	switch (class) {
+	case XE_ENGINE_CLASS_RENDER:
+		return "rcs";
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+		return "vcs";
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+		return "vecs";
+	case XE_ENGINE_CLASS_COPY:
+		return "bcs";
+	case XE_ENGINE_CLASS_COMPUTE:
+		return "ccs";
+	default:
+		return NULL;
+	}
+}
+
 /**
  * xe_hw_engine_class_sysfs_init - Init HW engine classes on GT.
  * @gt: Xe GT.
@@ -608,7 +665,7 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt)
 		goto err_object;
 
 	for_each_hw_engine(hwe, gt, id) {
-		char name[MAX_ENGINE_CLASS_NAME_LEN];
+		const char *name;
 		struct kobj_eclass *keclass;
 
 		if (hwe->class == XE_ENGINE_CLASS_OTHER ||
@@ -619,24 +676,8 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt)
 			continue;
 
 		class_mask |= 1 << hwe->class;
-
-		switch (hwe->class) {
-		case XE_ENGINE_CLASS_RENDER:
-			strcpy(name, "rcs");
-			break;
-		case XE_ENGINE_CLASS_VIDEO_DECODE:
-			strcpy(name, "vcs");
-			break;
-		case XE_ENGINE_CLASS_VIDEO_ENHANCE:
-			strcpy(name, "vecs");
-			break;
-		case XE_ENGINE_CLASS_COPY:
-			strcpy(name, "bcs");
-			break;
-		case XE_ENGINE_CLASS_COMPUTE:
-			strcpy(name, "ccs");
-			break;
-		default:
+		name = xe_hw_engine_class_to_str(hwe->class);
+		if (!name) {
 			err = -EINVAL;
 			goto err_object;
 		}
@@ -649,26 +690,16 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt)
 
 		keclass->eclass = hwe->eclass;
 		err = xe_add_hw_engine_class_defaults(xe, &keclass->base);
-		if (err) {
-			drm_warn(&xe->drm,
-				 "Add .defaults to engines failed!, err: %d\n",
-				 err);
+		if (err)
 			goto err_object;
-		}
 
 		err = sysfs_create_files(&keclass->base, files);
 		if (err)
 			goto err_object;
 	}
 
-	err = drmm_add_action_or_reset(&xe->drm, hw_engine_class_sysfs_fini,
-				       kobj);
-	if (err)
-		drm_warn(&xe->drm,
-			 "%s: drmm_add_action_or_reset failed, err: %d\n",
-			 __func__, err);
+	return drmm_add_action_or_reset(&xe->drm, hw_engine_class_sysfs_fini, kobj);
 
-	return err;
 err_object:
 	kobject_put(kobj);
 	return err;
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
index ec5ba673b3..28a0d7c909 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h
@@ -26,6 +26,8 @@ struct kobj_eclass {
 	struct kobject base;
 	/** @eclass: A pointer to the hw engine class interface */
 	struct xe_hw_engine_class_intf *eclass;
+	/** @xe: A pointer to the xe device */
+	struct xe_device *xe;
 };
 
 static inline struct xe_hw_engine_class_intf *kobj_to_eclass(struct kobject *kobj)
@@ -33,4 +35,9 @@ static inline struct xe_hw_engine_class_intf *kobj_to_eclass(struct kobject *kob
 	return container_of(kobj, struct kobj_eclass, base)->eclass;
 }
 
+static inline struct xe_device *kobj_to_xe(struct kobject *kobj)
+{
+	return container_of(kobj, struct kobj_eclass, base)->xe;
+}
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
index 39908dec04..d7f828c76c 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -79,23 +79,23 @@ struct xe_hw_engine_class_intf {
 	 * @defaults: default scheduling properties
 	 */
 	struct {
-		/** @set_job_timeout: Set job timeout in ms for engine */
+		/** @sched_props.set_job_timeout: Set job timeout in ms for engine */
 		u32 job_timeout_ms;
-		/** @job_timeout_min: Min job timeout in ms for engine */
+		/** @sched_props.job_timeout_min: Min job timeout in ms for engine */
 		u32 job_timeout_min;
-		/** @job_timeout_max: Max job timeout in ms for engine */
+		/** @sched_props.job_timeout_max: Max job timeout in ms for engine */
 		u32 job_timeout_max;
-		/** @timeslice_us: timeslice period in micro-seconds */
+		/** @sched_props.timeslice_us: timeslice period in micro-seconds */
 		u32 timeslice_us;
-		/** @timeslice_min: min timeslice period in micro-seconds */
+		/** @sched_props.timeslice_min: min timeslice period in micro-seconds */
 		u32 timeslice_min;
-		/** @timeslice_max: max timeslice period in micro-seconds */
+		/** @sched_props.timeslice_max: max timeslice period in micro-seconds */
 		u32 timeslice_max;
-		/** @preempt_timeout_us: preemption timeout in micro-seconds */
+		/** @sched_props.preempt_timeout_us: preemption timeout in micro-seconds */
 		u32 preempt_timeout_us;
-		/** @preempt_timeout_min: min preemption timeout in micro-seconds */
+		/** @sched_props.preempt_timeout_min: min preemption timeout in micro-seconds */
 		u32 preempt_timeout_min;
-		/** @preempt_timeout_max: max preemption timeout in micro-seconds */
+		/** @sched_props.preempt_timeout_max: max preemption timeout in micro-seconds */
 		u32 preempt_timeout_max;
 	} sched_props, defaults;
 };
@@ -116,6 +116,8 @@ struct xe_hw_engine {
 	u16 instance;
 	/** @logical_instance: logical instance of this hw engine */
 	u16 logical_instance;
+	/** @irq_offset: IRQ offset of this hw engine */
+	u16 irq_offset;
 	/** @mmio_base: MMIO base address of this hw engine*/
 	u32 mmio_base;
 	/**
@@ -162,62 +164,52 @@ struct xe_hw_engine_snapshot {
 	u16 logical_instance;
 	/** @forcewake: Force Wake information snapshot */
 	struct {
-		/** @domain: force wake domain of this hw engine */
+		/** @forcewake.domain: force wake domain of this hw engine */
 		enum xe_force_wake_domains domain;
-		/** @ref: Forcewake ref for the above domain */
+		/** @forcewake.ref: Forcewake ref for the above domain */
 		int ref;
 	} forcewake;
 	/** @mmio_base: MMIO base address of this hw engine*/
 	u32 mmio_base;
 	/** @reg: Useful MMIO register snapshot */
 	struct {
-		/** @ring_hwstam: RING_HWSTAM */
+		/** @reg.ring_execlist_status: RING_EXECLIST_STATUS */
+		u64 ring_execlist_status;
+		/** @reg.ring_execlist_sq_contents: RING_EXECLIST_SQ_CONTENTS */
+		u64 ring_execlist_sq_contents;
+		/** @reg.ring_acthd: RING_ACTHD */
+		u64 ring_acthd;
+		/** @reg.ring_bbaddr: RING_BBADDR */
+		u64 ring_bbaddr;
+		/** @reg.ring_dma_fadd: RING_DMA_FADD */
+		u64 ring_dma_fadd;
+		/** @reg.ring_hwstam: RING_HWSTAM */
 		u32 ring_hwstam;
-		/** @ring_hws_pga: RING_HWS_PGA */
+		/** @reg.ring_hws_pga: RING_HWS_PGA */
 		u32 ring_hws_pga;
-		/** @ring_execlist_status_lo: RING_EXECLIST_STATUS_LO */
-		u32 ring_execlist_status_lo;
-		/** @ring_execlist_status_hi: RING_EXECLIST_STATUS_HI */
-		u32 ring_execlist_status_hi;
-		/** @ring_execlist_sq_contents_lo: RING_EXECLIST_SQ_CONTENTS */
-		u32 ring_execlist_sq_contents_lo;
-		/** @ring_execlist_sq_contents_hi: RING_EXECLIST_SQ_CONTENTS + 4 */
-		u32 ring_execlist_sq_contents_hi;
-		/** @ring_start: RING_START */
+		/** @reg.ring_start: RING_START */
 		u32 ring_start;
-		/** @ring_head: RING_HEAD */
+		/** @reg.ring_head: RING_HEAD */
 		u32 ring_head;
-		/** @ring_tail: RING_TAIL */
+		/** @reg.ring_tail: RING_TAIL */
 		u32 ring_tail;
-		/** @ring_ctl: RING_CTL */
+		/** @reg.ring_ctl: RING_CTL */
 		u32 ring_ctl;
-		/** @ring_mi_mode: RING_MI_MODE */
+		/** @reg.ring_mi_mode: RING_MI_MODE */
 		u32 ring_mi_mode;
-		/** @ring_mode: RING_MODE */
+		/** @reg.ring_mode: RING_MODE */
 		u32 ring_mode;
-		/** @ring_imr: RING_IMR */
+		/** @reg.ring_imr: RING_IMR */
 		u32 ring_imr;
-		/** @ring_esr: RING_ESR */
+		/** @reg.ring_esr: RING_ESR */
 		u32 ring_esr;
-		/** @ring_emr: RING_EMR */
+		/** @reg.ring_emr: RING_EMR */
 		u32 ring_emr;
-		/** @ring_eir: RING_EIR */
+		/** @reg.ring_eir: RING_EIR */
 		u32 ring_eir;
-		/** @ring_acthd_udw: RING_ACTHD_UDW */
-		u32 ring_acthd_udw;
-		/** @ring_acthd: RING_ACTHD */
-		u32 ring_acthd;
-		/** @ring_bbaddr_udw: RING_BBADDR_UDW */
-		u32 ring_bbaddr_udw;
-		/** @ring_bbaddr: RING_BBADDR */
-		u32 ring_bbaddr;
-		/** @ring_dma_fadd_udw: RING_DMA_FADD_UDW */
-		u32 ring_dma_fadd_udw;
-		/** @ring_dma_fadd: RING_DMA_FADD */
-		u32 ring_dma_fadd;
-		/** @ipehr: IPEHR */
+		/** @reg.ipehr: IPEHR */
 		u32 ipehr;
-		/** @rcu_mode: RCU_MODE */
+		/** @reg.rcu_mode: RCU_MODE */
 		u32 rcu_mode;
 	} reg;
 };
diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c
index a5de3e7b0b..f872ef1031 100644
--- a/drivers/gpu/drm/xe/xe_hw_fence.c
+++ b/drivers/gpu/drm/xe/xe_hw_fence.c
@@ -130,7 +130,7 @@ void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt,
 	ctx->irq = irq;
 	ctx->dma_fence_ctx = dma_fence_context_alloc(1);
 	ctx->next_seqno = XE_FENCE_INITIAL_SEQNO;
-	sprintf(ctx->name, "%s", name);
+	snprintf(ctx->name, sizeof(ctx->name), "%s", name);
 }
 
 void xe_hw_fence_ctx_finish(struct xe_hw_fence_ctx *ctx)
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index a6f43446c7..d37f1dea9f 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -10,12 +10,15 @@
 #include <drm/drm_managed.h>
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_mchbar_regs.h"
+#include "regs/xe_pcode_regs.h"
 #include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_hwmon.h"
 #include "xe_mmio.h"
 #include "xe_pcode.h"
 #include "xe_pcode_api.h"
+#include "xe_sriov.h"
+#include "xe_pm.h"
 
 enum xe_hwmon_reg {
 	REG_PKG_RAPL_LIMIT,
@@ -31,6 +34,12 @@ enum xe_hwmon_reg_operation {
 	REG_READ64,
 };
 
+enum xe_hwmon_channel {
+	CHANNEL_CARD,
+	CHANNEL_PKG,
+	CHANNEL_MAX,
+};
+
 /*
  * SF_* - scale factors for particular quantities according to hwmon spec.
  */
@@ -66,61 +75,61 @@ struct xe_hwmon {
 	int scl_shift_energy;
 	/** @scl_shift_time: pkg time unit */
 	int scl_shift_time;
-	/** @ei: Energy info for energy1_input */
-	struct xe_hwmon_energy_info ei;
+	/** @ei: Energy info for energyN_input */
+	struct xe_hwmon_energy_info ei[CHANNEL_MAX];
 };
 
-static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg)
+static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg,
+				      int channel)
 {
 	struct xe_device *xe = gt_to_xe(hwmon->gt);
-	struct xe_reg reg = XE_REG(0);
 
 	switch (hwmon_reg) {
 	case REG_PKG_RAPL_LIMIT:
-		if (xe->info.platform == XE_DG2)
-			reg = PCU_CR_PACKAGE_RAPL_LIMIT;
-		else if (xe->info.platform == XE_PVC)
-			reg = PVC_GT0_PACKAGE_RAPL_LIMIT;
+		if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG)
+			return PVC_GT0_PACKAGE_RAPL_LIMIT;
+		else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG))
+			return PCU_CR_PACKAGE_RAPL_LIMIT;
 		break;
 	case REG_PKG_POWER_SKU:
-		if (xe->info.platform == XE_DG2)
-			reg = PCU_CR_PACKAGE_POWER_SKU;
-		else if (xe->info.platform == XE_PVC)
-			reg = PVC_GT0_PACKAGE_POWER_SKU;
+		if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG)
+			return PVC_GT0_PACKAGE_POWER_SKU;
+		else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG))
+			return PCU_CR_PACKAGE_POWER_SKU;
 		break;
 	case REG_PKG_POWER_SKU_UNIT:
-		if (xe->info.platform == XE_DG2)
-			reg = PCU_CR_PACKAGE_POWER_SKU_UNIT;
-		else if (xe->info.platform == XE_PVC)
-			reg = PVC_GT0_PACKAGE_POWER_SKU_UNIT;
+		if (xe->info.platform == XE_PVC)
+			return PVC_GT0_PACKAGE_POWER_SKU_UNIT;
+		else if (xe->info.platform == XE_DG2)
+			return PCU_CR_PACKAGE_POWER_SKU_UNIT;
 		break;
 	case REG_GT_PERF_STATUS:
-		if (xe->info.platform == XE_DG2)
-			reg = GT_PERF_STATUS;
+		if (xe->info.platform == XE_DG2 && channel == CHANNEL_PKG)
+			return GT_PERF_STATUS;
 		break;
 	case REG_PKG_ENERGY_STATUS:
-		if (xe->info.platform == XE_DG2)
-			reg = PCU_CR_PACKAGE_ENERGY_STATUS;
-		else if (xe->info.platform == XE_PVC)
-			reg = PVC_GT0_PLATFORM_ENERGY_STATUS;
+		if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG)
+			return PVC_GT0_PLATFORM_ENERGY_STATUS;
+		else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG))
+			return PCU_CR_PACKAGE_ENERGY_STATUS;
 		break;
 	default:
 		drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg);
 		break;
 	}
 
-	return reg.raw;
+	return XE_REG(0);
 }
 
 static void xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg,
 				 enum xe_hwmon_reg_operation operation, u64 *value,
-				 u32 clr, u32 set)
+				 u32 clr, u32 set, int channel)
 {
 	struct xe_reg reg;
 
-	reg.raw = xe_hwmon_get_reg(hwmon, hwmon_reg);
+	reg = xe_hwmon_get_reg(hwmon, hwmon_reg, channel);
 
-	if (!reg.raw)
+	if (!xe_reg_is_valid(reg))
 		return;
 
 	switch (operation) {
@@ -148,13 +157,13 @@ static void xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon
  * same pattern for sysfs, allow arbitrary PL1 limits to be set but display
  * clamped values when read.
  */
-static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, long *value)
+static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *value)
 {
 	u64 reg_val, min, max;
 
 	mutex_lock(&hwmon->hwmon_lock);
 
-	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, &reg_val, 0, 0);
+	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, &reg_val, 0, 0, channel);
 	/* Check if PL1 limit is disabled */
 	if (!(reg_val & PKG_PWR_LIM_1_EN)) {
 		*value = PL1_DISABLE;
@@ -164,7 +173,7 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, long *value)
 	reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val);
 	*value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power);
 
-	xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ64, &reg_val, 0, 0);
+	xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ64, &reg_val, 0, 0, channel);
 	min = REG_FIELD_GET(PKG_MIN_PWR, reg_val);
 	min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power);
 	max = REG_FIELD_GET(PKG_MAX_PWR, reg_val);
@@ -176,7 +185,7 @@ unlock:
 	mutex_unlock(&hwmon->hwmon_lock);
 }
 
-static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value)
+static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long value)
 {
 	int ret = 0;
 	u64 reg_val;
@@ -186,14 +195,15 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value)
 	/* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */
 	if (value == PL1_DISABLE) {
 		xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, &reg_val,
-				     PKG_PWR_LIM_1_EN, 0);
+				     PKG_PWR_LIM_1_EN, 0, channel);
 		xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, &reg_val,
-				     PKG_PWR_LIM_1_EN, 0);
+				     PKG_PWR_LIM_1_EN, 0, channel);
 
 		if (reg_val & PKG_PWR_LIM_1_EN) {
+			drm_warn(&gt_to_xe(hwmon->gt)->drm, "PL1 disable is not supported!\n");
 			ret = -EOPNOTSUPP;
-			goto unlock;
 		}
+		goto unlock;
 	}
 
 	/* Computation in 64-bits to avoid overflow. Round to nearest. */
@@ -201,17 +211,17 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value)
 	reg_val = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, reg_val);
 
 	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, &reg_val,
-			     PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val);
+			     PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val, channel);
 unlock:
 	mutex_unlock(&hwmon->hwmon_lock);
 	return ret;
 }
 
-static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value)
+static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, long *value)
 {
 	u64 reg_val;
 
-	xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ32, &reg_val, 0, 0);
+	xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ32, &reg_val, 0, 0, channel);
 	reg_val = REG_FIELD_GET(PKG_TDP, reg_val);
 	*value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power);
 }
@@ -234,16 +244,16 @@ static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value)
  * the hwmon API. Using x86_64 128 bit arithmetic (see mul_u64_u32_shr()),
  * a 'long' of 63 bits, SF_ENERGY of 1e6 (~20 bits) and
  * hwmon->scl_shift_energy of 14 bits we have 57 (63 - 20 + 14) bits before
- * energy1_input overflows. This at 1000 W is an overflow duration of 278 years.
+ * energyN_input overflows. This at 1000 W is an overflow duration of 278 years.
  */
 static void
-xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy)
+xe_hwmon_energy_get(struct xe_hwmon *hwmon, int channel, long *energy)
 {
-	struct xe_hwmon_energy_info *ei = &hwmon->ei;
+	struct xe_hwmon_energy_info *ei = &hwmon->ei[channel];
 	u64 reg_val;
 
 	xe_hwmon_process_reg(hwmon, REG_PKG_ENERGY_STATUS, REG_READ32,
-			     &reg_val, 0, 0);
+			     &reg_val, 0, 0, channel);
 
 	if (reg_val >= ei->reg_val_prev)
 		ei->accum_energy += reg_val - ei->reg_val_prev;
@@ -257,23 +267,24 @@ xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy)
 }
 
 static ssize_t
-xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *attr,
-				  char *buf)
+xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *attr,
+				 char *buf)
 {
 	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
 	u32 x, y, x_w = 2; /* 2 bits */
 	u64 r, tau4, out;
+	int sensor_index = to_sensor_dev_attr(attr)->index;
 
-	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+	xe_pm_runtime_get(gt_to_xe(hwmon->gt));
 
 	mutex_lock(&hwmon->hwmon_lock);
 
 	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT,
-			     REG_READ32, &r, 0, 0);
+			     REG_READ32, &r, 0, 0, sensor_index);
 
 	mutex_unlock(&hwmon->hwmon_lock);
 
-	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+	xe_pm_runtime_put(gt_to_xe(hwmon->gt));
 
 	x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r);
 	y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r);
@@ -297,14 +308,15 @@ xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *a
 }
 
 static ssize_t
-xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute *attr,
-				   const char *buf, size_t count)
+xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *attr,
+				  const char *buf, size_t count)
 {
 	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
 	u32 x, y, rxy, x_w = 2; /* 2 bits */
 	u64 tau4, r, max_win;
 	unsigned long val;
 	int ret;
+	int sensor_index = to_sensor_dev_attr(attr)->index;
 
 	ret = kstrtoul(buf, 0, &val);
 	if (ret)
@@ -323,7 +335,7 @@ xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute *
 
 	/*
 	 * val must be < max in hwmon interface units. The steps below are
-	 * explained in xe_hwmon_power1_max_interval_show()
+	 * explained in xe_hwmon_power_max_interval_show()
 	 */
 	r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
 	x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
@@ -352,26 +364,31 @@ xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute *
 
 	rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y);
 
-	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+	xe_pm_runtime_get(gt_to_xe(hwmon->gt));
 
 	mutex_lock(&hwmon->hwmon_lock);
 
 	xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, (u64 *)&r,
-			     PKG_PWR_LIM_1_TIME, rxy);
+			     PKG_PWR_LIM_1_TIME, rxy, sensor_index);
 
 	mutex_unlock(&hwmon->hwmon_lock);
 
-	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+	xe_pm_runtime_put(gt_to_xe(hwmon->gt));
 
 	return count;
 }
 
 static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
-			  xe_hwmon_power1_max_interval_show,
-			  xe_hwmon_power1_max_interval_store, 0);
+			  xe_hwmon_power_max_interval_show,
+			  xe_hwmon_power_max_interval_store, CHANNEL_CARD);
+
+static SENSOR_DEVICE_ATTR(power2_max_interval, 0664,
+			  xe_hwmon_power_max_interval_show,
+			  xe_hwmon_power_max_interval_store, CHANNEL_PKG);
 
 static struct attribute *hwmon_attributes[] = {
 	&sensor_dev_attr_power1_max_interval.dev_attr.attr,
+	&sensor_dev_attr_power2_max_interval.dev_attr.attr,
 	NULL
 };
 
@@ -382,12 +399,11 @@ static umode_t xe_hwmon_attributes_visible(struct kobject *kobj,
 	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
 	int ret = 0;
 
-	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+	xe_pm_runtime_get(gt_to_xe(hwmon->gt));
 
-	if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr)
-		ret = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT) ? attr->mode : 0;
+	ret = xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, index)) ? attr->mode : 0;
 
-	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+	xe_pm_runtime_put(gt_to_xe(hwmon->gt));
 
 	return ret;
 }
@@ -402,11 +418,12 @@ static const struct attribute_group *hwmon_groups[] = {
 	NULL
 };
 
-static const struct hwmon_channel_info *hwmon_info[] = {
-	HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT),
-	HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT),
-	HWMON_CHANNEL_INFO(in, HWMON_I_INPUT),
-	HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT),
+static const struct hwmon_channel_info * const hwmon_info[] = {
+	HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL,
+			   HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT | HWMON_P_LABEL),
+	HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL),
+	HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL),
+	HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT | HWMON_E_LABEL, HWMON_E_INPUT | HWMON_E_LABEL),
 	NULL
 };
 
@@ -429,7 +446,8 @@ static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval)
 			      uval);
 }
 
-static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, long *value, u32 scale_factor)
+static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel,
+					 long *value, u32 scale_factor)
 {
 	int ret;
 	u32 uval;
@@ -447,7 +465,8 @@ unlock:
 	return ret;
 }
 
-static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, long value, u32 scale_factor)
+static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel,
+					  long value, u32 scale_factor)
 {
 	int ret;
 	u32 uval;
@@ -461,117 +480,131 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, long value, u3
 	return ret;
 }
 
-static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, long *value)
+static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, int channel, long *value)
 {
 	u64 reg_val;
 
 	xe_hwmon_process_reg(hwmon, REG_GT_PERF_STATUS,
-			     REG_READ32, &reg_val, 0, 0);
+			     REG_READ32, &reg_val, 0, 0, channel);
 	/* HW register value in units of 2.5 millivolt */
 	*value = DIV_ROUND_CLOSEST(REG_FIELD_GET(VOLTAGE_MASK, reg_val) * 2500, SF_VOLTAGE);
 }
 
 static umode_t
-xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int chan)
+xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
 {
 	u32 uval;
 
 	switch (attr) {
 	case hwmon_power_max:
-		return xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT) ? 0664 : 0;
+		return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT,
+				       channel)) ? 0664 : 0;
 	case hwmon_power_rated_max:
-		return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU) ? 0444 : 0;
+		return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU,
+				       channel)) ? 0444 : 0;
 	case hwmon_power_crit:
-		return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) ||
-			!(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
+		if (channel == CHANNEL_PKG)
+			return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) ||
+				!(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
+		break;
+	case hwmon_power_label:
+		return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT,
+				       channel)) ? 0444 : 0;
 	default:
 		return 0;
 	}
+	return 0;
 }
 
 static int
-xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int chan, long *val)
+xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
 {
 	switch (attr) {
 	case hwmon_power_max:
-		xe_hwmon_power_max_read(hwmon, val);
+		xe_hwmon_power_max_read(hwmon, channel, val);
 		return 0;
 	case hwmon_power_rated_max:
-		xe_hwmon_power_rated_max_read(hwmon, val);
+		xe_hwmon_power_rated_max_read(hwmon, channel, val);
 		return 0;
 	case hwmon_power_crit:
-		return xe_hwmon_power_curr_crit_read(hwmon, val, SF_POWER);
+		return xe_hwmon_power_curr_crit_read(hwmon, channel, val, SF_POWER);
 	default:
 		return -EOPNOTSUPP;
 	}
 }
 
 static int
-xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int chan, long val)
+xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val)
 {
 	switch (attr) {
 	case hwmon_power_max:
-		return xe_hwmon_power_max_write(hwmon, val);
+		return xe_hwmon_power_max_write(hwmon, channel, val);
 	case hwmon_power_crit:
-		return xe_hwmon_power_curr_crit_write(hwmon, val, SF_POWER);
+		return xe_hwmon_power_curr_crit_write(hwmon, channel, val, SF_POWER);
 	default:
 		return -EOPNOTSUPP;
 	}
 }
 
 static umode_t
-xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr)
+xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr, int channel)
 {
 	u32 uval;
 
 	switch (attr) {
 	case hwmon_curr_crit:
-		return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) ||
-			(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
+	case hwmon_curr_label:
+		if (channel == CHANNEL_PKG)
+			return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) ||
+				(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
+		break;
 	default:
 		return 0;
 	}
+	return 0;
 }
 
 static int
-xe_hwmon_curr_read(struct xe_hwmon *hwmon, u32 attr, long *val)
+xe_hwmon_curr_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
 {
 	switch (attr) {
 	case hwmon_curr_crit:
-		return xe_hwmon_power_curr_crit_read(hwmon, val, SF_CURR);
+		return xe_hwmon_power_curr_crit_read(hwmon, channel, val, SF_CURR);
 	default:
 		return -EOPNOTSUPP;
 	}
 }
 
 static int
-xe_hwmon_curr_write(struct xe_hwmon *hwmon, u32 attr, long val)
+xe_hwmon_curr_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val)
 {
 	switch (attr) {
 	case hwmon_curr_crit:
-		return xe_hwmon_power_curr_crit_write(hwmon, val, SF_CURR);
+		return xe_hwmon_power_curr_crit_write(hwmon, channel, val, SF_CURR);
 	default:
 		return -EOPNOTSUPP;
 	}
 }
 
 static umode_t
-xe_hwmon_in_is_visible(struct xe_hwmon *hwmon, u32 attr)
+xe_hwmon_in_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
 {
 	switch (attr) {
 	case hwmon_in_input:
-		return xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS) ? 0444 : 0;
+	case hwmon_in_label:
+		return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS,
+				       channel)) ? 0444 : 0;
 	default:
 		return 0;
 	}
 }
 
 static int
-xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, long *val)
+xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
 {
 	switch (attr) {
 	case hwmon_in_input:
-		xe_hwmon_get_voltage(hwmon, val);
+		xe_hwmon_get_voltage(hwmon, channel, val);
 		return 0;
 	default:
 		return -EOPNOTSUPP;
@@ -579,22 +612,24 @@ xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, long *val)
 }
 
 static umode_t
-xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr)
+xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
 {
 	switch (attr) {
 	case hwmon_energy_input:
-		return xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS) ? 0444 : 0;
+	case hwmon_energy_label:
+		return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS,
+				       channel)) ? 0444 : 0;
 	default:
 		return 0;
 	}
 }
 
 static int
-xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, long *val)
+xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
 {
 	switch (attr) {
 	case hwmon_energy_input:
-		xe_hwmon_energy_get(hwmon, val);
+		xe_hwmon_energy_get(hwmon, channel, val);
 		return 0;
 	default:
 		return -EOPNOTSUPP;
@@ -608,27 +643,27 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
 	struct xe_hwmon *hwmon = (struct xe_hwmon *)drvdata;
 	int ret;
 
-	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+	xe_pm_runtime_get(gt_to_xe(hwmon->gt));
 
 	switch (type) {
 	case hwmon_power:
 		ret = xe_hwmon_power_is_visible(hwmon, attr, channel);
 		break;
 	case hwmon_curr:
-		ret = xe_hwmon_curr_is_visible(hwmon, attr);
+		ret = xe_hwmon_curr_is_visible(hwmon, attr, channel);
 		break;
 	case hwmon_in:
-		ret = xe_hwmon_in_is_visible(hwmon, attr);
+		ret = xe_hwmon_in_is_visible(hwmon, attr, channel);
 		break;
 	case hwmon_energy:
-		ret = xe_hwmon_energy_is_visible(hwmon, attr);
+		ret = xe_hwmon_energy_is_visible(hwmon, attr, channel);
 		break;
 	default:
 		ret = 0;
 		break;
 	}
 
-	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+	xe_pm_runtime_put(gt_to_xe(hwmon->gt));
 
 	return ret;
 }
@@ -640,27 +675,27 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
 	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
 	int ret;
 
-	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+	xe_pm_runtime_get(gt_to_xe(hwmon->gt));
 
 	switch (type) {
 	case hwmon_power:
 		ret = xe_hwmon_power_read(hwmon, attr, channel, val);
 		break;
 	case hwmon_curr:
-		ret = xe_hwmon_curr_read(hwmon, attr, val);
+		ret = xe_hwmon_curr_read(hwmon, attr, channel, val);
 		break;
 	case hwmon_in:
-		ret = xe_hwmon_in_read(hwmon, attr, val);
+		ret = xe_hwmon_in_read(hwmon, attr, channel, val);
 		break;
 	case hwmon_energy:
-		ret = xe_hwmon_energy_read(hwmon, attr, val);
+		ret = xe_hwmon_energy_read(hwmon, attr, channel, val);
 		break;
 	default:
 		ret = -EOPNOTSUPP;
 		break;
 	}
 
-	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+	xe_pm_runtime_put(gt_to_xe(hwmon->gt));
 
 	return ret;
 }
@@ -672,29 +707,49 @@ xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr,
 	struct xe_hwmon *hwmon = dev_get_drvdata(dev);
 	int ret;
 
-	xe_device_mem_access_get(gt_to_xe(hwmon->gt));
+	xe_pm_runtime_get(gt_to_xe(hwmon->gt));
 
 	switch (type) {
 	case hwmon_power:
 		ret = xe_hwmon_power_write(hwmon, attr, channel, val);
 		break;
 	case hwmon_curr:
-		ret = xe_hwmon_curr_write(hwmon, attr, val);
+		ret = xe_hwmon_curr_write(hwmon, attr, channel, val);
 		break;
 	default:
 		ret = -EOPNOTSUPP;
 		break;
 	}
 
-	xe_device_mem_access_put(gt_to_xe(hwmon->gt));
+	xe_pm_runtime_put(gt_to_xe(hwmon->gt));
 
 	return ret;
 }
 
+static int xe_hwmon_read_label(struct device *dev,
+			       enum hwmon_sensor_types type,
+			       u32 attr, int channel, const char **str)
+{
+	switch (type) {
+	case hwmon_power:
+	case hwmon_energy:
+	case hwmon_curr:
+	case hwmon_in:
+		if (channel == CHANNEL_CARD)
+			*str = "card";
+		else if (channel == CHANNEL_PKG)
+			*str = "pkg";
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static const struct hwmon_ops hwmon_ops = {
 	.is_visible = xe_hwmon_is_visible,
 	.read = xe_hwmon_read,
 	.write = xe_hwmon_write,
+	.read_string = xe_hwmon_read_label,
 };
 
 static const struct hwmon_chip_info hwmon_chip_info = {
@@ -708,14 +763,15 @@ xe_hwmon_get_preregistration_info(struct xe_device *xe)
 	struct xe_hwmon *hwmon = xe->hwmon;
 	long energy;
 	u64 val_sku_unit = 0;
+	int channel;
 
 	/*
 	 * The contents of register PKG_POWER_SKU_UNIT do not change,
 	 * so read it once and store the shift values.
 	 */
-	if (xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT)) {
+	if (xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0))) {
 		xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU_UNIT,
-				     REG_READ32, &val_sku_unit, 0, 0);
+				     REG_READ32, &val_sku_unit, 0, 0, 0);
 		hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit);
 		hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit);
 		hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit);
@@ -725,8 +781,9 @@ xe_hwmon_get_preregistration_info(struct xe_device *xe)
 	 * Initialize 'struct xe_hwmon_energy_info', i.e. set fields to the
 	 * first value of the energy register read
 	 */
-	if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, 0))
-		xe_hwmon_energy_get(hwmon, &energy);
+	for (channel = 0; channel < CHANNEL_MAX; channel++)
+		if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, channel))
+			xe_hwmon_energy_get(hwmon, channel, &energy);
 }
 
 static void xe_hwmon_mutex_destroy(void *arg)
@@ -745,6 +802,10 @@ void xe_hwmon_register(struct xe_device *xe)
 	if (!IS_DGFX(xe))
 		return;
 
+	/* hwmon is not available on VFs */
+	if (IS_SRIOV_VF(xe))
+		return;
+
 	hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL);
 	if (!hwmon)
 		return;
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index d1f5ba4bb7..9968063531 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -9,15 +9,18 @@
 
 #include <drm/drm_managed.h>
 
+#include "display/xe_display.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_regs.h"
 #include "xe_device.h"
-#include "xe_display.h"
 #include "xe_drv.h"
+#include "xe_gsc_proxy.h"
 #include "xe_gt.h"
 #include "xe_guc.h"
 #include "xe_hw_engine.h"
+#include "xe_memirq.h"
 #include "xe_mmio.h"
+#include "xe_sriov.h"
 
 /*
  * Interrupt registers for a unit are always consecutive and ordered
@@ -129,6 +132,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt)
 	u32 ccs_mask, bcs_mask;
 	u32 irqs, dmask, smask;
 	u32 gsc_mask = 0;
+	u32 heci_mask = 0;
 
 	if (xe_device_uc_enabled(xe)) {
 		irqs = GT_RENDER_USER_INTERRUPT |
@@ -178,14 +182,23 @@ void xe_irq_enable_hwe(struct xe_gt *gt)
 		xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~dmask);
 		xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~dmask);
 
-		if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER))
-			gsc_mask = irqs;
-		else if (HAS_HECI_GSCFI(xe))
+		/*
+		 * the heci2 interrupt is enabled via the same register as the
+		 * GSCCS interrupts, but it has its own mask register.
+		 */
+		if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) {
+			gsc_mask = irqs | GSC_ER_COMPLETE;
+			heci_mask = GSC_IRQ_INTF(1);
+		} else if (HAS_HECI_GSCFI(xe)) {
 			gsc_mask = GSC_IRQ_INTF(1);
+		}
+
 		if (gsc_mask) {
-			xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, gsc_mask);
+			xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, gsc_mask | heci_mask);
 			xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~gsc_mask);
 		}
+		if (heci_mask)
+			xe_mmio_write32(gt, HECI2_RSVD_INTR_MASK, ~(heci_mask << 16));
 	}
 }
 
@@ -232,6 +245,8 @@ gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir)
 		return xe_guc_irq_handler(&gt->uc.guc, iir);
 	if (instance == OTHER_MEDIA_GUC_INSTANCE && xe_gt_is_media_type(gt))
 		return xe_guc_irq_handler(&gt->uc.guc, iir);
+	if (instance == OTHER_GSC_HECI2_INSTANCE && xe_gt_is_media_type(gt))
+		return xe_gsc_proxy_irq_handler(&gt->uc.gsc, iir);
 
 	if (instance != OTHER_GUC_INSTANCE &&
 	    instance != OTHER_MEDIA_GUC_INSTANCE) {
@@ -249,15 +264,23 @@ static struct xe_gt *pick_engine_gt(struct xe_tile *tile,
 	if (MEDIA_VER(xe) < 13)
 		return tile->primary_gt;
 
-	if (class == XE_ENGINE_CLASS_VIDEO_DECODE ||
-	    class == XE_ENGINE_CLASS_VIDEO_ENHANCE)
+	switch (class) {
+	case XE_ENGINE_CLASS_VIDEO_DECODE:
+	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
 		return tile->media_gt;
-
-	if (class == XE_ENGINE_CLASS_OTHER &&
-	    (instance == OTHER_MEDIA_GUC_INSTANCE || instance == OTHER_GSC_INSTANCE))
-		return tile->media_gt;
-
-	return tile->primary_gt;
+	case XE_ENGINE_CLASS_OTHER:
+		switch (instance) {
+		case OTHER_MEDIA_GUC_INSTANCE:
+		case OTHER_GSC_INSTANCE:
+		case OTHER_GSC_HECI2_INSTANCE:
+			return tile->media_gt;
+		default:
+			break;
+		};
+		fallthrough;
+	default:
+		return tile->primary_gt;
+	}
 }
 
 static void gt_irq_handler(struct xe_tile *tile,
@@ -303,7 +326,6 @@ static void gt_irq_handler(struct xe_tile *tile,
 					xe_heci_gsc_irq_handler(xe, intr_vec);
 				else
 					gt_other_irq_handler(engine_gt, instance, intr_vec);
-				continue;
 			}
 		}
 	}
@@ -419,7 +441,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
 		 * irq as device is inaccessible.
 		 */
 		if (master_ctl == REG_GENMASK(31, 0)) {
-			dev_dbg(tile_to_xe(tile)->drm.dev,
+			drm_dbg(&tile_to_xe(tile)->drm,
 				"Ignore this IRQ as device might be in DPC containment.\n");
 			return IRQ_HANDLED;
 		}
@@ -484,6 +506,7 @@ static void gt_irq_reset(struct xe_tile *tile)
 	    HAS_HECI_GSCFI(tile_to_xe(tile))) {
 		xe_mmio_write32(mmio, GUNIT_GSC_INTR_ENABLE, 0);
 		xe_mmio_write32(mmio, GUNIT_GSC_INTR_MASK, ~0);
+		xe_mmio_write32(mmio, HECI2_RSVD_INTR_MASK, ~0);
 	}
 
 	xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_ENABLE, 0);
@@ -498,6 +521,9 @@ static void xelp_irq_reset(struct xe_tile *tile)
 
 	gt_irq_reset(tile);
 
+	if (IS_SRIOV_VF(tile_to_xe(tile)))
+		return;
+
 	mask_and_disable(tile, PCU_IRQ_OFFSET);
 }
 
@@ -508,6 +534,9 @@ static void dg1_irq_reset(struct xe_tile *tile)
 
 	gt_irq_reset(tile);
 
+	if (IS_SRIOV_VF(tile_to_xe(tile)))
+		return;
+
 	mask_and_disable(tile, PCU_IRQ_OFFSET);
 }
 
@@ -518,11 +547,34 @@ static void dg1_irq_reset_mstr(struct xe_tile *tile)
 	xe_mmio_write32(mmio, GFX_MSTR_IRQ, ~0);
 }
 
+static void vf_irq_reset(struct xe_device *xe)
+{
+	struct xe_tile *tile;
+	unsigned int id;
+
+	xe_assert(xe, IS_SRIOV_VF(xe));
+
+	if (GRAPHICS_VERx100(xe) < 1210)
+		xelp_intr_disable(xe);
+	else
+		xe_assert(xe, xe_device_has_memirq(xe));
+
+	for_each_tile(tile, xe, id) {
+		if (xe_device_has_memirq(xe))
+			xe_memirq_reset(&tile->sriov.vf.memirq);
+		else
+			gt_irq_reset(tile);
+	}
+}
+
 static void xe_irq_reset(struct xe_device *xe)
 {
 	struct xe_tile *tile;
 	u8 id;
 
+	if (IS_SRIOV_VF(xe))
+		return vf_irq_reset(xe);
+
 	for_each_tile(tile, xe, id) {
 		if (GRAPHICS_VERx100(xe) >= 1210)
 			dg1_irq_reset(tile);
@@ -545,8 +597,26 @@ static void xe_irq_reset(struct xe_device *xe)
 	}
 }
 
+static void vf_irq_postinstall(struct xe_device *xe)
+{
+	struct xe_tile *tile;
+	unsigned int id;
+
+	for_each_tile(tile, xe, id)
+		if (xe_device_has_memirq(xe))
+			xe_memirq_postinstall(&tile->sriov.vf.memirq);
+
+	if (GRAPHICS_VERx100(xe) < 1210)
+		xelp_intr_enable(xe, true);
+	else
+		xe_assert(xe, xe_device_has_memirq(xe));
+}
+
 static void xe_irq_postinstall(struct xe_device *xe)
 {
+	if (IS_SRIOV_VF(xe))
+		return vf_irq_postinstall(xe);
+
 	xe_display_irq_postinstall(xe, xe_root_mmio_gt(xe));
 
 	/*
@@ -563,8 +633,30 @@ static void xe_irq_postinstall(struct xe_device *xe)
 		xelp_intr_enable(xe, true);
 }
 
+static irqreturn_t vf_mem_irq_handler(int irq, void *arg)
+{
+	struct xe_device *xe = arg;
+	struct xe_tile *tile;
+	unsigned int id;
+
+	spin_lock(&xe->irq.lock);
+	if (!xe->irq.enabled) {
+		spin_unlock(&xe->irq.lock);
+		return IRQ_NONE;
+	}
+	spin_unlock(&xe->irq.lock);
+
+	for_each_tile(tile, xe, id)
+		xe_memirq_handler(&tile->sriov.vf.memirq);
+
+	return IRQ_HANDLED;
+}
+
 static irq_handler_t xe_irq_handler(struct xe_device *xe)
 {
+	if (IS_SRIOV_VF(xe) && xe_device_has_memirq(xe))
+		return vf_mem_irq_handler;
+
 	if (GRAPHICS_VERx100(xe) >= 1210)
 		return dg1_irq_handler;
 	else
@@ -590,8 +682,9 @@ static void irq_uninstall(struct drm_device *drm, void *arg)
 int xe_irq_install(struct xe_device *xe)
 {
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	unsigned int irq_flags = PCI_IRQ_MSIX;
 	irq_handler_t irq_handler;
-	int err, irq;
+	int err, irq, nvec;
 
 	irq_handler = xe_irq_handler(xe);
 	if (!irq_handler) {
@@ -601,7 +694,19 @@ int xe_irq_install(struct xe_device *xe)
 
 	xe_irq_reset(xe);
 
-	err = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX);
+	nvec = pci_msix_vec_count(pdev);
+	if (nvec <= 0) {
+		if (nvec == -EINVAL) {
+			/* MSIX capability is not supported in the device, using MSI */
+			irq_flags = PCI_IRQ_MSI;
+			nvec = 1;
+		} else {
+			drm_err(&xe->drm, "MSIX: Failed getting count\n");
+			return nvec;
+		}
+	}
+
+	err = pci_alloc_irq_vectors(pdev, nvec, nvec, irq_flags);
 	if (err < 0) {
 		drm_err(&xe->drm, "MSI/MSIX: Failed to enable support %d\n", err);
 		return err;
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
index 0d7c5514e0..418661a889 100644
--- a/drivers/gpu/drm/xe/xe_lmtt.c
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -35,7 +35,7 @@
 
 static bool xe_has_multi_level_lmtt(struct xe_device *xe)
 {
-	return xe->info.platform == XE_PVC;
+	return GRAPHICS_VERx100(xe) >= 1260;
 }
 
 static struct xe_tile *lmtt_to_tile(struct xe_lmtt *lmtt)
@@ -70,8 +70,8 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level
 				  PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) *
 					     lmtt->ops->lmtt_pte_num(level)),
 				  ttm_bo_type_kernel,
-				  XE_BO_CREATE_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) |
-				  XE_BO_CREATE_PINNED_BIT);
+				  XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) |
+				  XE_BO_NEEDS_64K | XE_BO_FLAG_PINNED);
 	if (IS_ERR(bo)) {
 		err = PTR_ERR(bo);
 		goto out_free_pt;
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 72f04a656e..d7bf7bc9dc 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -5,8 +5,11 @@
 
 #include "xe_lrc.h"
 
+#include <linux/ascii85.h>
+
 #include "instructions/xe_mi_commands.h"
 #include "instructions/xe_gfxpipe_commands.h"
+#include "instructions/xe_gfx_state_commands.h"
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gpu_commands.h"
 #include "regs/xe_lrc_layout.h"
@@ -19,15 +22,32 @@
 #include "xe_gt_printk.h"
 #include "xe_hw_fence.h"
 #include "xe_map.h"
+#include "xe_memirq.h"
+#include "xe_sriov.h"
 #include "xe_vm.h"
 
-#define LRC_VALID				(1 << 0)
-#define LRC_PRIVILEGE				(1 << 8)
-#define LRC_ADDRESSING_MODE_SHIFT		3
+#define LRC_VALID				BIT_ULL(0)
+#define LRC_PRIVILEGE				BIT_ULL(8)
+#define LRC_ADDRESSING_MODE			GENMASK_ULL(4, 3)
 #define LRC_LEGACY_64B_CONTEXT			3
 
-#define ENGINE_CLASS_SHIFT			61
-#define ENGINE_INSTANCE_SHIFT			48
+#define LRC_ENGINE_CLASS			GENMASK_ULL(63, 61)
+#define LRC_ENGINE_INSTANCE			GENMASK_ULL(53, 48)
+
+struct xe_lrc_snapshot {
+	struct xe_bo *lrc_bo;
+	void *lrc_snapshot;
+	unsigned long lrc_size, lrc_offset;
+
+	u32 context_desc;
+	u32 head;
+	struct {
+		u32 internal;
+		u32 memory;
+	} tail;
+	u32 start_seqno;
+	u32 seqno;
+};
 
 static struct xe_device *
 lrc_to_xe(struct xe_lrc *lrc)
@@ -529,6 +549,27 @@ static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
 	/* TODO: Timestamp */
 }
 
+static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
+{
+	struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;
+	struct xe_device *xe = gt_to_xe(hwe->gt);
+
+	if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
+		return;
+
+	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
+					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
+	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
+	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);
+
+	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
+				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
+	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
+	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
+	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
+	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
+}
+
 static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
 {
 	struct xe_device *xe = gt_to_xe(hwe->gt);
@@ -611,7 +652,7 @@ static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
 	iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
 	return map; \
 } \
-static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
+static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
 { \
 	return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
 } \
@@ -664,6 +705,7 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe)
 	regs = data + LRC_PPHWSP_SIZE;
 	set_offsets(regs, reg_offsets(xe, hwe->class), hwe);
 	set_context_control(regs, hwe);
+	set_memory_based_intr(regs, hwe);
 	reset_stop_ring(regs, hwe);
 
 	return data;
@@ -700,8 +742,9 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 	lrc->bo = xe_bo_create_pin_map(xe, tile, vm,
 				      ring_size + xe_lrc_size(xe, hwe->class),
 				      ttm_bo_type_kernel,
-				      XE_BO_CREATE_VRAM_IF_DGFX(tile) |
-				      XE_BO_CREATE_GGTT_BIT);
+				      XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+				      XE_BO_FLAG_GGTT |
+				      XE_BO_FLAG_GGTT_INVALIDATE);
 	if (IS_ERR(lrc->bo))
 		return PTR_ERR(lrc->bo);
 
@@ -752,7 +795,7 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 		xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
 
 	lrc->desc = LRC_VALID;
-	lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT;
+	lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
 	/* TODO: Priority */
 
 	/* While this appears to have something about privileged batches or
@@ -762,8 +805,8 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 		lrc->desc |= LRC_PRIVILEGE;
 
 	if (GRAPHICS_VERx100(xe) < 1250) {
-		lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT;
-		lrc->desc |= (u64)hwe->class << ENGINE_CLASS_SHIFT;
+		lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
+		lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
 	}
 
 	arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
@@ -953,6 +996,20 @@ static int dump_mi_command(struct drm_printer *p,
 			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
 		return numdw;
 
+	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
+		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
+			   inst_header,
+			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
+			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
+		if (numdw == 4)
+			drm_printf(p, " - %#6x = %#010llx\n",
+				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
+		else
+			drm_printf(p, " - %*ph (%s)\n",
+				   (int)sizeof(u32) * (numdw - 1), dw + 1,
+				   numdw < 4 ? "truncated" : "malformed");
+		return numdw;
+
 	case MI_FORCE_WAKEUP:
 		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
 		return numdw;
@@ -996,6 +1053,8 @@ static int dump_gfxpipe_command(struct drm_printer *p,
 	MATCH(GPGPU_CSR_BASE_ADDRESS);
 	MATCH(STATE_COMPUTE_MODE);
 	MATCH3D(3DSTATE_BTD);
+	MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
+	MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);
 
 	MATCH3D(3DSTATE_VF_STATISTICS);
 
@@ -1020,6 +1079,7 @@ static int dump_gfxpipe_command(struct drm_printer *p,
 	MATCH3D(3DSTATE_WM);
 	MATCH3D(3DSTATE_CONSTANT_VS);
 	MATCH3D(3DSTATE_CONSTANT_GS);
+	MATCH3D(3DSTATE_CONSTANT_PS);
 	MATCH3D(3DSTATE_SAMPLE_MASK);
 	MATCH3D(3DSTATE_CONSTANT_HS);
 	MATCH3D(3DSTATE_CONSTANT_DS);
@@ -1112,6 +1172,31 @@ static int dump_gfxpipe_command(struct drm_printer *p,
 	}
 }
 
+static int dump_gfx_state_command(struct drm_printer *p,
+				  struct xe_gt *gt,
+				  u32 *dw,
+				  int remaining_dw)
+{
+	u32 numdw = instr_dw(*dw);
+	u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);
+
+	/*
+	 * Make sure we haven't mis-parsed a number of dwords that exceeds the
+	 * remaining size of the LRC.
+	 */
+	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
+		numdw = remaining_dw;
+
+	switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
+	MATCH(STATE_WRITE_INLINE);
+
+	default:
+		drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
+			   *dw, opcode, numdw);
+		return numdw;
+	}
+}
+
 void xe_lrc_dump_default(struct drm_printer *p,
 			 struct xe_gt *gt,
 			 enum xe_engine_class hwe_class)
@@ -1136,6 +1221,8 @@ void xe_lrc_dump_default(struct drm_printer *p,
 			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
 		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
 			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
+		} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
+			num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
 		} else {
 			num_dw = min(instr_dw(*dw), remaining_dw);
 			drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
@@ -1259,3 +1346,114 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b
 		bb->len += num_dw;
 	}
 }
+
+struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
+{
+	struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);
+
+	if (!snapshot)
+		return NULL;
+
+	if (lrc->bo && lrc->bo->vm)
+		xe_vm_get(lrc->bo->vm);
+
+	snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
+	snapshot->head = xe_lrc_ring_head(lrc);
+	snapshot->tail.internal = lrc->ring.tail;
+	snapshot->tail.memory = xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL);
+	snapshot->start_seqno = xe_lrc_start_seqno(lrc);
+	snapshot->seqno = xe_lrc_seqno(lrc);
+	snapshot->lrc_bo = xe_bo_get(lrc->bo);
+	snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
+	snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
+	snapshot->lrc_snapshot = NULL;
+	return snapshot;
+}
+
+void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
+{
+	struct xe_bo *bo;
+	struct xe_vm *vm;
+	struct iosys_map src;
+
+	if (!snapshot)
+		return;
+
+	bo = snapshot->lrc_bo;
+	vm = bo->vm;
+	snapshot->lrc_bo = NULL;
+
+	snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
+	if (!snapshot->lrc_snapshot)
+		goto put_bo;
+
+	dma_resv_lock(bo->ttm.base.resv, NULL);
+	if (!ttm_bo_vmap(&bo->ttm, &src)) {
+		xe_map_memcpy_from(xe_bo_device(bo),
+				   snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
+				   snapshot->lrc_size);
+		ttm_bo_vunmap(&bo->ttm, &src);
+	} else {
+		kvfree(snapshot->lrc_snapshot);
+		snapshot->lrc_snapshot = NULL;
+	}
+	dma_resv_unlock(bo->ttm.base.resv);
+put_bo:
+	xe_bo_put(bo);
+	if (vm)
+		xe_vm_put(vm);
+}
+
+void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
+{
+	unsigned long i;
+
+	if (!snapshot)
+		return;
+
+	drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
+	drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
+	drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
+		   snapshot->tail.internal, snapshot->tail.memory);
+	drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
+	drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
+
+	if (!snapshot->lrc_snapshot)
+		return;
+
+	drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
+	drm_puts(p, "\t[HWSP].data: ");
+	for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
+		u32 *val = snapshot->lrc_snapshot + i;
+		char dumped[ASCII85_BUFSZ];
+
+		drm_puts(p, ascii85_encode(*val, dumped));
+	}
+
+	drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
+	drm_puts(p, "\t[HWCTX].data: ");
+	for (; i < snapshot->lrc_size; i += sizeof(u32)) {
+		u32 *val = snapshot->lrc_snapshot + i;
+		char dumped[ASCII85_BUFSZ];
+
+		drm_puts(p, ascii85_encode(*val, dumped));
+	}
+	drm_puts(p, "\n");
+}
+
+void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
+{
+	if (!snapshot)
+		return;
+
+	kvfree(snapshot->lrc_snapshot);
+	if (snapshot->lrc_bo) {
+		struct xe_vm *vm;
+
+		vm = snapshot->lrc_bo->vm;
+		xe_bo_put(snapshot->lrc_bo);
+		if (vm)
+			xe_vm_put(vm);
+	}
+	kfree(snapshot);
+}
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
index 28b1d3f404..d32fa31faa 100644
--- a/drivers/gpu/drm/xe/xe_lrc.h
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -55,4 +55,9 @@ void xe_lrc_dump_default(struct drm_printer *p,
 
 void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb);
 
+struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc);
+void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot);
+void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p);
+void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot);
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h
index 7822033606..b716df0dfb 100644
--- a/drivers/gpu/drm/xe/xe_lrc_types.h
+++ b/drivers/gpu/drm/xe/xe_lrc_types.h
@@ -28,11 +28,11 @@ struct xe_lrc {
 
 	/** @ring: submission ring state */
 	struct {
-		/** @size: size of submission ring */
+		/** @ring.size: size of submission ring */
 		u32 size;
-		/** @tail: tail of submission ring */
+		/** @ring.tail: tail of submission ring */
 		u32 tail;
-		/** @old_tail: shadow of tail */
+		/** @ring.old_tail: shadow of tail */
 		u32 old_tail;
 	} ring;
 
@@ -43,4 +43,6 @@ struct xe_lrc {
 	struct xe_hw_fence_ctx fence_ctx;
 };
 
+struct xe_lrc_snapshot;
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c
new file mode 100644
index 0000000000..95b6e9d7b7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_memirq.c
@@ -0,0 +1,431 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_guc_regs.h"
+#include "regs/xe_regs.h"
+
+#include "xe_assert.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_device_types.h"
+#include "xe_gt.h"
+#include "xe_gt_printk.h"
+#include "xe_guc.h"
+#include "xe_hw_engine.h"
+#include "xe_map.h"
+#include "xe_memirq.h"
+#include "xe_sriov.h"
+#include "xe_sriov_printk.h"
+
+#define memirq_assert(m, condition)	xe_tile_assert(memirq_to_tile(m), condition)
+#define memirq_debug(m, msg...)		xe_sriov_dbg_verbose(memirq_to_xe(m), "MEMIRQ: " msg)
+
+static struct xe_tile *memirq_to_tile(struct xe_memirq *memirq)
+{
+	return container_of(memirq, struct xe_tile, sriov.vf.memirq);
+}
+
+static struct xe_device *memirq_to_xe(struct xe_memirq *memirq)
+{
+	return tile_to_xe(memirq_to_tile(memirq));
+}
+
+static const char *guc_name(struct xe_guc *guc)
+{
+	return xe_gt_is_media_type(guc_to_gt(guc)) ? "media GuC" : "GuC";
+}
+
+/**
+ * DOC: Memory Based Interrupts
+ *
+ * MMIO register based interrupts infrastructure used for non-virtualized mode
+ * or SRIOV-8 (which supports 8 Virtual Functions) does not scale efficiently
+ * to allow delivering interrupts to a large number of Virtual machines or
+ * containers. Memory based interrupt status reporting provides an efficient
+ * and scalable infrastructure.
+ *
+ * For memory based interrupt status reporting hardware sequence is:
+ *  * Engine writes the interrupt event to memory
+ *    (Pointer to memory location is provided by SW. This memory surface must
+ *    be mapped to system memory and must be marked as un-cacheable (UC) on
+ *    Graphics IP Caches)
+ *  * Engine triggers an interrupt to host.
+ */
+
+/**
+ * DOC: Memory Based Interrupts Page Layout
+ *
+ * `Memory Based Interrupts`_ requires three different objects, which are
+ * called "page" in the specs, even if they aren't page-sized or aligned.
+ *
+ * To simplify the code we allocate a single page size object and then use
+ * offsets to embedded "pages". The address of those "pages" are then
+ * programmed in the HW via LRI and LRM in the context image.
+ *
+ * - _`Interrupt Status Report Page`: this page contains the interrupt
+ *   status vectors for each unit. Each bit in the interrupt vectors is
+ *   converted to a byte, with the byte being set to 0xFF when an
+ *   interrupt is triggered; interrupt vectors are 16b big so each unit
+ *   gets 16B. One space is reserved for each bit in one of the
+ *   GT_INTR_DWx registers, so this object needs a total of 1024B.
+ *   This object needs to be 4KiB aligned.
+ *
+ * - _`Interrupt Source Report Page`: this is the equivalent of the
+ *   GEN11_GT_INTR_DWx registers, with each bit in those registers being
+ *   mapped to a byte here. The offsets are the same, just bytes instead
+ *   of bits. This object needs to be cacheline aligned.
+ *
+ * - Interrupt Mask: the HW needs a location to fetch the interrupt
+ *   mask vector to be used by the LRM in the context, so we just use
+ *   the next available space in the interrupt page.
+ *
+ * ::
+ *
+ *   0x0000   +===========+  <== Interrupt Status Report Page
+ *            |           |
+ *            |           |     ____ +----+----------------+
+ *            |           |    /     |  0 | USER INTERRUPT |
+ *            +-----------+ __/      |  1 |                |
+ *            |  HWE(n)   | __       |    | CTX SWITCH     |
+ *            +-----------+   \      |    | WAIT SEMAPHORE |
+ *            |           |    \____ | 15 |                |
+ *            |           |          +----+----------------+
+ *            |           |
+ *   0x0400   +===========+  <== Interrupt Source Report Page
+ *            |  HWE(0)   |
+ *            |  HWE(1)   |
+ *            |           |
+ *            |  HWE(x)   |
+ *   0x0440   +===========+  <== Interrupt Enable Mask
+ *            |           |
+ *            |           |
+ *            +-----------+
+ */
+
+static void __release_xe_bo(struct drm_device *drm, void *arg)
+{
+	struct xe_bo *bo = arg;
+
+	xe_bo_unpin_map_no_vm(bo);
+}
+
+static int memirq_alloc_pages(struct xe_memirq *memirq)
+{
+	struct xe_device *xe = memirq_to_xe(memirq);
+	struct xe_tile *tile = memirq_to_tile(memirq);
+	struct xe_bo *bo;
+	int err;
+
+	BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_SOURCE_OFFSET, SZ_64));
+	BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_STATUS_OFFSET, SZ_4K));
+
+	/* XXX: convert to managed bo */
+	bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K,
+				  ttm_bo_type_kernel,
+				  XE_BO_FLAG_SYSTEM |
+				  XE_BO_FLAG_GGTT |
+				  XE_BO_FLAG_GGTT_INVALIDATE |
+				  XE_BO_FLAG_NEEDS_UC |
+				  XE_BO_FLAG_NEEDS_CPU_ACCESS);
+	if (IS_ERR(bo)) {
+		err = PTR_ERR(bo);
+		goto out;
+	}
+
+	memirq_assert(memirq, !xe_bo_is_vram(bo));
+	memirq_assert(memirq, !memirq->bo);
+
+	iosys_map_memset(&bo->vmap, 0, 0, SZ_4K);
+
+	memirq->bo = bo;
+	memirq->source = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_SOURCE_OFFSET);
+	memirq->status = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_STATUS_OFFSET);
+	memirq->mask = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_ENABLE_OFFSET);
+
+	memirq_assert(memirq, !memirq->source.is_iomem);
+	memirq_assert(memirq, !memirq->status.is_iomem);
+	memirq_assert(memirq, !memirq->mask.is_iomem);
+
+	memirq_debug(memirq, "page offsets: source %#x status %#x\n",
+		     xe_memirq_source_ptr(memirq), xe_memirq_status_ptr(memirq));
+
+	return drmm_add_action_or_reset(&xe->drm, __release_xe_bo, memirq->bo);
+
+out:
+	xe_sriov_err(memirq_to_xe(memirq),
+		     "Failed to allocate memirq page (%pe)\n", ERR_PTR(err));
+	return err;
+}
+
+static void memirq_set_enable(struct xe_memirq *memirq, bool enable)
+{
+	iosys_map_wr(&memirq->mask, 0, u32, enable ? GENMASK(15, 0) : 0);
+
+	memirq->enabled = enable;
+}
+
+/**
+ * xe_memirq_init - Initialize data used by `Memory Based Interrupts`_.
+ * @memirq: the &xe_memirq to initialize
+ *
+ * Allocate `Interrupt Source Report Page`_ and `Interrupt Status Report Page`_
+ * used by `Memory Based Interrupts`_.
+ *
+ * These allocations are managed and will be implicitly released on unload.
+ *
+ * Note: This function shall be called only by the VF driver.
+ *
+ * If this function fails then VF driver won't be able to operate correctly.
+ * If `Memory Based Interrupts`_ are not used this function will return 0.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_memirq_init(struct xe_memirq *memirq)
+{
+	struct xe_device *xe = memirq_to_xe(memirq);
+	int err;
+
+	memirq_assert(memirq, IS_SRIOV_VF(xe));
+
+	if (!xe_device_has_memirq(xe))
+		return 0;
+
+	err = memirq_alloc_pages(memirq);
+	if (unlikely(err))
+		return err;
+
+	/* we need to start with all irqs enabled */
+	memirq_set_enable(memirq, true);
+
+	return 0;
+}
+
+/**
+ * xe_memirq_source_ptr - Get GGTT's offset of the `Interrupt Source Report Page`_.
+ * @memirq: the &xe_memirq to query
+ *
+ * Shall be called only on VF driver when `Memory Based Interrupts`_ are used
+ * and xe_memirq_init() didn't fail.
+ *
+ * Return: GGTT's offset of the `Interrupt Source Report Page`_.
+ */
+u32 xe_memirq_source_ptr(struct xe_memirq *memirq)
+{
+	memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
+	memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+	memirq_assert(memirq, memirq->bo);
+
+	return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_SOURCE_OFFSET;
+}
+
+/**
+ * xe_memirq_status_ptr - Get GGTT's offset of the `Interrupt Status Report Page`_.
+ * @memirq: the &xe_memirq to query
+ *
+ * Shall be called only on VF driver when `Memory Based Interrupts`_ are used
+ * and xe_memirq_init() didn't fail.
+ *
+ * Return: GGTT's offset of the `Interrupt Status Report Page`_.
+ */
+u32 xe_memirq_status_ptr(struct xe_memirq *memirq)
+{
+	memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
+	memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+	memirq_assert(memirq, memirq->bo);
+
+	return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_STATUS_OFFSET;
+}
+
+/**
+ * xe_memirq_enable_ptr - Get GGTT's offset of the Interrupt Enable Mask.
+ * @memirq: the &xe_memirq to query
+ *
+ * Shall be called only on VF driver when `Memory Based Interrupts`_ are used
+ * and xe_memirq_init() didn't fail.
+ *
+ * Return: GGTT's offset of the Interrupt Enable Mask.
+ */
+u32 xe_memirq_enable_ptr(struct xe_memirq *memirq)
+{
+	memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
+	memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+	memirq_assert(memirq, memirq->bo);
+
+	return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_ENABLE_OFFSET;
+}
+
+/**
+ * xe_memirq_init_guc - Prepare GuC for `Memory Based Interrupts`_.
+ * @memirq: the &xe_memirq
+ * @guc: the &xe_guc to setup
+ *
+ * Register `Interrupt Source Report Page`_ and `Interrupt Status Report Page`_
+ * to be used by the GuC when `Memory Based Interrupts`_ are required.
+ *
+ * Shall be called only on VF driver when `Memory Based Interrupts`_ are used
+ * and xe_memirq_init() didn't fail.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc)
+{
+	bool is_media = xe_gt_is_media_type(guc_to_gt(guc));
+	u32 offset = is_media ? ilog2(INTR_MGUC) : ilog2(INTR_GUC);
+	u32 source, status;
+	int err;
+
+	memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
+	memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+	memirq_assert(memirq, memirq->bo);
+
+	source = xe_memirq_source_ptr(memirq) + offset;
+	status = xe_memirq_status_ptr(memirq) + offset * SZ_16;
+
+	err = xe_guc_self_cfg64(guc, GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_KEY,
+				source);
+	if (unlikely(err))
+		goto failed;
+
+	err = xe_guc_self_cfg64(guc, GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR_KEY,
+				status);
+	if (unlikely(err))
+		goto failed;
+
+	return 0;
+
+failed:
+	xe_sriov_err(memirq_to_xe(memirq),
+		     "Failed to setup report pages in %s (%pe)\n",
+		     guc_name(guc), ERR_PTR(err));
+	return err;
+}
+
+/**
+ * xe_memirq_reset - Disable processing of `Memory Based Interrupts`_.
+ * @memirq: struct xe_memirq
+ *
+ * This is part of the driver IRQ setup flow.
+ *
+ * This function shall only be used by the VF driver on platforms that use
+ * `Memory Based Interrupts`_.
+ */
+void xe_memirq_reset(struct xe_memirq *memirq)
+{
+	memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
+	memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+
+	if (memirq->bo)
+		memirq_set_enable(memirq, false);
+}
+
+/**
+ * xe_memirq_postinstall - Enable processing of `Memory Based Interrupts`_.
+ * @memirq: the &xe_memirq
+ *
+ * This is part of the driver IRQ setup flow.
+ *
+ * This function shall only be used by the VF driver on platforms that use
+ * `Memory Based Interrupts`_.
+ */
+void xe_memirq_postinstall(struct xe_memirq *memirq)
+{
+	memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
+	memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+
+	if (memirq->bo)
+		memirq_set_enable(memirq, true);
+}
+
+static bool memirq_received(struct xe_memirq *memirq, struct iosys_map *vector,
+			    u16 offset, const char *name)
+{
+	u8 value;
+
+	value = iosys_map_rd(vector, offset, u8);
+	if (value) {
+		if (value != 0xff)
+			xe_sriov_err_ratelimited(memirq_to_xe(memirq),
+						 "Unexpected memirq value %#x from %s at %u\n",
+						 value, name, offset);
+		iosys_map_wr(vector, offset, u8, 0x00);
+	}
+
+	return value;
+}
+
+static void memirq_dispatch_engine(struct xe_memirq *memirq, struct iosys_map *status,
+				   struct xe_hw_engine *hwe)
+{
+	memirq_debug(memirq, "STATUS %s %*ph\n", hwe->name, 16, status->vaddr);
+
+	if (memirq_received(memirq, status, ilog2(GT_RENDER_USER_INTERRUPT), hwe->name))
+		xe_hw_engine_handle_irq(hwe, GT_RENDER_USER_INTERRUPT);
+}
+
+static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *status,
+				struct xe_guc *guc)
+{
+	const char *name = guc_name(guc);
+
+	memirq_debug(memirq, "STATUS %s %*ph\n", name, 16, status->vaddr);
+
+	if (memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name))
+		xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST);
+}
+
+/**
+ * xe_memirq_handler - The `Memory Based Interrupts`_ Handler.
+ * @memirq: the &xe_memirq
+ *
+ * This function reads and dispatches `Memory Based Interrupts`.
+ */
+void xe_memirq_handler(struct xe_memirq *memirq)
+{
+	struct xe_device *xe = memirq_to_xe(memirq);
+	struct xe_tile *tile = memirq_to_tile(memirq);
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	struct iosys_map map;
+	unsigned int gtid;
+	struct xe_gt *gt;
+
+	if (!memirq->bo)
+		return;
+
+	memirq_assert(memirq, !memirq->source.is_iomem);
+	memirq_debug(memirq, "SOURCE %*ph\n", 32, memirq->source.vaddr);
+	memirq_debug(memirq, "SOURCE %*ph\n", 32, memirq->source.vaddr + 32);
+
+	for_each_gt(gt, xe, gtid) {
+		if (gt->tile != tile)
+			continue;
+
+		for_each_hw_engine(hwe, gt, id) {
+			if (memirq_received(memirq, &memirq->source, hwe->irq_offset, "SRC")) {
+				map = IOSYS_MAP_INIT_OFFSET(&memirq->status,
+							    hwe->irq_offset * SZ_16);
+				memirq_dispatch_engine(memirq, &map, hwe);
+			}
+		}
+	}
+
+	/* GuC and media GuC (if present) must be checked separately */
+
+	if (memirq_received(memirq, &memirq->source, ilog2(INTR_GUC), "SRC")) {
+		map = IOSYS_MAP_INIT_OFFSET(&memirq->status, ilog2(INTR_GUC) * SZ_16);
+		memirq_dispatch_guc(memirq, &map, &tile->primary_gt->uc.guc);
+	}
+
+	if (!tile->media_gt)
+		return;
+
+	if (memirq_received(memirq, &memirq->source, ilog2(INTR_MGUC), "SRC")) {
+		map = IOSYS_MAP_INIT_OFFSET(&memirq->status, ilog2(INTR_MGUC) * SZ_16);
+		memirq_dispatch_guc(memirq, &map, &tile->media_gt->uc.guc);
+	}
+}
diff --git a/drivers/gpu/drm/xe/xe_memirq.h b/drivers/gpu/drm/xe/xe_memirq.h
new file mode 100644
index 0000000000..2d40d03c30
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_memirq.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_MEMIRQ_H_
+#define _XE_MEMIRQ_H_
+
+#include <linux/types.h>
+
+struct xe_guc;
+struct xe_memirq;
+
+int xe_memirq_init(struct xe_memirq *memirq);
+
+u32 xe_memirq_source_ptr(struct xe_memirq *memirq);
+u32 xe_memirq_status_ptr(struct xe_memirq *memirq);
+u32 xe_memirq_enable_ptr(struct xe_memirq *memirq);
+
+void xe_memirq_reset(struct xe_memirq *memirq);
+void xe_memirq_postinstall(struct xe_memirq *memirq);
+void xe_memirq_handler(struct xe_memirq *memirq);
+
+int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_memirq_types.h b/drivers/gpu/drm/xe/xe_memirq_types.h
new file mode 100644
index 0000000000..625b6b8736
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_memirq_types.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_MEMIRQ_TYPES_H_
+#define _XE_MEMIRQ_TYPES_H_
+
+#include <linux/iosys-map.h>
+
+struct xe_bo;
+
+/* ISR */
+#define XE_MEMIRQ_STATUS_OFFSET		0x0
+/* IIR */
+#define XE_MEMIRQ_SOURCE_OFFSET		0x400
+/* IMR */
+#define XE_MEMIRQ_ENABLE_OFFSET		0x440
+
+/**
+ * struct xe_memirq - Data used by the `Memory Based Interrupts`_.
+ *
+ * @bo: buffer object with `Memory Based Interrupts Page Layout`_.
+ * @source: iosys pointer to `Interrupt Source Report Page`_.
+ * @status: iosys pointer to `Interrupt Status Report Page`_.
+ * @mask: iosys pointer to Interrupt Enable Mask.
+ * @enabled: internal flag used to control processing of the interrupts.
+ */
+struct xe_memirq {
+	struct xe_bo *bo;
+	struct iosys_map source;
+	struct iosys_map status;
+	struct iosys_map mask;
+	bool enabled;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 7a6ad3469d..198f5c2189 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -12,9 +12,11 @@
 #include <drm/ttm/ttm_tt.h>
 #include <drm/xe_drm.h>
 
-#include "generated/xe_wa_oob.h"
+#include <generated/xe_wa_oob.h>
+
 #include "instructions/xe_mi_commands.h"
 #include "regs/xe_gpu_commands.h"
+#include "regs/xe_gtt_defs.h"
 #include "tests/xe_test.h"
 #include "xe_assert.h"
 #include "xe_bb.h"
@@ -32,7 +34,6 @@
 #include "xe_sync.h"
 #include "xe_trace.h"
 #include "xe_vm.h"
-#include "xe_wa.h"
 
 /**
  * struct xe_migrate - migrate context.
@@ -71,6 +72,16 @@ struct xe_migrate {
 #define NUM_KERNEL_PDE 17
 #define NUM_PT_SLOTS 32
 #define LEVEL0_PAGE_TABLE_ENCODE_SIZE SZ_2M
+#define MAX_NUM_PTE 512
+
+/*
+ * Although MI_STORE_DATA_IMM's "length" field is 10-bits, 0x3FE is the largest
+ * legal value accepted.  Since that instruction field is always stored in
+ * (val-2) format, this translates to 0x400 dwords for the true maximum length
+ * of the instruction.  Subtracting the instruction header (1 dword) and
+ * address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values.
+ */
+#define MAX_PTE_PER_SDI 0x1FE
 
 /**
  * xe_tile_migrate_engine() - Get this tile's migrate engine.
@@ -144,8 +155,8 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 	bo = xe_bo_create_pin_map(vm->xe, tile, vm,
 				  num_entries * XE_PAGE_SIZE,
 				  ttm_bo_type_kernel,
-				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
-				  XE_BO_CREATE_PINNED_BIT);
+				  XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+				  XE_BO_FLAG_PINNED);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
@@ -288,10 +299,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 }
 
 /*
- * Due to workaround 16017236439, odd instance hardware copy engines are
- * faster than even instance ones.
- * This function returns the mask involving all fast copy engines and the
- * reserved copy engine to be used as logical mask for migrate engine.
  * Including the reserved copy engine is required to avoid deadlocks due to
  * migrate jobs servicing the faults gets stuck behind the job that faulted.
  */
@@ -305,8 +312,7 @@ static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt)
 		if (hwe->class != XE_ENGINE_CLASS_COPY)
 			continue;
 
-		if (!XE_WA(gt, 16017236439) ||
-		    xe_gt_is_usm_hwe(gt, hwe) || hwe->instance & 1)
+		if (xe_gt_is_usm_hwe(gt, hwe))
 			logical_mask |= BIT(hwe->logical_instance);
 	}
 
@@ -357,10 +363,14 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 		if (!hwe || !logical_mask)
 			return ERR_PTR(-EINVAL);
 
+		/*
+		 * XXX: Currently only reserving 1 (likely slow) BCS instance on
+		 * PVC, may want to revisit if performance is needed.
+		 */
 		m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe,
 					    EXEC_QUEUE_FLAG_KERNEL |
 					    EXEC_QUEUE_FLAG_PERMANENT |
-					    EXEC_QUEUE_FLAG_HIGH_PRIORITY);
+					    EXEC_QUEUE_FLAG_HIGH_PRIORITY, 0);
 	} else {
 		m->q = xe_exec_queue_create_class(xe, primary_gt, vm,
 						  XE_ENGINE_CLASS_COPY,
@@ -451,13 +461,13 @@ static u32 pte_update_size(struct xe_migrate *m,
 	} else {
 		/* Clip L0 to available size */
 		u64 size = min(*L0, (u64)avail_pts * SZ_2M);
-		u64 num_4k_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+		u32 num_4k_pages = (size + XE_PAGE_SIZE - 1) >> XE_PTE_SHIFT;
 
 		*L0 = size;
 		*L0_ofs = xe_migrate_vm_addr(pt_ofs, 0);
 
 		/* MI_STORE_DATA_IMM */
-		cmds += 3 * DIV_ROUND_UP(num_4k_pages, 0x1ff);
+		cmds += 3 * DIV_ROUND_UP(num_4k_pages, MAX_PTE_PER_SDI);
 
 		/* PDE qwords */
 		cmds += num_4k_pages * 2;
@@ -492,7 +502,7 @@ static void emit_pte(struct xe_migrate *m,
 	ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE);
 
 	while (ptes) {
-		u32 chunk = min(0x1ffU, ptes);
+		u32 chunk = min(MAX_PTE_PER_SDI, ptes);
 
 		bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
 		bb->cs[bb->len++] = ofs;
@@ -973,7 +983,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 	struct xe_res_cursor src_it;
 	struct ttm_resource *src = dst;
 	int err;
-	int pass = 0;
 
 	if (!clear_vram)
 		xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it);
@@ -994,8 +1003,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 
 		clear_L0 = xe_migrate_res_sizes(m, &src_it);
 
-		drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0);
-
 		/* Calculate final sizes and batch size.. */
 		batch_size = 2 +
 			pte_update_size(m, clear_vram, src, &src_it,
@@ -1111,7 +1118,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
 	 * This shouldn't be possible in practice.. might change when 16K
 	 * pages are used. Hence the assert.
 	 */
-	xe_tile_assert(tile, update->qwords <= 0x1ff);
+	xe_tile_assert(tile, update->qwords < MAX_NUM_PTE);
 	if (!ppgtt_ofs)
 		ppgtt_ofs = xe_migrate_vram_ofs(tile_to_xe(tile),
 						xe_bo_addr(update->pt_bo, 0,
@@ -1120,7 +1127,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
 	do {
 		u64 addr = ppgtt_ofs + ofs * 8;
 
-		chunk = min(update->qwords, 0x1ffU);
+		chunk = min(size, MAX_PTE_PER_SDI);
 
 		/* Ensure populatefn can do memset64 by aligning bb->cs */
 		if (!(bb->len & 1))
@@ -1299,7 +1306,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 		batch_size = 6 + num_updates * 2;
 
 	for (i = 0; i < num_updates; i++) {
-		u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, 0x1ff);
+		u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, MAX_PTE_PER_SDI);
 
 		/* align noop + MI_STORE_DATA_IMM cmd prefix */
 		batch_size += 4 * num_cmds + updates[i].qwords * 2;
@@ -1327,7 +1334,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 						 GFP_KERNEL, true, 0);
 			if (IS_ERR(sa_bo)) {
 				err = PTR_ERR(sa_bo);
-				goto err;
+				goto err_bb;
 			}
 
 			ppgtt_ofs = NUM_KERNEL_PDE +
@@ -1378,7 +1385,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 					 update_idx);
 	if (IS_ERR(job)) {
 		err = PTR_ERR(job);
-		goto err_bb;
+		goto err_sa;
 	}
 
 	/* Wait on BO move */
@@ -1427,12 +1434,12 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 
 err_job:
 	xe_sched_job_put(job);
+err_sa:
+	drm_suballoc_free(sa_bo, NULL);
 err_bb:
 	if (!q)
 		mutex_unlock(&m->job_mutex);
 	xe_bb_free(bb, NULL);
-err:
-	drm_suballoc_free(sa_bo, NULL);
 	return ERR_PTR(err);
 }
 
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 02f7808f28..334637511e 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -20,6 +20,7 @@
 #include "xe_gt_mcr.h"
 #include "xe_macros.h"
 #include "xe_module.h"
+#include "xe_sriov.h"
 #include "xe_tile.h"
 
 #define XEHP_MTCFG_ADDR		XE_REG(0x101800)
@@ -162,6 +163,42 @@ static int xe_determine_lmem_bar_size(struct xe_device *xe)
 	return 0;
 }
 
+static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	u64 offset;
+	u32 reg;
+
+	if (GRAPHICS_VER(xe) >= 20) {
+		u64 ccs_size = tile_size / 512;
+		u64 offset_hi, offset_lo;
+		u32 nodes, num_enabled;
+
+		reg = xe_mmio_read32(gt, MIRROR_FUSE3);
+		nodes = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, reg);
+		num_enabled = hweight32(nodes); /* Number of enabled l3 nodes */
+
+		reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER);
+		offset_lo = REG_FIELD_GET(XE2_FLAT_CCS_BASE_LOWER_ADDR_MASK, reg);
+
+		reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_UPPER);
+		offset_hi = REG_FIELD_GET(XE2_FLAT_CCS_BASE_UPPER_ADDR_MASK, reg);
+
+		offset = offset_hi << 32; /* HW view bits 39:32 */
+		offset |= offset_lo << 6; /* HW view bits 31:6 */
+		offset *= num_enabled; /* convert to SW view */
+
+		/* We don't expect any holes */
+		xe_assert_msg(xe, offset == (xe_mmio_read64_2x32(gt, GSMBASE) - ccs_size),
+			      "Hole between CCS and GSM.\n");
+	} else {
+		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
+		offset = (u64)REG_FIELD_GET(XEHP_FLAT_CCS_PTR, reg) * SZ_64K;
+	}
+
+	return offset;
+}
+
 /**
  * xe_mmio_tile_vram_size() - Collect vram size and offset information
  * @tile: tile to get info for
@@ -206,8 +243,7 @@ static int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size,
 
 	/* minus device usage */
 	if (xe->info.has_flat_ccs) {
-		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
-		offset = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K;
+		offset = get_flat_ccs_offset(gt, *tile_size);
 	} else {
 		offset = xe_mmio_read64_2x32(gt, GSMBASE);
 	}
@@ -359,26 +395,9 @@ static void mmio_fini(struct drm_device *drm, void *arg)
 		iounmap(xe->mem.vram.mapping);
 }
 
-static int xe_verify_lmem_ready(struct xe_device *xe)
-{
-	struct xe_gt *gt = xe_root_mmio_gt(xe);
-
-	/*
-	 * The boot firmware initializes local memory and assesses its health.
-	 * If memory training fails, the punit will have been instructed to
-	 * keep the GT powered down; we won't be able to communicate with it
-	 * and we should not continue with driver initialization.
-	 */
-	if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) {
-		drm_err(&xe->drm, "VRAM not initialized by firmware\n");
-		return -ENODEV;
-	}
-
-	return 0;
-}
-
 int xe_mmio_init(struct xe_device *xe)
 {
+	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 	const int mmio_bar = 0;
 
@@ -394,23 +413,83 @@ int xe_mmio_init(struct xe_device *xe)
 		return -EIO;
 	}
 
+	/* Setup first tile; other tiles (if present) will be setup later. */
+	root_tile->mmio.size = SZ_16M;
+	root_tile->mmio.regs = xe->mmio.regs;
+
 	return drmm_add_action_or_reset(&xe->drm, mmio_fini, xe);
 }
 
-int xe_mmio_root_tile_init(struct xe_device *xe)
+u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
 {
-	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
-	int err;
+	struct xe_tile *tile = gt_to_tile(gt);
 
-	/* Setup first tile; other tiles (if present) will be setup later. */
-	root_tile->mmio.size = SZ_16M;
-	root_tile->mmio.regs = xe->mmio.regs;
+	if (reg.addr < gt->mmio.adj_limit)
+		reg.addr += gt->mmio.adj_offset;
 
-	err = xe_verify_lmem_ready(xe);
-	if (err)
-		return err;
+	return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
+}
 
-	return 0;
+u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg)
+{
+	struct xe_tile *tile = gt_to_tile(gt);
+
+	if (reg.addr < gt->mmio.adj_limit)
+		reg.addr += gt->mmio.adj_offset;
+
+	return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
+}
+
+void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val)
+{
+	struct xe_tile *tile = gt_to_tile(gt);
+
+	if (reg.addr < gt->mmio.adj_limit)
+		reg.addr += gt->mmio.adj_offset;
+
+	writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
+}
+
+u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
+{
+	struct xe_tile *tile = gt_to_tile(gt);
+
+	if (reg.addr < gt->mmio.adj_limit)
+		reg.addr += gt->mmio.adj_offset;
+
+	return readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
+}
+
+u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set)
+{
+	u32 old, reg_val;
+
+	old = xe_mmio_read32(gt, reg);
+	reg_val = (old & ~clr) | set;
+	xe_mmio_write32(gt, reg, reg_val);
+
+	return old;
+}
+
+int xe_mmio_write32_and_verify(struct xe_gt *gt,
+			       struct xe_reg reg, u32 val, u32 mask, u32 eval)
+{
+	u32 reg_val;
+
+	xe_mmio_write32(gt, reg, val);
+	reg_val = xe_mmio_read32(gt, reg);
+
+	return (reg_val & mask) != eval ? -EINVAL : 0;
+}
+
+bool xe_mmio_in_range(const struct xe_gt *gt,
+		      const struct xe_mmio_range *range,
+		      struct xe_reg reg)
+{
+	if (reg.addr < gt->mmio.adj_limit)
+		reg.addr += gt->mmio.adj_offset;
+
+	return range && reg.addr >= range->start && reg.addr <= range->end;
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index 98de5c13c8..a3cd7b3036 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -21,83 +21,15 @@ struct xe_device;
 #define LMEM_BAR		2
 
 int xe_mmio_init(struct xe_device *xe);
-int xe_mmio_root_tile_init(struct xe_device *xe);
 void xe_mmio_probe_tiles(struct xe_device *xe);
 
-static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
-{
-	struct xe_tile *tile = gt_to_tile(gt);
-
-	if (reg.addr < gt->mmio.adj_limit)
-		reg.addr += gt->mmio.adj_offset;
-
-	return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
-}
-
-static inline u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg)
-{
-	struct xe_tile *tile = gt_to_tile(gt);
-
-	if (reg.addr < gt->mmio.adj_limit)
-		reg.addr += gt->mmio.adj_offset;
-
-	return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
-}
-
-static inline void xe_mmio_write32(struct xe_gt *gt,
-				   struct xe_reg reg, u32 val)
-{
-	struct xe_tile *tile = gt_to_tile(gt);
-
-	if (reg.addr < gt->mmio.adj_limit)
-		reg.addr += gt->mmio.adj_offset;
-
-	writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
-}
-
-static inline u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
-{
-	struct xe_tile *tile = gt_to_tile(gt);
-
-	if (reg.addr < gt->mmio.adj_limit)
-		reg.addr += gt->mmio.adj_offset;
-
-	return readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr);
-}
-
-static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr,
-				u32 set)
-{
-	u32 old, reg_val;
-
-	old = xe_mmio_read32(gt, reg);
-	reg_val = (old & ~clr) | set;
-	xe_mmio_write32(gt, reg, reg_val);
-
-	return old;
-}
-
-static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
-					     struct xe_reg reg, u32 val,
-					     u32 mask, u32 eval)
-{
-	u32 reg_val;
-
-	xe_mmio_write32(gt, reg, val);
-	reg_val = xe_mmio_read32(gt, reg);
-
-	return (reg_val & mask) != eval ? -EINVAL : 0;
-}
-
-static inline bool xe_mmio_in_range(const struct xe_gt *gt,
-				    const struct xe_mmio_range *range,
-				    struct xe_reg reg)
-{
-	if (reg.addr < gt->mmio.adj_limit)
-		reg.addr += gt->mmio.adj_offset;
-
-	return range && reg.addr >= range->start && reg.addr <= range->end;
-}
+u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg);
+u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg);
+void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val);
+u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg);
+u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set);
+int xe_mmio_write32_and_verify(struct xe_gt *gt, struct xe_reg reg, u32 val, u32 mask, u32 eval);
+bool xe_mmio_in_range(const struct xe_gt *gt, const struct xe_mmio_range *range, struct xe_reg reg);
 
 int xe_mmio_probe_vram(struct xe_device *xe);
 u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg);
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index ef79552e4f..1e92f8ee07 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -13,13 +13,14 @@
 #include "xe_gt_mcr.h"
 #include "xe_mmio.h"
 #include "xe_platform_types.h"
+#include "xe_sriov.h"
 #include "xe_step_types.h"
 
 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
-#define mocs_dbg drm_dbg
+#define mocs_dbg xe_gt_dbg
 #else
 __printf(2, 3)
-static inline void mocs_dbg(const struct drm_device *dev,
+static inline void mocs_dbg(const struct xe_gt *gt,
 			    const char *format, ...)
 { /* noop */ }
 #endif
@@ -71,7 +72,7 @@ struct xe_mocs_info {
 /* Helper defines */
 #define XELP_NUM_MOCS_ENTRIES	64  /* 63-64 are reserved, but configured. */
 #define PVC_NUM_MOCS_ENTRIES	3
-#define MTL_NUM_MOCS_ENTRIES    16
+#define MTL_NUM_MOCS_ENTRIES	16
 #define XE2_NUM_MOCS_ENTRIES	16
 
 /* (e)LLC caching options */
@@ -290,18 +291,6 @@ static const struct xe_mocs_entry dg2_mocs_desc[] = {
 	MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
 };
 
-static const struct xe_mocs_entry dg2_mocs_desc_g10_ax[] = {
-	/* Wa_14011441408: Set Go to Memory for MOCS#0 */
-	MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
-	/* UC - Coherent; GO:Memory */
-	MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
-	/* UC - Non-Coherent; GO:Memory */
-	MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
-
-	/* WB - LC */
-	MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
-};
-
 static const struct xe_mocs_entry pvc_mocs_desc[] = {
 	/* Error */
 	MOCS_ENTRY(0, 0, L3_3_WB),
@@ -386,6 +375,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
 
 	switch (xe->info.platform) {
 	case XE_LUNARLAKE:
+	case XE_BATTLEMAGE:
 		info->size = ARRAY_SIZE(xe2_mocs_table);
 		info->table = xe2_mocs_table;
 		info->n_entries = XE2_NUM_MOCS_ENTRIES;
@@ -409,17 +399,14 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
 		info->unused_entries_index = 1;
 		break;
 	case XE_DG2:
-		if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10 &&
-		    xe->info.step.graphics >= STEP_A0 &&
-		    xe->info.step.graphics <= STEP_B0) {
-			info->size = ARRAY_SIZE(dg2_mocs_desc_g10_ax);
-			info->table = dg2_mocs_desc_g10_ax;
-		} else {
-			info->size = ARRAY_SIZE(dg2_mocs_desc);
-			info->table = dg2_mocs_desc;
-		}
+		info->size = ARRAY_SIZE(dg2_mocs_desc);
+		info->table = dg2_mocs_desc;
 		info->uc_index = 1;
-		info->n_entries = XELP_NUM_MOCS_ENTRIES;
+		/*
+		 * Last entry is RO on hardware, don't bother with what was
+		 * written when checking later
+		 */
+		info->n_entries = XELP_NUM_MOCS_ENTRIES - 1;
 		info->unused_entries_index = 3;
 		break;
 	case XE_DG1:
@@ -480,24 +467,34 @@ static u32 get_entry_control(const struct xe_mocs_info *info,
 	return info->table[info->unused_entries_index].control_value;
 }
 
-static void __init_mocs_table(struct xe_gt *gt,
-			      const struct xe_mocs_info *info)
+static bool regs_are_mcr(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 
+	if (xe_gt_is_media_type(gt))
+		return MEDIA_VER(xe) >= 20;
+	else
+		return GRAPHICS_VERx100(xe) >= 1250;
+}
+
+static void __init_mocs_table(struct xe_gt *gt,
+			      const struct xe_mocs_info *info)
+{
 	unsigned int i;
 	u32 mocs;
 
-	mocs_dbg(&gt_to_xe(gt)->drm, "entries:%d\n", info->n_entries);
-	drm_WARN_ONCE(&xe->drm, !info->unused_entries_index,
-		      "Unused entries index should have been defined\n");
-	for (i = 0;
-	     i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0;
-	     i++) {
-		mocs_dbg(&gt_to_xe(gt)->drm, "GLOB_MOCS[%d] 0x%x 0x%x\n", i,
+	xe_gt_WARN_ONCE(gt, !info->unused_entries_index,
+			"Unused entries index should have been defined\n");
+
+	mocs_dbg(gt, "mocs entries: %d\n", info->n_entries);
+
+	for (i = 0; i < info->n_entries; i++) {
+		mocs = get_entry_control(info, i);
+
+		mocs_dbg(gt, "GLOB_MOCS[%d] 0x%x 0x%x\n", i,
 			 XELP_GLOBAL_MOCS(i).addr, mocs);
 
-		if (GRAPHICS_VERx100(gt_to_xe(gt)) > 1250)
+		if (regs_are_mcr(gt))
 			xe_gt_mcr_multicast_write(gt, XEHP_GLOBAL_MOCS(i), mocs);
 		else
 			xe_mmio_write32(gt, XELP_GLOBAL_MOCS(i), mocs);
@@ -528,16 +525,16 @@ static void init_l3cc_table(struct xe_gt *gt,
 	unsigned int i;
 	u32 l3cc;
 
-	mocs_dbg(&gt_to_xe(gt)->drm, "entries:%d\n", info->n_entries);
-	for (i = 0;
-	     i < (info->n_entries + 1) / 2 ?
-	     (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i),
-				  get_entry_l3cc(info, 2 * i + 1))), 1 : 0;
-	     i++) {
-		mocs_dbg(&gt_to_xe(gt)->drm, "LNCFCMOCS[%d] 0x%x 0x%x\n", i, XELP_LNCFCMOCS(i).addr,
-			 l3cc);
+	mocs_dbg(gt, "l3cc entries: %d\n", info->n_entries);
+
+	for (i = 0; i < (info->n_entries + 1) / 2; i++) {
+		l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i),
+				    get_entry_l3cc(info, 2 * i + 1));
 
-		if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250)
+		mocs_dbg(gt, "LNCFCMOCS[%d] 0x%x 0x%x\n", i,
+			 XELP_LNCFCMOCS(i).addr, l3cc);
+
+		if (regs_are_mcr(gt))
 			xe_gt_mcr_multicast_write(gt, XEHP_LNCFCMOCS(i), l3cc);
 		else
 			xe_mmio_write32(gt, XELP_LNCFCMOCS(i), l3cc);
@@ -558,6 +555,9 @@ void xe_mocs_init(struct xe_gt *gt)
 	struct xe_mocs_info table;
 	unsigned int flags;
 
+	if (IS_SRIOV_VF(gt_to_xe(gt)))
+		return;
+
 	/*
 	 * MOCS settings are split between "GLOB_MOCS" and/or "LNCFCMOCS"
 	 * registers depending on platform.
@@ -567,7 +567,10 @@ void xe_mocs_init(struct xe_gt *gt)
 	 * performed by the GuC.
 	 */
 	flags = get_mocs_settings(gt_to_xe(gt), &table);
-	mocs_dbg(&gt_to_xe(gt)->drm, "flag:0x%x\n", flags);
+	mocs_dbg(gt, "flag:0x%x\n", flags);
+
+	if (IS_SRIOV_VF(gt_to_xe(gt)))
+		return;
 
 	if (flags & HAS_GLOBAL_MOCS)
 		__init_mocs_table(gt, &table);
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index 110b698646..ceb8345cbc 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -48,6 +48,13 @@ module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400);
 MODULE_PARM_DESC(force_probe,
 		 "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details.");
 
+#ifdef CONFIG_PCI_IOV
+module_param_named(max_vfs, xe_modparam.max_vfs, uint, 0400);
+MODULE_PARM_DESC(max_vfs,
+		 "Limit number of Virtual Functions (VFs) that could be managed. "
+		 "(0 = no VFs [default]; N = allow up to N VFs)");
+#endif
+
 struct init_funcs {
 	int (*init)(void);
 	void (*exit)(void);
diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
index 88ef0e8b2b..b369984f08 100644
--- a/drivers/gpu/drm/xe/xe_module.h
+++ b/drivers/gpu/drm/xe/xe_module.h
@@ -18,6 +18,9 @@ struct xe_modparam {
 	char *huc_firmware_path;
 	char *gsc_firmware_path;
 	char *force_probe;
+#ifdef CONFIG_PCI_IOV
+	unsigned int max_vfs;
+#endif
 };
 
 extern struct xe_modparam xe_modparam;
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index 1ff6bc79e7..d5b516f115 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -13,6 +13,7 @@
 #include "xe_gt.h"
 #include "xe_gt_mcr.h"
 #include "xe_mmio.h"
+#include "xe_sriov.h"
 
 #define _PAT_ATS				0x47fc
 #define _PAT_INDEX(index)			_PICK_EVEN_2RANGES(index, 8, \
@@ -141,6 +142,7 @@ static const struct xe_pat_table_entry xe2_pat_table[] = {
 
 /* Special PAT values programmed outside the main table */
 static const struct xe_pat_table_entry xe2_pat_ats = XE2_PAT( 0, 0, 0, 0, 3, 3 );
+static const struct xe_pat_table_entry xe2_pat_pta = XE2_PAT( 0, 0, 0, 0, 3, 0 );
 
 u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index)
 {
@@ -173,7 +175,6 @@ static void xelp_dump(struct xe_gt *gt, struct drm_printer *p)
 	struct xe_device *xe = gt_to_xe(gt);
 	int i, err;
 
-	xe_device_mem_access_get(xe);
 	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
 	if (err)
 		goto err_fw;
@@ -191,7 +192,6 @@ static void xelp_dump(struct xe_gt *gt, struct drm_printer *p)
 	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
 err_fw:
 	xe_assert(xe, !err);
-	xe_device_mem_access_put(xe);
 }
 
 static const struct xe_pat_ops xelp_pat_ops = {
@@ -204,7 +204,6 @@ static void xehp_dump(struct xe_gt *gt, struct drm_printer *p)
 	struct xe_device *xe = gt_to_xe(gt);
 	int i, err;
 
-	xe_device_mem_access_get(xe);
 	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
 	if (err)
 		goto err_fw;
@@ -224,7 +223,6 @@ static void xehp_dump(struct xe_gt *gt, struct drm_printer *p)
 	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
 err_fw:
 	xe_assert(xe, !err);
-	xe_device_mem_access_put(xe);
 }
 
 static const struct xe_pat_ops xehp_pat_ops = {
@@ -237,7 +235,6 @@ static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p)
 	struct xe_device *xe = gt_to_xe(gt);
 	int i, err;
 
-	xe_device_mem_access_get(xe);
 	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
 	if (err)
 		goto err_fw;
@@ -255,7 +252,6 @@ static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p)
 	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
 err_fw:
 	xe_assert(xe, !err);
-	xe_device_mem_access_put(xe);
 }
 
 static const struct xe_pat_ops xehpc_pat_ops = {
@@ -268,7 +264,6 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p)
 	struct xe_device *xe = gt_to_xe(gt);
 	int i, err;
 
-	xe_device_mem_access_get(xe);
 	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
 	if (err)
 		goto err_fw;
@@ -291,7 +286,6 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p)
 	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
 err_fw:
 	xe_assert(xe, !err);
-	xe_device_mem_access_put(xe);
 }
 
 /*
@@ -309,6 +303,9 @@ static void xe2lpg_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry
 {
 	program_pat_mcr(gt, table, n_entries);
 	xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), xe2_pat_ats.value);
+
+	if (IS_DGFX(gt_to_xe(gt)))
+		xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe2_pat_pta.value);
 }
 
 static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[],
@@ -316,6 +313,9 @@ static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry
 {
 	program_pat(gt, table, n_entries);
 	xe_mmio_write32(gt, XE_REG(_PAT_ATS), xe2_pat_ats.value);
+
+	if (IS_DGFX(gt_to_xe(gt)))
+		xe_mmio_write32(gt, XE_REG(_PAT_PTA), xe2_pat_pta.value);
 }
 
 static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
@@ -324,7 +324,6 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
 	int i, err;
 	u32 pat;
 
-	xe_device_mem_access_get(xe);
 	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
 	if (err)
 		goto err_fw;
@@ -369,7 +368,6 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
 	err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
 err_fw:
 	xe_assert(xe, !err);
-	xe_device_mem_access_put(xe);
 }
 
 static const struct xe_pat_ops xe2_pat_ops = {
@@ -433,6 +431,14 @@ void xe_pat_init_early(struct xe_device *xe)
 		drm_err(&xe->drm, "Missing PAT table for platform with graphics version %d.%02d!\n",
 			GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
 	}
+
+	/* VFs can't program nor dump PAT settings */
+	if (IS_SRIOV_VF(xe))
+		xe->pat.ops = NULL;
+
+	xe_assert(xe, !xe->pat.ops || xe->pat.ops->dump);
+	xe_assert(xe, !xe->pat.ops || xe->pat.ops->program_graphics);
+	xe_assert(xe, !xe->pat.ops || MEDIA_VER(xe) < 13 || xe->pat.ops->program_media);
 }
 
 void xe_pat_init(struct xe_gt *gt)
@@ -452,7 +458,7 @@ void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p)
 {
 	struct xe_device *xe = gt_to_xe(gt);
 
-	if (!xe->pat.ops->dump)
+	if (!xe->pat.ops)
 		return;
 
 	xe->pat.ops->dump(gt, p);
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index dcc5ded155..f326dbb1ce 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -15,9 +15,9 @@
 #include <drm/drm_drv.h>
 #include <drm/xe_pciids.h>
 
+#include "display/xe_display.h"
 #include "regs/xe_gt_regs.h"
 #include "xe_device.h"
-#include "xe_display.h"
 #include "xe_drv.h"
 #include "xe_gt.h"
 #include "xe_macros.h"
@@ -165,7 +165,7 @@ static const struct xe_graphics_desc graphics_xelpg = {
 	.has_asid = 1, \
 	.has_flat_ccs = 1, \
 	.has_range_tlb_invalidation = 1, \
-	.has_usm = 0 /* FIXME: implementation missing */, \
+	.has_usm = 1, \
 	.va_bits = 48, \
 	.vm_max_level = 4, \
 	.hw_engine_mask = \
@@ -174,7 +174,7 @@ static const struct xe_graphics_desc graphics_xelpg = {
 		GENMASK(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)
 
 static const struct xe_graphics_desc graphics_xe2 = {
-	.name = "Xe2_LPG",
+	.name = "Xe2_LPG / Xe2_HPG",
 
 	XE2_GFX_FEATURES,
 };
@@ -185,8 +185,8 @@ static const struct xe_media_desc media_xem = {
 	.rel = 0,
 
 	.hw_engine_mask =
-		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) |
-		BIT(XE_HW_ENGINE_VECS0),
+		GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) |
+		GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0),
 };
 
 static const struct xe_media_desc media_xehpm = {
@@ -195,21 +195,23 @@ static const struct xe_media_desc media_xehpm = {
 	.rel = 55,
 
 	.hw_engine_mask =
-		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) |
-		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VECS1),
+		GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) |
+		GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0),
 };
 
 static const struct xe_media_desc media_xelpmp = {
 	.name = "Xe_LPM+",
 	.hw_engine_mask =
-		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) |
-		BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_GSCCS0)
+		GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) |
+		GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0) |
+		BIT(XE_HW_ENGINE_GSCCS0)
 };
 
 static const struct xe_media_desc media_xe2 = {
-	.name = "Xe2_LPM",
+	.name = "Xe2_LPM / Xe2_HPM",
 	.hw_engine_mask =
-		BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VECS0), /* TODO: GSC0 */
+		GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) |
+		GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0), /* TODO: GSC0 */
 };
 
 static const struct xe_device_desc tgl_desc = {
@@ -333,6 +335,13 @@ static const struct xe_device_desc mtl_desc = {
 
 static const struct xe_device_desc lnl_desc = {
 	PLATFORM(XE_LUNARLAKE),
+	.has_display = true,
+	.require_force_probe = true,
+};
+
+static const struct xe_device_desc bmg_desc __maybe_unused = {
+	DGFX_FEATURES,
+	PLATFORM(XE_BATTLEMAGE),
 	.require_force_probe = true,
 };
 
@@ -340,15 +349,18 @@ static const struct xe_device_desc lnl_desc = {
 __diag_pop();
 
 /* Map of GMD_ID values to graphics IP */
-static struct gmdid_map graphics_ip_map[] = {
+static const struct gmdid_map graphics_ip_map[] = {
 	{ 1270, &graphics_xelpg },
 	{ 1271, &graphics_xelpg },
+	{ 1274, &graphics_xelpg },	/* Xe_LPG+ */
+	{ 2001, &graphics_xe2 },
 	{ 2004, &graphics_xe2 },
 };
 
 /* Map of GMD_ID values to media IP */
-static struct gmdid_map media_ip_map[] = {
+static const struct gmdid_map media_ip_map[] = {
 	{ 1300, &media_xelpmp },
+	{ 1301, &media_xe2 },
 	{ 2000, &media_xe2 },
 };
 
@@ -737,8 +749,6 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		return err;
 
-	xe_sriov_probe_early(xe, desc->has_sriov);
-
 	err = xe_device_probe_early(xe);
 	if (err)
 		return err;
@@ -774,16 +784,26 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		str_yes_no(xe_device_has_sriov(xe)),
 		xe_sriov_mode_to_string(xe_device_sriov_mode(xe)));
 
+	err = xe_pm_init_early(xe);
+	if (err)
+		return err;
+
 	err = xe_device_probe(xe);
 	if (err)
 		return err;
 
-	xe_pm_init(xe);
+	err = xe_pm_init(xe);
+	if (err)
+		goto err_driver_cleanup;
 
 	drm_dbg(&xe->drm, "d3cold: capable=%s\n",
 		str_yes_no(xe->d3cold.capable));
 
 	return 0;
+
+err_driver_cleanup:
+	xe_pci_remove(pdev);
+	return err;
 }
 
 static void xe_pci_shutdown(struct pci_dev *pdev)
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index b324dc2a5d..a5e7da8cf9 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -10,6 +10,7 @@
 
 #include <drm/drm_managed.h>
 
+#include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_mmio.h"
 #include "xe_pcode_api.h"
@@ -43,8 +44,6 @@ static int pcode_mailbox_status(struct xe_gt *gt)
 		[PCODE_ERROR_MASK] = {-EPROTO, "Unknown"},
 	};
 
-	lockdep_assert_held(&gt->pcode.lock);
-
 	err = xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_ERROR_MASK;
 	if (err) {
 		drm_err(&gt_to_xe(gt)->drm, "PCODE Mailbox failed: %d %s", err,
@@ -55,17 +54,15 @@ static int pcode_mailbox_status(struct xe_gt *gt)
 	return 0;
 }
 
-static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
-			    unsigned int timeout_ms, bool return_data,
-			    bool atomic)
+static int __pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
+			      unsigned int timeout_ms, bool return_data,
+			      bool atomic)
 {
 	int err;
 
 	if (gt_to_xe(gt)->info.skip_pcode)
 		return 0;
 
-	lockdep_assert_held(&gt->pcode.lock);
-
 	if ((xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_READY) != 0)
 		return -EAGAIN;
 
@@ -74,7 +71,7 @@ static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
 	xe_mmio_write32(gt, PCODE_MAILBOX, PCODE_READY | mbox);
 
 	err = xe_mmio_wait32(gt, PCODE_MAILBOX, PCODE_READY, 0,
-			     timeout_ms * 1000, NULL, atomic);
+			     timeout_ms * USEC_PER_MSEC, NULL, atomic);
 	if (err)
 		return err;
 
@@ -87,6 +84,18 @@ static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
 	return pcode_mailbox_status(gt);
 }
 
+static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
+			    unsigned int timeout_ms, bool return_data,
+			    bool atomic)
+{
+	if (gt_to_xe(gt)->info.skip_pcode)
+		return 0;
+
+	lockdep_assert_held(&gt->pcode.lock);
+
+	return __pcode_mailbox_rw(gt, mbox, data0, data1, timeout_ms, return_data, atomic);
+}
+
 int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 data, int timeout)
 {
 	int err;
@@ -109,15 +118,19 @@ int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1)
 	return err;
 }
 
-static int xe_pcode_try_request(struct xe_gt *gt, u32 mbox,
-				u32 request, u32 reply_mask, u32 reply,
-				u32 *status, bool atomic, int timeout_us)
+static int pcode_try_request(struct xe_gt *gt, u32 mbox,
+			     u32 request, u32 reply_mask, u32 reply,
+			     u32 *status, bool atomic, int timeout_us, bool locked)
 {
 	int slept, wait = 10;
 
 	for (slept = 0; slept < timeout_us; slept += wait) {
-		*status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true,
-					   atomic);
+		if (locked)
+			*status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true,
+						   atomic);
+		else
+			*status = __pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true,
+						     atomic);
 		if ((*status == 0) && ((request & reply_mask) == reply))
 			return 0;
 
@@ -158,8 +171,8 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
 
 	mutex_lock(&gt->pcode.lock);
 
-	ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
-				   false, timeout_base_ms * 1000);
+	ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
+				false, timeout_base_ms * 1000, true);
 	if (!ret)
 		goto out;
 
@@ -177,8 +190,8 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
 		"PCODE timeout, retrying with preemption disabled\n");
 	drm_WARN_ON_ONCE(&gt_to_xe(gt)->drm, timeout_base_ms > 1);
 	preempt_disable();
-	ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
-				   true, timeout_base_ms * 1000);
+	ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
+				true, 50 * 1000, true);
 	preempt_enable();
 
 out:
@@ -238,59 +251,71 @@ unlock:
 }
 
 /**
- * xe_pcode_init - Ensure PCODE is initialized
- * @gt: gt instance
+ * xe_pcode_ready - Ensure PCODE is initialized
+ * @xe: xe instance
+ * @locked: true if lock held, false otherwise
  *
- * This function ensures that PCODE is properly initialized. To be called during
- * probe and resume paths.
+ * PCODE init mailbox is polled only on root gt of root tile
+ * as the root tile provides the initialization is complete only
+ * after all the tiles have completed the initialization.
+ * Called only on early probe without locks and with locks in
+ * resume path.
  *
- * It returns 0 on success, and -error number on failure.
+ * Returns 0 on success, and -error number on failure.
  */
-int xe_pcode_init(struct xe_gt *gt)
+int xe_pcode_ready(struct xe_device *xe, bool locked)
 {
 	u32 status, request = DGFX_GET_INIT_STATUS;
+	struct xe_gt *gt = xe_root_mmio_gt(xe);
 	int timeout_us = 180000000; /* 3 min */
 	int ret;
 
-	if (gt_to_xe(gt)->info.skip_pcode)
+	if (xe->info.skip_pcode)
 		return 0;
 
-	if (!IS_DGFX(gt_to_xe(gt)))
+	if (!IS_DGFX(xe))
 		return 0;
 
-	mutex_lock(&gt->pcode.lock);
-	ret = xe_pcode_try_request(gt, DGFX_PCODE_STATUS, request,
-				   DGFX_INIT_STATUS_COMPLETE,
-				   DGFX_INIT_STATUS_COMPLETE,
-				   &status, false, timeout_us);
-	mutex_unlock(&gt->pcode.lock);
+	if (locked)
+		mutex_lock(&gt->pcode.lock);
+
+	ret = pcode_try_request(gt, DGFX_PCODE_STATUS, request,
+				DGFX_INIT_STATUS_COMPLETE,
+				DGFX_INIT_STATUS_COMPLETE,
+				&status, false, timeout_us, locked);
+
+	if (locked)
+		mutex_unlock(&gt->pcode.lock);
 
 	if (ret)
-		drm_err(&gt_to_xe(gt)->drm,
+		drm_err(&xe->drm,
 			"PCODE initialization timedout after: 3 min\n");
 
 	return ret;
 }
 
 /**
- * xe_pcode_probe - Prepare xe_pcode and also ensure PCODE is initialized.
+ * xe_pcode_init: initialize components of PCODE
  * @gt: gt instance
  *
- * This function initializes the xe_pcode component, and when needed, it ensures
- * that PCODE has properly performed its initialization and it is really ready
- * to go. To be called once only during probe.
- *
- * It returns 0 on success, and -error number on failure.
+ * This function initializes the xe_pcode component.
+ * To be called once only during probe.
  */
-int xe_pcode_probe(struct xe_gt *gt)
+void xe_pcode_init(struct xe_gt *gt)
 {
 	drmm_mutex_init(&gt_to_xe(gt)->drm, &gt->pcode.lock);
+}
 
-	if (gt_to_xe(gt)->info.skip_pcode)
-		return 0;
-
-	if (!IS_DGFX(gt_to_xe(gt)))
-		return 0;
-
-	return xe_pcode_init(gt);
+/**
+ * xe_pcode_probe_early: initializes PCODE
+ * @xe: xe instance
+ *
+ * This function checks the initialization status of PCODE
+ * To be called once only during early probe without locks.
+ *
+ * Returns 0 on success, error code otherwise
+ */
+int xe_pcode_probe_early(struct xe_device *xe)
+{
+	return xe_pcode_ready(xe, false);
 }
diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h
index 08cb1d047c..3f54c6d2a5 100644
--- a/drivers/gpu/drm/xe/xe_pcode.h
+++ b/drivers/gpu/drm/xe/xe_pcode.h
@@ -8,9 +8,11 @@
 
 #include <linux/types.h>
 struct xe_gt;
+struct xe_device;
 
-int xe_pcode_probe(struct xe_gt *gt);
-int xe_pcode_init(struct xe_gt *gt);
+void xe_pcode_init(struct xe_gt *gt);
+int xe_pcode_probe_early(struct xe_device *xe);
+int xe_pcode_ready(struct xe_device *xe, bool locked);
 int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq,
 				 u32 max_gt_freq);
 int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1);
diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h
index 5935cfe302..f153ce96f6 100644
--- a/drivers/gpu/drm/xe/xe_pcode_api.h
+++ b/drivers/gpu/drm/xe/xe_pcode_api.h
@@ -42,6 +42,13 @@
 #define	    POWER_SETUP_I1_SHIFT		6	/* 10.6 fixed point format */
 #define	    POWER_SETUP_I1_DATA_MASK		REG_GENMASK(15, 0)
 
+#define   PCODE_FREQUENCY_CONFIG		0x6e
+/* Frequency Config Sub Commands (param1) */
+#define     PCODE_MBOX_FC_SC_READ_FUSED_P0	0x0
+#define     PCODE_MBOX_FC_SC_READ_FUSED_PN	0x1
+/* Domain IDs (param2) */
+#define     PCODE_MBOX_DOMAIN_HBM		0x2
+
 struct pcode_err_decode {
 	int errno;
 	const char *str;
diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h
index 553f53dbd0..79b7042c45 100644
--- a/drivers/gpu/drm/xe/xe_platform_types.h
+++ b/drivers/gpu/drm/xe/xe_platform_types.h
@@ -22,6 +22,7 @@ enum xe_platform {
 	XE_PVC,
 	XE_METEORLAKE,
 	XE_LUNARLAKE,
+	XE_BATTLEMAGE,
 };
 
 enum xe_subplatform {
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index b429c2876a..37fbeda12d 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -10,11 +10,11 @@
 #include <drm/drm_managed.h>
 #include <drm/ttm/ttm_placement.h>
 
+#include "display/xe_display.h"
 #include "xe_bo.h"
 #include "xe_bo_evict.h"
 #include "xe_device.h"
 #include "xe_device_sysfs.h"
-#include "xe_display.h"
 #include "xe_ggtt.h"
 #include "xe_gt.h"
 #include "xe_guc.h"
@@ -25,23 +25,55 @@
 /**
  * DOC: Xe Power Management
  *
- * Xe PM shall be guided by the simplicity.
- * Use the simplest hook options whenever possible.
- * Let's not reinvent the runtime_pm references and hooks.
- * Shall have a clear separation of display and gt underneath this component.
+ * Xe PM implements the main routines for both system level suspend states and
+ * for the opportunistic runtime suspend states.
  *
- * What's next:
+ * System Level Suspend (S-States) - In general this is OS initiated suspend
+ * driven by ACPI for achieving S0ix (a.k.a. S2idle, freeze), S3 (suspend to ram),
+ * S4 (disk). The main functions here are `xe_pm_suspend` and `xe_pm_resume`. They
+ * are the main point for the suspend to and resume from these states.
  *
- * For now s2idle and s3 are only working in integrated devices. The next step
- * is to iterate through all VRAM's BO backing them up into the system memory
- * before allowing the system suspend.
+ * PCI Device Suspend (D-States) - This is the opportunistic PCIe device low power
+ * state D3, controlled by the PCI subsystem and ACPI with the help from the
+ * runtime_pm infrastructure.
+ * PCI D3 is special and can mean D3hot, where Vcc power is on for keeping memory
+ * alive and quicker low latency resume or D3Cold where Vcc power is off for
+ * better power savings.
+ * The Vcc control of PCI hierarchy can only be controlled at the PCI root port
+ * level, while the device driver can be behind multiple bridges/switches and
+ * paired with other devices. For this reason, the PCI subsystem cannot perform
+ * the transition towards D3Cold. The lowest runtime PM possible from the PCI
+ * subsystem is D3hot. Then, if all these paired devices in the same root port
+ * are in D3hot, ACPI will assist here and run its own methods (_PR3 and _OFF)
+ * to perform the transition from D3hot to D3cold. Xe may disallow this
+ * transition by calling pci_d3cold_disable(root_pdev) before going to runtime
+ * suspend. It will be based on runtime conditions such as VRAM usage for a
+ * quick and low latency resume for instance.
  *
- * Also runtime_pm needs to be here from the beginning.
+ * Runtime PM - This infrastructure provided by the Linux kernel allows the
+ * device drivers to indicate when the can be runtime suspended, so the device
+ * could be put at D3 (if supported), or allow deeper package sleep states
+ * (PC-states), and/or other low level power states. Xe PM component provides
+ * `xe_pm_runtime_suspend` and `xe_pm_runtime_resume` functions that PCI
+ * subsystem will call before transition to/from runtime suspend.
  *
- * RC6/RPS are also critical PM features. Let's start with GuCRC and GuC SLPC
- * and no wait boost. Frequency optimizations should come on a next stage.
+ * Also, Xe PM provides get and put functions that Xe driver will use to
+ * indicate activity. In order to avoid locking complications with the memory
+ * management, whenever possible, these get and put functions needs to be called
+ * from the higher/outer levels.
+ * The main cases that need to be protected from the outer levels are: IOCTL,
+ * sysfs, debugfs, dma-buf sharing, GPU execution.
+ *
+ * This component is not responsible for GT idleness (RC6) nor GT frequency
+ * management (RPS).
  */
 
+#ifdef CONFIG_LOCKDEP
+struct lockdep_map xe_pm_runtime_lockdep_map = {
+	.name = "xe_pm_runtime_lockdep_map"
+};
+#endif
+
 /**
  * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle
  * @xe: xe device instance
@@ -54,13 +86,15 @@ int xe_pm_suspend(struct xe_device *xe)
 	u8 id;
 	int err;
 
+	drm_dbg(&xe->drm, "Suspending device\n");
+
 	for_each_gt(gt, xe, id)
 		xe_gt_suspend_prepare(gt);
 
 	/* FIXME: Super racey... */
 	err = xe_bo_evict_all(xe);
 	if (err)
-		return err;
+		goto err;
 
 	xe_display_pm_suspend(xe);
 
@@ -68,7 +102,7 @@ int xe_pm_suspend(struct xe_device *xe)
 		err = xe_gt_suspend(gt);
 		if (err) {
 			xe_display_pm_resume(xe);
-			return err;
+			goto err;
 		}
 	}
 
@@ -76,7 +110,11 @@ int xe_pm_suspend(struct xe_device *xe)
 
 	xe_display_pm_suspend_late(xe);
 
+	drm_dbg(&xe->drm, "Device suspended\n");
 	return 0;
+err:
+	drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
+	return err;
 }
 
 /**
@@ -92,14 +130,14 @@ int xe_pm_resume(struct xe_device *xe)
 	u8 id;
 	int err;
 
+	drm_dbg(&xe->drm, "Resuming device\n");
+
 	for_each_tile(tile, xe, id)
 		xe_wa_apply_tile_workarounds(tile);
 
-	for_each_gt(gt, xe, id) {
-		err = xe_pcode_init(gt);
-		if (err)
-			return err;
-	}
+	err = xe_pcode_ready(xe, true);
+	if (err)
+		return err;
 
 	xe_display_pm_resume_early(xe);
 
@@ -109,7 +147,7 @@ int xe_pm_resume(struct xe_device *xe)
 	 */
 	err = xe_bo_restore_kernel(xe);
 	if (err)
-		return err;
+		goto err;
 
 	xe_irq_resume(xe);
 
@@ -120,22 +158,35 @@ int xe_pm_resume(struct xe_device *xe)
 
 	err = xe_bo_restore_user(xe);
 	if (err)
-		return err;
+		goto err;
 
+	drm_dbg(&xe->drm, "Device resumed\n");
 	return 0;
+err:
+	drm_dbg(&xe->drm, "Device resume failed %d\n", err);
+	return err;
 }
 
-static bool xe_pm_pci_d3cold_capable(struct pci_dev *pdev)
+static bool xe_pm_pci_d3cold_capable(struct xe_device *xe)
 {
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 	struct pci_dev *root_pdev;
 
 	root_pdev = pcie_find_root_port(pdev);
 	if (!root_pdev)
 		return false;
 
-	/* D3Cold requires PME capability and _PR3 power resource */
-	if (!pci_pme_capable(root_pdev, PCI_D3cold) || !pci_pr3_present(root_pdev))
+	/* D3Cold requires PME capability */
+	if (!pci_pme_capable(root_pdev, PCI_D3cold)) {
+		drm_dbg(&xe->drm, "d3cold: PME# not supported\n");
+		return false;
+	}
+
+	/* D3Cold requires _PR3 power resource */
+	if (!pci_pr3_present(root_pdev)) {
+		drm_dbg(&xe->drm, "d3cold: ACPI _PR3 not present\n");
 		return false;
+	}
 
 	return true;
 }
@@ -163,26 +214,60 @@ static void xe_pm_runtime_init(struct xe_device *xe)
 	pm_runtime_put(dev);
 }
 
-void xe_pm_init(struct xe_device *xe)
+int xe_pm_init_early(struct xe_device *xe)
 {
-	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+	int err;
+
+	INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list);
+
+	err = drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock);
+	if (err)
+		return err;
+
+	err = drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/**
+ * xe_pm_init - Initialize Xe Power Management
+ * @xe: xe device instance
+ *
+ * This component is responsible for System and Device sleep states.
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
+int xe_pm_init(struct xe_device *xe)
+{
+	int err;
 
 	/* For now suspend/resume is only allowed with GuC */
 	if (!xe_device_uc_enabled(xe))
-		return;
-
-	drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
+		return 0;
 
-	xe->d3cold.capable = xe_pm_pci_d3cold_capable(pdev);
+	xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe);
 
 	if (xe->d3cold.capable) {
-		xe_device_sysfs_init(xe);
-		xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD);
+		err = xe_device_sysfs_init(xe);
+		if (err)
+			return err;
+
+		err = xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD);
+		if (err)
+			return err;
 	}
 
 	xe_pm_runtime_init(xe);
+
+	return 0;
 }
 
+/**
+ * xe_pm_runtime_fini - Finalize Runtime PM
+ * @xe: xe device instance
+ */
 void xe_pm_runtime_fini(struct xe_device *xe)
 {
 	struct device *dev = xe->drm.dev;
@@ -212,24 +297,44 @@ struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
 	return READ_ONCE(xe->pm_callback_task);
 }
 
+/**
+ * xe_pm_runtime_suspended - Check if runtime_pm state is suspended
+ * @xe: xe device instance
+ *
+ * This does not provide any guarantee that the device is going to remain
+ * suspended as it might be racing with the runtime state transitions.
+ * It can be used only as a non-reliable assertion, to ensure that we are not in
+ * the sleep state while trying to access some memory for instance.
+ *
+ * Returns true if PCI device is suspended, false otherwise.
+ */
+bool xe_pm_runtime_suspended(struct xe_device *xe)
+{
+	return pm_runtime_suspended(xe->drm.dev);
+}
+
+/**
+ * xe_pm_runtime_suspend - Prepare our device for D3hot/D3Cold
+ * @xe: xe device instance
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
 int xe_pm_runtime_suspend(struct xe_device *xe)
 {
+	struct xe_bo *bo, *on;
 	struct xe_gt *gt;
 	u8 id;
 	int err = 0;
 
-	if (xe->d3cold.allowed && xe_device_mem_access_ongoing(xe))
-		return -EBUSY;
-
 	/* Disable access_ongoing asserts and prevent recursive pm calls */
 	xe_pm_write_callback_task(xe, current);
 
 	/*
-	 * The actual xe_device_mem_access_put() is always async underneath, so
+	 * The actual xe_pm_runtime_put() is always async underneath, so
 	 * exactly where that is called should makes no difference to us. However
 	 * we still need to be very careful with the locks that this callback
 	 * acquires and the locks that are acquired and held by any callers of
-	 * xe_device_mem_access_get(). We already have the matching annotation
+	 * xe_runtime_pm_get(). We already have the matching annotation
 	 * on that side, but we also need it here. For example lockdep should be
 	 * able to tell us if the following scenario is in theory possible:
 	 *
@@ -237,15 +342,25 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
 	 * lock(A)                       |
 	 *                               | xe_pm_runtime_suspend()
 	 *                               |      lock(A)
-	 * xe_device_mem_access_get()    |
+	 * xe_pm_runtime_get()           |
 	 *
 	 * This will clearly deadlock since rpm core needs to wait for
 	 * xe_pm_runtime_suspend() to complete, but here we are holding lock(A)
 	 * on CPU0 which prevents CPU1 making forward progress.  With the
-	 * annotation here and in xe_device_mem_access_get() lockdep will see
+	 * annotation here and in xe_pm_runtime_get() lockdep will see
 	 * the potential lock inversion and give us a nice splat.
 	 */
-	lock_map_acquire(&xe_device_mem_access_lockdep_map);
+	lock_map_acquire(&xe_pm_runtime_lockdep_map);
+
+	/*
+	 * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify
+	 * also checks and delets bo entry from user fault list.
+	 */
+	mutex_lock(&xe->mem_access.vram_userfault.lock);
+	list_for_each_entry_safe(bo, on,
+				 &xe->mem_access.vram_userfault.list, vram_userfault_link)
+		xe_bo_runtime_pm_release_mmap_offset(bo);
+	mutex_unlock(&xe->mem_access.vram_userfault.lock);
 
 	if (xe->d3cold.allowed) {
 		err = xe_bo_evict_all(xe);
@@ -261,11 +376,17 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
 
 	xe_irq_suspend(xe);
 out:
-	lock_map_release(&xe_device_mem_access_lockdep_map);
+	lock_map_release(&xe_pm_runtime_lockdep_map);
 	xe_pm_write_callback_task(xe, NULL);
 	return err;
 }
 
+/**
+ * xe_pm_runtime_resume - Waking up from D3hot/D3Cold
+ * @xe: xe device instance
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
 int xe_pm_runtime_resume(struct xe_device *xe)
 {
 	struct xe_gt *gt;
@@ -275,7 +396,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
 	/* Disable access_ongoing asserts and prevent recursive pm calls */
 	xe_pm_write_callback_task(xe, current);
 
-	lock_map_acquire(&xe_device_mem_access_lockdep_map);
+	lock_map_acquire(&xe_pm_runtime_lockdep_map);
 
 	/*
 	 * It can be possible that xe has allowed d3cold but other pcie devices
@@ -286,11 +407,9 @@ int xe_pm_runtime_resume(struct xe_device *xe)
 	xe->d3cold.power_lost = xe_guc_in_reset(&gt->uc.guc);
 
 	if (xe->d3cold.allowed && xe->d3cold.power_lost) {
-		for_each_gt(gt, xe, id) {
-			err = xe_pcode_init(gt);
-			if (err)
-				goto out;
-		}
+		err = xe_pcode_ready(xe, true);
+		if (err)
+			goto out;
 
 		/*
 		 * This only restores pinned memory which is the memory
@@ -312,27 +431,147 @@ int xe_pm_runtime_resume(struct xe_device *xe)
 			goto out;
 	}
 out:
-	lock_map_release(&xe_device_mem_access_lockdep_map);
+	lock_map_release(&xe_pm_runtime_lockdep_map);
 	xe_pm_write_callback_task(xe, NULL);
 	return err;
 }
 
-int xe_pm_runtime_get(struct xe_device *xe)
+/*
+ * For places where resume is synchronous it can be quite easy to deadlock
+ * if we are not careful. Also in practice it might be quite timing
+ * sensitive to ever see the 0 -> 1 transition with the callers locks
+ * held, so deadlocks might exist but are hard for lockdep to ever see.
+ * With this in mind, help lockdep learn about the potentially scary
+ * stuff that can happen inside the runtime_resume callback by acquiring
+ * a dummy lock (it doesn't protect anything and gets compiled out on
+ * non-debug builds).  Lockdep then only needs to see the
+ * xe_pm_runtime_lockdep_map -> runtime_resume callback once, and then can
+ * hopefully validate all the (callers_locks) -> xe_pm_runtime_lockdep_map.
+ * For example if the (callers_locks) are ever grabbed in the
+ * runtime_resume callback, lockdep should give us a nice splat.
+ */
+static void pm_runtime_lockdep_prime(void)
 {
-	return pm_runtime_get_sync(xe->drm.dev);
+	lock_map_acquire(&xe_pm_runtime_lockdep_map);
+	lock_map_release(&xe_pm_runtime_lockdep_map);
+}
+
+/**
+ * xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously
+ * @xe: xe device instance
+ */
+void xe_pm_runtime_get(struct xe_device *xe)
+{
+	pm_runtime_get_noresume(xe->drm.dev);
+
+	if (xe_pm_read_callback_task(xe) == current)
+		return;
+
+	pm_runtime_lockdep_prime();
+	pm_runtime_resume(xe->drm.dev);
+}
+
+/**
+ * xe_pm_runtime_put - Put the runtime_pm reference back and mark as idle
+ * @xe: xe device instance
+ */
+void xe_pm_runtime_put(struct xe_device *xe)
+{
+	if (xe_pm_read_callback_task(xe) == current) {
+		pm_runtime_put_noidle(xe->drm.dev);
+	} else {
+		pm_runtime_mark_last_busy(xe->drm.dev);
+		pm_runtime_put(xe->drm.dev);
+	}
 }
 
-int xe_pm_runtime_put(struct xe_device *xe)
+/**
+ * xe_pm_runtime_get_ioctl - Get a runtime_pm reference before ioctl
+ * @xe: xe device instance
+ *
+ * Returns: Any number greater than or equal to 0 for success, negative error
+ * code otherwise.
+ */
+int xe_pm_runtime_get_ioctl(struct xe_device *xe)
 {
-	pm_runtime_mark_last_busy(xe->drm.dev);
-	return pm_runtime_put(xe->drm.dev);
+	if (WARN_ON(xe_pm_read_callback_task(xe) == current))
+		return -ELOOP;
+
+	pm_runtime_lockdep_prime();
+	return pm_runtime_get_sync(xe->drm.dev);
 }
 
+/**
+ * xe_pm_runtime_get_if_active - Get a runtime_pm reference if device active
+ * @xe: xe device instance
+ *
+ * Returns: Any number greater than or equal to 0 for success, negative error
+ * code otherwise.
+ */
 int xe_pm_runtime_get_if_active(struct xe_device *xe)
 {
-	return pm_runtime_get_if_active(xe->drm.dev, true);
+	return pm_runtime_get_if_active(xe->drm.dev);
 }
 
+/**
+ * xe_pm_runtime_get_if_in_use - Get a runtime_pm reference and resume if needed
+ * @xe: xe device instance
+ *
+ * Returns: True if device is awake and the reference was taken, false otherwise.
+ */
+bool xe_pm_runtime_get_if_in_use(struct xe_device *xe)
+{
+	if (xe_pm_read_callback_task(xe) == current) {
+		/* The device is awake, grab the ref and move on */
+		pm_runtime_get_noresume(xe->drm.dev);
+		return true;
+	}
+
+	return pm_runtime_get_if_in_use(xe->drm.dev) > 0;
+}
+
+/**
+ * xe_pm_runtime_get_noresume - Bump runtime PM usage counter without resuming
+ * @xe: xe device instance
+ *
+ * This function should be used in inner places where it is surely already
+ * protected by outer-bound callers of `xe_pm_runtime_get`.
+ * It will warn if not protected.
+ * The reference should be put back after this function regardless, since it
+ * will always bump the usage counter, regardless.
+ */
+void xe_pm_runtime_get_noresume(struct xe_device *xe)
+{
+	bool ref;
+
+	ref = xe_pm_runtime_get_if_in_use(xe);
+
+	if (drm_WARN(&xe->drm, !ref, "Missing outer runtime PM protection\n"))
+		pm_runtime_get_noresume(xe->drm.dev);
+}
+
+/**
+ * xe_pm_runtime_resume_and_get - Resume, then get a runtime_pm ref if awake.
+ * @xe: xe device instance
+ *
+ * Returns: True if device is awake and the reference was taken, false otherwise.
+ */
+bool xe_pm_runtime_resume_and_get(struct xe_device *xe)
+{
+	if (xe_pm_read_callback_task(xe) == current) {
+		/* The device is awake, grab the ref and move on */
+		pm_runtime_get_noresume(xe->drm.dev);
+		return true;
+	}
+
+	pm_runtime_lockdep_prime();
+	return pm_runtime_resume_and_get(xe->drm.dev) >= 0;
+}
+
+/**
+ * xe_pm_assert_unbounded_bridge - Disable PM on unbounded pcie parent bridge
+ * @xe: xe device instance
+ */
 void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
 {
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@@ -347,6 +586,13 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
 	}
 }
 
+/**
+ * xe_pm_set_vram_threshold - Set a vram threshold for allowing/blocking D3Cold
+ * @xe: xe device instance
+ * @threshold: VRAM size in bites for the D3cold threshold
+ *
+ * Returns 0 for success, negative error code otherwise.
+ */
 int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
 {
 	struct ttm_resource_manager *man;
@@ -371,6 +617,13 @@ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
 	return 0;
 }
 
+/**
+ * xe_pm_d3cold_allowed_toggle - Check conditions to toggle d3cold.allowed
+ * @xe: xe device instance
+ *
+ * To be called during runtime_pm idle callback.
+ * Check for all the D3Cold conditions ahead of runtime suspend.
+ */
 void xe_pm_d3cold_allowed_toggle(struct xe_device *xe)
 {
 	struct ttm_resource_manager *man;
diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
index 6b9031f7af..18b0613fe5 100644
--- a/drivers/gpu/drm/xe/xe_pm.h
+++ b/drivers/gpu/drm/xe/xe_pm.h
@@ -20,13 +20,19 @@ struct xe_device;
 int xe_pm_suspend(struct xe_device *xe);
 int xe_pm_resume(struct xe_device *xe);
 
-void xe_pm_init(struct xe_device *xe);
+int xe_pm_init_early(struct xe_device *xe);
+int xe_pm_init(struct xe_device *xe);
 void xe_pm_runtime_fini(struct xe_device *xe);
+bool xe_pm_runtime_suspended(struct xe_device *xe);
 int xe_pm_runtime_suspend(struct xe_device *xe);
 int xe_pm_runtime_resume(struct xe_device *xe);
-int xe_pm_runtime_get(struct xe_device *xe);
-int xe_pm_runtime_put(struct xe_device *xe);
+void xe_pm_runtime_get(struct xe_device *xe);
+int xe_pm_runtime_get_ioctl(struct xe_device *xe);
+void xe_pm_runtime_put(struct xe_device *xe);
 int xe_pm_runtime_get_if_active(struct xe_device *xe);
+bool xe_pm_runtime_get_if_in_use(struct xe_device *xe);
+void xe_pm_runtime_get_noresume(struct xe_device *xe);
+bool xe_pm_runtime_resume_and_get(struct xe_device *xe);
 void xe_pm_assert_unbounded_bridge(struct xe_device *xe);
 int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold);
 void xe_pm_d3cold_allowed_toggle(struct xe_device *xe);
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c
index 7bce2a3326..5b243b7feb 100644
--- a/drivers/gpu/drm/xe/xe_preempt_fence.c
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.c
@@ -23,11 +23,19 @@ static void preempt_fence_work_func(struct work_struct *w)
 		q->ops->suspend_wait(q);
 
 	dma_fence_signal(&pfence->base);
-	dma_fence_end_signalling(cookie);
-
+	/*
+	 * Opt for keep everything in the fence critical section. This looks really strange since we
+	 * have just signalled the fence, however the preempt fences are all signalled via single
+	 * global ordered-wq, therefore anything that happens in this callback can easily block
+	 * progress on the entire wq, which itself may prevent other published preempt fences from
+	 * ever signalling.  Therefore try to keep everything here in the callback in the fence
+	 * critical section. For example if something below grabs a scary lock like vm->lock,
+	 * lockdep should complain since we also hold that lock whilst waiting on preempt fences to
+	 * complete.
+	 */
 	xe_vm_queue_rebind_worker(q->vm);
-
 	xe_exec_queue_put(q);
+	dma_fence_end_signalling(cookie);
 }
 
 static const char *
@@ -49,7 +57,7 @@ static bool preempt_fence_enable_signaling(struct dma_fence *fence)
 	struct xe_exec_queue *q = pfence->q;
 
 	pfence->error = q->ops->suspend(q);
-	queue_work(system_unbound_wq, &pfence->preempt_work);
+	queue_work(q->vm->xe->preempt_fence_wq, &pfence->preempt_work);
 	return true;
 }
 
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index c1a833b1cb..5b7930f46c 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -5,6 +5,7 @@
 
 #include "xe_pt.h"
 
+#include "regs/xe_gtt_defs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_drm_client.h"
@@ -108,11 +109,11 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
 	pt->level = level;
 	bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
 				  ttm_bo_type_kernel,
-				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
-				  XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT |
-				  XE_BO_CREATE_PINNED_BIT |
-				  XE_BO_CREATE_NO_RESV_EVICT |
-				  XE_BO_PAGETABLE);
+				  XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+				  XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE |
+				  XE_BO_FLAG_PINNED |
+				  XE_BO_FLAG_NO_RESV_EVICT |
+				  XE_BO_FLAG_PAGETABLE);
 	if (IS_ERR(bo)) {
 		err = PTR_ERR(bo);
 		goto err_kfree;
@@ -618,7 +619,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
 	int ret;
 
-	if (vma && (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) &&
+	if ((vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) &&
 	    (is_devmem || !IS_DGFX(xe)))
 		xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
 
@@ -877,8 +878,7 @@ static void xe_pt_commit_bind(struct xe_vma *vma,
 
 static int
 xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma,
-		   struct xe_vm_pgtable_update *entries, u32 *num_entries,
-		   bool rebind)
+		   struct xe_vm_pgtable_update *entries, u32 *num_entries)
 {
 	int err;
 
@@ -1136,8 +1136,7 @@ static int invalidation_fence_init(struct xe_gt *gt,
 	spin_lock_irq(&gt->tlb_invalidation.lock);
 	dma_fence_init(&ifence->base.base, &invalidation_fence_ops,
 		       &gt->tlb_invalidation.lock,
-		       gt->tlb_invalidation.fence_context,
-		       ++gt->tlb_invalidation.fence_seqno);
+		       dma_fence_context_alloc(1), 1);
 	spin_unlock_irq(&gt->tlb_invalidation.lock);
 
 	INIT_LIST_HEAD(&ifence->base.link);
@@ -1234,9 +1233,16 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
 	       "Preparing bind, with range [%llx...%llx) engine %p.\n",
 	       xe_vma_start(vma), xe_vma_end(vma), q);
 
-	err = xe_pt_prepare_bind(tile, vma, entries, &num_entries, rebind);
+	err = xe_pt_prepare_bind(tile, vma, entries, &num_entries);
 	if (err)
 		goto err;
+
+	err = dma_resv_reserve_fences(xe_vm_resv(vm), 1);
+	if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+		err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1);
+	if (err)
+		goto err;
+
 	xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
 
 	xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
@@ -1579,6 +1585,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
 	struct dma_fence *fence = NULL;
 	struct invalidation_fence *ifence;
 	struct xe_range_fence *rfence;
+	int err;
 
 	LLIST_HEAD(deferred);
 
@@ -1596,6 +1603,12 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
 	xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries,
 				   num_entries);
 
+	err = dma_resv_reserve_fences(xe_vm_resv(vm), 1);
+	if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+		err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1);
+	if (err)
+		return ERR_PTR(err);
+
 	ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
 	if (!ifence)
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 1a6f2f51ba..df407d73e5 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -12,6 +12,7 @@
 #include <drm/xe_drm.h>
 
 #include "regs/xe_engine_regs.h"
+#include "regs/xe_gt_regs.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_exec_queue.h"
@@ -147,8 +148,8 @@ query_engine_cycles(struct xe_device *xe,
 	if (!hwe)
 		return -EINVAL;
 
-	xe_device_mem_access_get(xe);
-	xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	if (xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL))
+		return -EIO;
 
 	__read_timestamps(gt,
 			  RING_TIMESTAMP(hwe->mmio_base),
@@ -159,7 +160,6 @@ query_engine_cycles(struct xe_device *xe,
 			  cpu_clock);
 
 	xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
-	xe_device_mem_access_put(xe);
 	resp.width = 36;
 
 	/* Only write to the output fields of user query */
@@ -198,7 +198,7 @@ static int query_engines(struct xe_device *xe,
 		return -EINVAL;
 	}
 
-	engines = kmalloc(size, GFP_KERNEL);
+	engines = kzalloc(size, GFP_KERNEL);
 	if (!engines)
 		return -ENOMEM;
 
@@ -212,14 +212,10 @@ static int query_engines(struct xe_device *xe,
 			engines->engines[i].instance.engine_instance =
 				hwe->logical_instance;
 			engines->engines[i].instance.gt_id = gt->info.id;
-			engines->engines[i].instance.pad = 0;
-			memset(engines->engines[i].reserved, 0,
-			       sizeof(engines->engines[i].reserved));
 
 			i++;
 		}
 
-	engines->pad = 0;
 	engines->num_engines = i;
 
 	if (copy_to_user(query_ptr, engines, size)) {
@@ -407,6 +403,13 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query
 				BIT(gt_to_tile(gt)->id) << 1;
 		gt_list->gt_list[id].far_mem_regions = xe->info.mem_region_mask ^
 			gt_list->gt_list[id].near_mem_regions;
+
+		gt_list->gt_list[id].ip_ver_major =
+			REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid);
+		gt_list->gt_list[id].ip_ver_minor =
+			REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid);
+		gt_list->gt_list[id].ip_ver_rev =
+			REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid);
 	}
 
 	if (copy_to_user(query_ptr, gt_list, size)) {
@@ -437,9 +440,7 @@ static int query_hwconfig(struct xe_device *xe,
 	if (!hwconfig)
 		return -ENOMEM;
 
-	xe_device_mem_access_get(xe);
 	xe_guc_hwconfig_copy(&gt->uc.guc, hwconfig);
-	xe_device_mem_access_put(xe);
 
 	if (copy_to_user(query_ptr, hwconfig, size)) {
 		kfree(hwconfig);
@@ -520,6 +521,79 @@ static int query_gt_topology(struct xe_device *xe,
 	return 0;
 }
 
+static int
+query_uc_fw_version(struct xe_device *xe, struct drm_xe_device_query *query)
+{
+	struct drm_xe_query_uc_fw_version __user *query_ptr = u64_to_user_ptr(query->data);
+	size_t size = sizeof(struct drm_xe_query_uc_fw_version);
+	struct drm_xe_query_uc_fw_version resp;
+	struct xe_uc_fw_version *version = NULL;
+
+	if (query->size == 0) {
+		query->size = size;
+		return 0;
+	} else if (XE_IOCTL_DBG(xe, query->size != size)) {
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&resp, query_ptr, size))
+		return -EFAULT;
+
+	if (XE_IOCTL_DBG(xe, resp.pad || resp.pad2 || resp.reserved))
+		return -EINVAL;
+
+	switch (resp.uc_type) {
+	case XE_QUERY_UC_TYPE_GUC_SUBMISSION: {
+		struct xe_guc *guc = &xe->tiles[0].primary_gt->uc.guc;
+
+		version = &guc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY];
+		break;
+	}
+	case XE_QUERY_UC_TYPE_HUC: {
+		struct xe_gt *media_gt = NULL;
+		struct xe_huc *huc;
+
+		if (MEDIA_VER(xe) >= 13) {
+			struct xe_tile *tile;
+			u8 gt_id;
+
+			for_each_tile(tile, xe, gt_id) {
+				if (tile->media_gt) {
+					media_gt = tile->media_gt;
+					break;
+				}
+			}
+		} else {
+			media_gt = xe->tiles[0].primary_gt;
+		}
+
+		if (!media_gt)
+			break;
+
+		huc = &media_gt->uc.huc;
+		if (huc->fw.status == XE_UC_FIRMWARE_RUNNING)
+			version = &huc->fw.versions.found[XE_UC_FW_VER_RELEASE];
+		break;
+	}
+	default:
+		return -EINVAL;
+	}
+
+	if (version) {
+		resp.branch_ver = 0;
+		resp.major_ver = version->major;
+		resp.minor_ver = version->minor;
+		resp.patch_ver = version->patch;
+	} else {
+		return -ENODEV;
+	}
+
+	if (copy_to_user(query_ptr, &resp, size))
+		return -EFAULT;
+
+	return 0;
+}
+
 static int (* const xe_query_funcs[])(struct xe_device *xe,
 				      struct drm_xe_device_query *query) = {
 	query_engines,
@@ -529,6 +603,7 @@ static int (* const xe_query_funcs[])(struct xe_device *xe,
 	query_hwconfig,
 	query_gt_topology,
 	query_engine_cycles,
+	query_uc_fw_version,
 };
 
 int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index 87adefb560..440ac572f6 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -231,7 +231,7 @@ void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
 	if (err)
 		goto err_force_wake;
 
-	p = drm_debug_printer(KBUILD_MODNAME);
+	p = drm_dbg_printer(&xe->drm, DRM_UT_DRIVER, NULL);
 	xa_for_each(&sr->xa, reg, entry) {
 		if (slot == RING_MAX_NONPRIV_SLOTS) {
 			xe_gt_err(gt,
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index e66ae1bdaf..3fa2ece7d2 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -7,9 +7,11 @@
 
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
 #include "xe_gt_types.h"
 #include "xe_platform_types.h"
 #include "xe_rtp.h"
+#include "xe_step.h"
 
 #undef XE_REG_MCR
 #define XE_REG_MCR(...)     XE_REG(__VA_ARGS__, .mcr = 1)
@@ -56,6 +58,12 @@ static const struct xe_rtp_entry_sr register_whitelist[] = {
 				   RING_FORCE_TO_NONPRIV_DENY,
 				   XE_RTP_ACTION_FLAG(ENGINE_BASE)))
 	},
+	{ XE_RTP_NAME("16020183090"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(WHITELIST(CSBE_DEBUG_STATUS(RENDER_RING_BASE), 0))
+	},
+
 	{}
 };
 
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 68495f8858..aca7a9af6e 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -5,7 +5,8 @@
 
 #include "xe_ring_ops.h"
 
-#include "generated/xe_wa_oob.h"
+#include <generated/xe_wa_oob.h>
+
 #include "instructions/xe_mi_commands.h"
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gpu_commands.h"
@@ -16,6 +17,7 @@
 #include "xe_lrc.h"
 #include "xe_macros.h"
 #include "xe_sched_job.h"
+#include "xe_sriov.h"
 #include "xe_vm_types.h"
 #include "xe_vm.h"
 #include "xe_wa.h"
@@ -78,6 +80,16 @@ static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
 	return i;
 }
 
+static int emit_flush_dw(u32 *dw, int i)
+{
+	dw[i++] = MI_FLUSH_DW | MI_FLUSH_IMM_DW;
+	dw[i++] = 0;
+	dw[i++] = 0;
+	dw[i++] = 0;
+
+	return i;
+}
+
 static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb,
 			       u32 *dw, int i)
 {
@@ -113,6 +125,19 @@ static int emit_flush_invalidate(u32 flag, u32 *dw, int i)
 	return i;
 }
 
+static int
+emit_pipe_control(u32 *dw, int i, u32 bit_group_0, u32 bit_group_1, u32 offset, u32 value)
+{
+	dw[i++] = GFX_OP_PIPE_CONTROL(6) | bit_group_0;
+	dw[i++] = bit_group_1;
+	dw[i++] = offset;
+	dw[i++] = 0;
+	dw[i++] = value;
+	dw[i++] = 0;
+
+	return i;
+}
+
 static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
 				int i)
 {
@@ -131,14 +156,7 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
 
 	flags &= ~mask_flags;
 
-	dw[i++] = GFX_OP_PIPE_CONTROL(6);
-	dw[i++] = flags;
-	dw[i++] = LRC_PPHWSP_SCRATCH_ADDR;
-	dw[i++] = 0;
-	dw[i++] = 0;
-	dw[i++] = 0;
-
-	return i;
+	return emit_pipe_control(dw, i, 0, flags, LRC_PPHWSP_SCRATCH_ADDR, 0);
 }
 
 static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
@@ -174,14 +192,7 @@ static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i)
 	else if (job->q->class == XE_ENGINE_CLASS_COMPUTE)
 		flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
 
-	dw[i++] = GFX_OP_PIPE_CONTROL(6) | PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
-	dw[i++] = flags;
-	dw[i++] = 0;
-	dw[i++] = 0;
-	dw[i++] = 0;
-	dw[i++] = 0;
-
-	return i;
+	return emit_pipe_control(dw, i, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0, 0);
 }
 
 static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int i)
@@ -189,14 +200,9 @@ static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int
 	if (hwe->class != XE_ENGINE_CLASS_RENDER)
 		return i;
 
-	if (XE_WA(hwe->gt, 16020292621)) {
-		dw[i++] = GFX_OP_PIPE_CONTROL(6);
-		dw[i++] = PIPE_CONTROL_LRI_POST_SYNC;
-		dw[i++] = RING_NOPID(hwe->mmio_base).addr;
-		dw[i++] = 0;
-		dw[i++] = 0;
-		dw[i++] = 0;
-	}
+	if (XE_WA(hwe->gt, 16020292621))
+		i = emit_pipe_control(dw, i, 0, PIPE_CONTROL_LRI_POST_SYNC,
+				      RING_NOPID(hwe->mmio_base).addr, 0);
 
 	return i;
 }
@@ -204,16 +210,13 @@ static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int
 static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
 			      int i)
 {
-	dw[i++] = GFX_OP_PIPE_CONTROL(6);
-	dw[i++] = (stall_only ? PIPE_CONTROL_CS_STALL :
-		   PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL) |
-		PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE;
-	dw[i++] = addr;
-	dw[i++] = 0;
-	dw[i++] = value;
-	dw[i++] = 0; /* We're thrashing one extra dword. */
+	u32 flags = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_GLOBAL_GTT_IVB |
+		    PIPE_CONTROL_QW_WRITE;
 
-	return i;
+	if (!stall_only)
+		flags |= PIPE_CONTROL_FLUSH_ENABLE;
+
+	return emit_pipe_control(dw, i, 0, flags, addr, value);
 }
 
 static u32 get_ppgtt_flag(struct xe_sched_job *job)
@@ -241,10 +244,12 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc
 
 	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
 
-	if (job->user_fence.used)
+	if (job->user_fence.used) {
+		i = emit_flush_dw(dw, i);
 		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
 						job->user_fence.value,
 						dw, i);
+	}
 
 	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
 
@@ -300,10 +305,12 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
 
 	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
 
-	if (job->user_fence.used)
+	if (job->user_fence.used) {
+		i = emit_flush_dw(dw, i);
 		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
 						job->user_fence.value,
 						dw, i);
+	}
 
 	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
 
@@ -375,10 +382,12 @@ static void emit_migration_job_gen12(struct xe_sched_job *job,
 
 	i = emit_bb_start(job->batch_addr[0], BIT(8), dw, i);
 
-	/* XXX: Do we need this? Leaving for now. */
-	dw[i++] = preparser_disable(true);
-	i = emit_flush_invalidate(0, dw, i);
-	dw[i++] = preparser_disable(false);
+	if (!IS_SRIOV_VF(gt_to_xe(job->q->gt))) {
+		/* XXX: Do we need this? Leaving for now. */
+		dw[i++] = preparser_disable(true);
+		i = emit_flush_invalidate(0, dw, i);
+		dw[i++] = preparser_disable(false);
+	}
 
 	i = emit_bb_start(job->batch_addr[1], BIT(8), dw, i);
 
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index fb44cc7521..10326bd1bf 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -200,7 +200,7 @@ static void rtp_mark_active(struct xe_device *xe,
 	if (first == last)
 		bitmap_set(ctx->active_entries, first, 1);
 	else
-		bitmap_set(ctx->active_entries, first, last - first + 2);
+		bitmap_set(ctx->active_entries, first, last - first + 1);
 }
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
index 2c4632259e..8941522b77 100644
--- a/drivers/gpu/drm/xe/xe_sa.c
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -48,8 +48,9 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32
 	sa_manager->bo = NULL;
 
 	bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel,
-				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
-				  XE_BO_CREATE_GGTT_BIT);
+				  XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+				  XE_BO_FLAG_GGTT |
+				  XE_BO_FLAG_GGTT_INVALIDATE);
 	if (IS_ERR(bo)) {
 		drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n",
 			PTR_ERR(bo));
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index d07b9ee0ea..cd8a2fba54 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -5,6 +5,7 @@
 
 #include "xe_sched_job.h"
 
+#include <drm/xe_drm.h>
 #include <linux/dma-fence-array.h>
 #include <linux/slab.h>
 
@@ -15,6 +16,8 @@
 #include "xe_hw_fence.h"
 #include "xe_lrc.h"
 #include "xe_macros.h"
+#include "xe_pm.h"
+#include "xe_sync_types.h"
 #include "xe_trace.h"
 #include "xe_vm.h"
 
@@ -157,7 +160,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
 
 	/* All other jobs require a VM to be open which has a ref */
 	if (unlikely(q->flags & EXEC_QUEUE_FLAG_KERNEL))
-		xe_device_mem_access_get(job_to_xe(job));
+		xe_pm_runtime_get_noresume(job_to_xe(job));
 	xe_device_assert_mem_access(job_to_xe(job));
 
 	trace_xe_sched_job_create(job);
@@ -190,7 +193,7 @@ void xe_sched_job_destroy(struct kref *ref)
 		container_of(ref, struct xe_sched_job, refcount);
 
 	if (unlikely(job->q->flags & EXEC_QUEUE_FLAG_KERNEL))
-		xe_device_mem_access_put(job_to_xe(job));
+		xe_pm_runtime_put(job_to_xe(job));
 	xe_exec_queue_put(job->q);
 	dma_fence_put(job->fence);
 	drm_sched_job_cleanup(&job->drm);
@@ -287,3 +290,57 @@ int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm)
 
 	return drm_sched_job_add_dependency(&job->drm, fence);
 }
+
+/**
+ * xe_sched_job_init_user_fence - Initialize user_fence for the job
+ * @job: job whose user_fence needs an init
+ * @sync: sync to be use to init user_fence
+ */
+void xe_sched_job_init_user_fence(struct xe_sched_job *job,
+				  struct xe_sync_entry *sync)
+{
+	if (sync->type != DRM_XE_SYNC_TYPE_USER_FENCE)
+		return;
+
+	job->user_fence.used = true;
+	job->user_fence.addr = sync->addr;
+	job->user_fence.value = sync->timeline_value;
+}
+
+struct xe_sched_job_snapshot *
+xe_sched_job_snapshot_capture(struct xe_sched_job *job)
+{
+	struct xe_exec_queue *q = job->q;
+	struct xe_device *xe = q->gt->tile->xe;
+	struct xe_sched_job_snapshot *snapshot;
+	size_t len = sizeof(*snapshot) + (sizeof(u64) * q->width);
+	u16 i;
+
+	snapshot = kzalloc(len, GFP_ATOMIC);
+	if (!snapshot)
+		return NULL;
+
+	snapshot->batch_addr_len = q->width;
+	for (i = 0; i < q->width; i++)
+		snapshot->batch_addr[i] = xe_device_uncanonicalize_addr(xe, job->batch_addr[i]);
+
+	return snapshot;
+}
+
+void xe_sched_job_snapshot_free(struct xe_sched_job_snapshot *snapshot)
+{
+	kfree(snapshot);
+}
+
+void
+xe_sched_job_snapshot_print(struct xe_sched_job_snapshot *snapshot,
+			    struct drm_printer *p)
+{
+	u16 i;
+
+	if (!snapshot)
+		return;
+
+	for (i = 0; i < snapshot->batch_addr_len; i++)
+		drm_printf(p, "batch_addr[%u]: 0x%016llx\n", i, snapshot->batch_addr[i]);
+}
diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h
index 34f475ba7f..c75018f466 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.h
+++ b/drivers/gpu/drm/xe/xe_sched_job.h
@@ -8,7 +8,9 @@
 
 #include "xe_sched_job_types.h"
 
+struct drm_printer;
 struct xe_vm;
+struct xe_sync_entry;
 
 #define XE_SCHED_HANG_LIMIT 1
 #define XE_SCHED_JOB_TIMEOUT LONG_MAX
@@ -57,6 +59,8 @@ void xe_sched_job_arm(struct xe_sched_job *job);
 void xe_sched_job_push(struct xe_sched_job *job);
 
 int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm);
+void xe_sched_job_init_user_fence(struct xe_sched_job *job,
+				  struct xe_sync_entry *sync);
 
 static inline struct xe_sched_job *
 to_xe_sched_job(struct drm_sched_job *drm)
@@ -77,4 +81,8 @@ xe_sched_job_add_migrate_flush(struct xe_sched_job *job, u32 flags)
 
 bool xe_sched_job_is_migration(struct xe_exec_queue *q);
 
+struct xe_sched_job_snapshot *xe_sched_job_snapshot_capture(struct xe_sched_job *job);
+void xe_sched_job_snapshot_free(struct xe_sched_job_snapshot *snapshot);
+void xe_sched_job_snapshot_print(struct xe_sched_job_snapshot *snapshot, struct drm_printer *p);
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
index 46ead63459..5e12724219 100644
--- a/drivers/gpu/drm/xe/xe_sched_job_types.h
+++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
@@ -30,11 +30,11 @@ struct xe_sched_job {
 	struct dma_fence *fence;
 	/** @user_fence: write back value when BB is complete */
 	struct {
-		/** @used: user fence is used */
+		/** @user_fence.used: user fence is used */
 		bool used;
-		/** @addr: address to write to */
+		/** @user_fence.addr: address to write to */
 		u64 addr;
-		/** @value: write back value */
+		/** @user_fence.value: write back value */
 		u64 value;
 	} user_fence;
 	/** @migrate_flush_flags: Additional flush flags for migration jobs */
@@ -45,4 +45,9 @@ struct xe_sched_job {
 	u64 batch_addr[];
 };
 
+struct xe_sched_job_snapshot {
+	u16 batch_addr_len;
+	u64 batch_addr[];
+};
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c
index 42a0e0c917..1c3fa84b6a 100644
--- a/drivers/gpu/drm/xe/xe_sriov.c
+++ b/drivers/gpu/drm/xe/xe_sriov.c
@@ -3,8 +3,15 @@
  * Copyright © 2023 Intel Corporation
  */
 
+#include <drm/drm_managed.h>
+
+#include "regs/xe_sriov_regs.h"
+
 #include "xe_assert.h"
+#include "xe_device.h"
+#include "xe_mmio.h"
 #include "xe_sriov.h"
+#include "xe_sriov_pf.h"
 
 /**
  * xe_sriov_mode_to_string - Convert enum value to string.
@@ -26,10 +33,16 @@ const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode)
 	}
 }
 
+static bool test_is_vf(struct xe_device *xe)
+{
+	u32 value = xe_mmio_read32(xe_root_mmio_gt(xe), VF_CAP_REG);
+
+	return value & VF_CAP;
+}
+
 /**
  * xe_sriov_probe_early - Probe a SR-IOV mode.
  * @xe: the &xe_device to probe mode on
- * @has_sriov: flag indicating hardware support for SR-IOV
  *
  * This function should be called only once and as soon as possible during
  * driver probe to detect whether we are running a SR-IOV Physical Function
@@ -38,12 +51,17 @@ const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode)
  * SR-IOV PF mode detection is based on PCI @dev_is_pf() function.
  * SR-IOV VF mode detection is based on dedicated MMIO register read.
  */
-void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov)
+void xe_sriov_probe_early(struct xe_device *xe)
 {
 	enum xe_sriov_mode mode = XE_SRIOV_MODE_NONE;
+	bool has_sriov = xe->info.has_sriov;
 
-	/* TODO: replace with proper mode detection */
-	xe_assert(xe, !has_sriov);
+	if (has_sriov) {
+		if (test_is_vf(xe))
+			mode = XE_SRIOV_MODE_VF;
+		else if (xe_sriov_pf_readiness(xe))
+			mode = XE_SRIOV_MODE_PF;
+	}
 
 	xe_assert(xe, !xe->sriov.__mode);
 	xe->sriov.__mode = mode;
@@ -53,3 +71,71 @@ void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov)
 		drm_info(&xe->drm, "Running in %s mode\n",
 			 xe_sriov_mode_to_string(xe_device_sriov_mode(xe)));
 }
+
+static void fini_sriov(struct drm_device *drm, void *arg)
+{
+	struct xe_device *xe = arg;
+
+	destroy_workqueue(xe->sriov.wq);
+	xe->sriov.wq = NULL;
+}
+
+/**
+ * xe_sriov_init - Initialize SR-IOV specific data.
+ * @xe: the &xe_device to initialize
+ *
+ * In this function we create dedicated workqueue that will be used
+ * by the SR-IOV specific workers.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_init(struct xe_device *xe)
+{
+	if (!IS_SRIOV(xe))
+		return 0;
+
+	if (IS_SRIOV_PF(xe)) {
+		int err = xe_sriov_pf_init_early(xe);
+
+		if (err)
+			return err;
+	}
+
+	xe_assert(xe, !xe->sriov.wq);
+	xe->sriov.wq = alloc_workqueue("xe-sriov-wq", 0, 0);
+	if (!xe->sriov.wq)
+		return -ENOMEM;
+
+	return drmm_add_action_or_reset(&xe->drm, fini_sriov, xe);
+}
+
+/**
+ * xe_sriov_print_info - Print basic SR-IOV information.
+ * @xe: the &xe_device to print info from
+ * @p: the &drm_printer
+ *
+ * Print SR-IOV related information into provided DRM printer.
+ */
+void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p)
+{
+	drm_printf(p, "supported: %s\n", str_yes_no(xe_device_has_sriov(xe)));
+	drm_printf(p, "enabled: %s\n", str_yes_no(IS_SRIOV(xe)));
+	drm_printf(p, "mode: %s\n", xe_sriov_mode_to_string(xe_device_sriov_mode(xe)));
+}
+
+/**
+ * xe_sriov_function_name() - Get SR-IOV Function name.
+ * @n: the Function number (identifier) to get name of
+ * @buf: the buffer to format to
+ * @size: size of the buffer (shall be at least 5 bytes)
+ *
+ * Return: formatted function name ("PF" or "VF%u").
+ */
+const char *xe_sriov_function_name(unsigned int n, char *buf, size_t size)
+{
+	if (n)
+		snprintf(buf, size, "VF%u", n);
+	else
+		strscpy(buf, "PF", size);
+	return buf;
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h
index 5af73a3172..486bb21c32 100644
--- a/drivers/gpu/drm/xe/xe_sriov.h
+++ b/drivers/gpu/drm/xe/xe_sriov.h
@@ -10,9 +10,14 @@
 #include "xe_device_types.h"
 #include "xe_sriov_types.h"
 
+struct drm_printer;
+
 const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode);
+const char *xe_sriov_function_name(unsigned int n, char *buf, size_t len);
 
-void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov);
+void xe_sriov_probe_early(struct xe_device *xe);
+void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p);
+int xe_sriov_init(struct xe_device *xe);
 
 static inline enum xe_sriov_mode xe_device_sriov_mode(struct xe_device *xe)
 {
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c
new file mode 100644
index 0000000000..0f721ae17b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_assert.h"
+#include "xe_device.h"
+#include "xe_module.h"
+#include "xe_sriov.h"
+#include "xe_sriov_pf.h"
+#include "xe_sriov_printk.h"
+
+static unsigned int wanted_max_vfs(struct xe_device *xe)
+{
+	return xe_modparam.max_vfs;
+}
+
+static int pf_reduce_totalvfs(struct xe_device *xe, int limit)
+{
+	struct device *dev = xe->drm.dev;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int err;
+
+	err = pci_sriov_set_totalvfs(pdev, limit);
+	if (err)
+		xe_sriov_notice(xe, "Failed to set number of VFs to %d (%pe)\n",
+				limit, ERR_PTR(err));
+	return err;
+}
+
+static bool pf_continue_as_native(struct xe_device *xe, const char *why)
+{
+	xe_sriov_dbg(xe, "%s, continuing as native\n", why);
+	pf_reduce_totalvfs(xe, 0);
+	return false;
+}
+
+/**
+ * xe_sriov_pf_readiness - Check if PF functionality can be enabled.
+ * @xe: the &xe_device to check
+ *
+ * This function is called as part of the SR-IOV probe to validate if all
+ * PF prerequisites are satisfied and we can continue with enabling PF mode.
+ *
+ * Return: true if the PF mode can be turned on.
+ */
+bool xe_sriov_pf_readiness(struct xe_device *xe)
+{
+	struct device *dev = xe->drm.dev;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int totalvfs = pci_sriov_get_totalvfs(pdev);
+	int newlimit = min_t(u16, wanted_max_vfs(xe), totalvfs);
+
+	xe_assert(xe, totalvfs <= U16_MAX);
+
+	if (!dev_is_pf(dev))
+		return false;
+
+	if (!xe_device_uc_enabled(xe))
+		return pf_continue_as_native(xe, "Guc submission disabled");
+
+	if (!newlimit)
+		return pf_continue_as_native(xe, "all VFs disabled");
+
+	pf_reduce_totalvfs(xe, newlimit);
+
+	xe->sriov.pf.device_total_vfs = totalvfs;
+	xe->sriov.pf.driver_max_vfs = newlimit;
+
+	return true;
+}
+
+/**
+ * xe_sriov_pf_init_early - Initialize SR-IOV PF specific data.
+ * @xe: the &xe_device to initialize
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_init_early(struct xe_device *xe)
+{
+	xe_assert(xe, IS_SRIOV_PF(xe));
+
+	return drmm_mutex_init(&xe->drm, &xe->sriov.pf.master_lock);
+}
+
+/**
+ * xe_sriov_pf_print_vfs_summary - Print SR-IOV PF information.
+ * @xe: the &xe_device to print info from
+ * @p: the &drm_printer
+ *
+ * Print SR-IOV PF related information into provided DRM printer.
+ */
+void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p)
+{
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+
+	xe_assert(xe, IS_SRIOV_PF(xe));
+
+	drm_printf(p, "total: %u\n", xe->sriov.pf.device_total_vfs);
+	drm_printf(p, "supported: %u\n", xe->sriov.pf.driver_max_vfs);
+	drm_printf(p, "enabled: %u\n", pci_num_vf(pdev));
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.h b/drivers/gpu/drm/xe/xe_sriov_pf.h
new file mode 100644
index 0000000000..d1220e70e1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_PF_H_
+#define _XE_SRIOV_PF_H_
+
+#include <linux/types.h>
+
+struct drm_printer;
+struct xe_device;
+
+#ifdef CONFIG_PCI_IOV
+bool xe_sriov_pf_readiness(struct xe_device *xe);
+int xe_sriov_pf_init_early(struct xe_device *xe);
+void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p);
+#else
+static inline bool xe_sriov_pf_readiness(struct xe_device *xe)
+{
+	return false;
+}
+
+static inline int xe_sriov_pf_init_early(struct xe_device *xe)
+{
+	return 0;
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h
new file mode 100644
index 0000000000..7d156ba824
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2024 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_PF_HELPERS_H_
+#define _XE_SRIOV_PF_HELPERS_H_
+
+#include "xe_assert.h"
+#include "xe_device_types.h"
+#include "xe_sriov.h"
+#include "xe_sriov_types.h"
+
+/**
+ * xe_sriov_pf_assert_vfid() - warn if &id is not a supported VF number when debugging.
+ * @xe: the PF &xe_device to assert on
+ * @vfid: the VF number to assert
+ *
+ * Assert that &xe represents the Physical Function (PF) device and provided &vfid
+ * is within a range of supported VF numbers (up to maximum number of VFs that
+ * driver can support, including VF0 that represents the PF itself).
+ *
+ * Note: Effective only on debug builds. See `Xe ASSERTs`_ for more information.
+ */
+#define xe_sriov_pf_assert_vfid(xe, vfid) \
+	xe_assert((xe), (vfid) <= xe_sriov_pf_get_totalvfs(xe))
+
+/**
+ * xe_sriov_pf_get_totalvfs() - Get maximum number of VFs that driver can support.
+ * @xe: the &xe_device to query (shall be PF)
+ *
+ * Return: Maximum number of VFs that this PF driver supports.
+ */
+static inline int xe_sriov_pf_get_totalvfs(struct xe_device *xe)
+{
+	xe_assert(xe, IS_SRIOV_PF(xe));
+	return xe->sriov.pf.driver_max_vfs;
+}
+
+static inline struct mutex *xe_sriov_pf_master_mutex(struct xe_device *xe)
+{
+	xe_assert(xe, IS_SRIOV_PF(xe));
+	return &xe->sriov.pf.master_lock;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h
index 999a4311b9..c7b7ad4af5 100644
--- a/drivers/gpu/drm/xe/xe_sriov_types.h
+++ b/drivers/gpu/drm/xe/xe_sriov_types.h
@@ -7,6 +7,20 @@
 #define _XE_SRIOV_TYPES_H_
 
 #include <linux/build_bug.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+/**
+ * VFID - Virtual Function Identifier
+ * @n: VF number
+ *
+ * Helper macro to represent Virtual Function (VF) Identifier.
+ * VFID(0) is used as alias to the PFID that represents Physical Function.
+ *
+ * Note: According to PCI spec, SR-IOV VF's numbers are 1-based (VF1, VF2, ...).
+ */
+#define VFID(n)		(n)
+#define PFID		VFID(0)
 
 /**
  * enum xe_sriov_mode - SR-IOV mode
@@ -25,4 +39,21 @@ enum xe_sriov_mode {
 };
 static_assert(XE_SRIOV_MODE_NONE);
 
+/**
+ * struct xe_device_pf - Xe PF related data
+ *
+ * The data in this structure is valid only if driver is running in the
+ * @XE_SRIOV_MODE_PF mode.
+ */
+struct xe_device_pf {
+	/** @device_total_vfs: Maximum number of VFs supported by the device. */
+	u16 device_total_vfs;
+
+	/** @driver_max_vfs: Maximum number of VFs supported by the driver. */
+	u16 driver_max_vfs;
+
+	/** @master_lock: protects all VFs configurations across GTs */
+	struct mutex master_lock;
+};
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index 02c9577fe4..2bfff99845 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -224,8 +224,7 @@ int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job)
 	return 0;
 }
 
-void xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job,
-			  struct dma_fence *fence)
+void xe_sync_entry_signal(struct xe_sync_entry *sync, struct dma_fence *fence)
 {
 	if (!(sync->flags & DRM_XE_SYNC_FLAG_SIGNAL))
 		return;
@@ -254,10 +253,6 @@ void xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job,
 			user_fence_put(sync->ufence);
 			dma_fence_put(fence);
 		}
-	} else if (sync->type == DRM_XE_SYNC_TYPE_USER_FENCE) {
-		job->user_fence.used = true;
-		job->user_fence.addr = sync->addr;
-		job->user_fence.value = sync->timeline_value;
 	}
 }
 
@@ -268,7 +263,7 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync)
 	if (sync->fence)
 		dma_fence_put(sync->fence);
 	if (sync->chain_fence)
-		dma_fence_put(&sync->chain_fence->base);
+		dma_fence_chain_free(sync->chain_fence);
 	if (sync->ufence)
 		user_fence_put(sync->ufence);
 }
diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
index 0fd0d51208..3e03396af2 100644
--- a/drivers/gpu/drm/xe/xe_sync.h
+++ b/drivers/gpu/drm/xe/xe_sync.h
@@ -26,7 +26,6 @@ int xe_sync_entry_wait(struct xe_sync_entry *sync);
 int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
 			   struct xe_sched_job *job);
 void xe_sync_entry_signal(struct xe_sync_entry *sync,
-			  struct xe_sched_job *job,
 			  struct dma_fence *fence);
 void xe_sync_entry_cleanup(struct xe_sync_entry *sync);
 struct dma_fence *
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index 0650b2fa75..15ea0a942f 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -160,24 +160,19 @@ int xe_tile_init_noalloc(struct xe_tile *tile)
 {
 	int err;
 
-	xe_device_mem_access_get(tile_to_xe(tile));
-
 	err = tile_ttm_mgr_init(tile);
 	if (err)
-		goto err_mem_access;
+		return err;
 
 	tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16);
-	if (IS_ERR(tile->mem.kernel_bb_pool)) {
-		err = PTR_ERR(tile->mem.kernel_bb_pool);
-		goto err_mem_access;
-	}
+	if (IS_ERR(tile->mem.kernel_bb_pool))
+		return PTR_ERR(tile->mem.kernel_bb_pool);
+
 	xe_wa_apply_tile_workarounds(tile);
 
-	xe_tile_sysfs_init(tile);
+	err = xe_tile_sysfs_init(tile);
 
-err_mem_access:
-	xe_device_mem_access_put(tile_to_xe(tile));
-	return err;
+	return 0;
 }
 
 void xe_tile_migrate_wait(struct xe_tile *tile)
diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c
index 0f8d3e7fce..64661403af 100644
--- a/drivers/gpu/drm/xe/xe_tile_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c
@@ -7,8 +7,10 @@
 #include <linux/sysfs.h>
 #include <drm/drm_managed.h>
 
+#include "xe_pm.h"
 #include "xe_tile.h"
 #include "xe_tile_sysfs.h"
+#include "xe_vram_freq.h"
 
 static void xe_tile_sysfs_kobj_release(struct kobject *kobj)
 {
@@ -27,7 +29,7 @@ static void tile_sysfs_fini(struct drm_device *drm, void *arg)
 	kobject_put(tile->sysfs);
 }
 
-void xe_tile_sysfs_init(struct xe_tile *tile)
+int xe_tile_sysfs_init(struct xe_tile *tile)
 {
 	struct xe_device *xe = tile_to_xe(tile);
 	struct device *dev = xe->drm.dev;
@@ -36,7 +38,7 @@ void xe_tile_sysfs_init(struct xe_tile *tile)
 
 	kt = kzalloc(sizeof(*kt), GFP_KERNEL);
 	if (!kt)
-		return;
+		return -ENOMEM;
 
 	kobject_init(&kt->base, &xe_tile_sysfs_kobj_type);
 	kt->tile = tile;
@@ -44,14 +46,14 @@ void xe_tile_sysfs_init(struct xe_tile *tile)
 	err = kobject_add(&kt->base, &dev->kobj, "tile%d", tile->id);
 	if (err) {
 		kobject_put(&kt->base);
-		drm_warn(&xe->drm, "failed to register TILE sysfs directory, err: %d\n", err);
-		return;
+		return err;
 	}
 
 	tile->sysfs = &kt->base;
 
-	err = drmm_add_action_or_reset(&xe->drm, tile_sysfs_fini, tile);
+	err = xe_vram_freq_sysfs_init(tile);
 	if (err)
-		drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
-			 __func__, err);
+		return err;
+
+	return drmm_add_action_or_reset(&xe->drm, tile_sysfs_fini, tile);
 }
diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.h b/drivers/gpu/drm/xe/xe_tile_sysfs.h
index e4f065039e..54a2ba8ba5 100644
--- a/drivers/gpu/drm/xe/xe_tile_sysfs.h
+++ b/drivers/gpu/drm/xe/xe_tile_sysfs.h
@@ -8,7 +8,7 @@
 
 #include "xe_tile_sysfs_types.h"
 
-void xe_tile_sysfs_init(struct xe_tile *tile);
+int xe_tile_sysfs_init(struct xe_tile *tile);
 
 static inline struct xe_tile *
 kobj_to_tile(struct kobject *kobj)
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index 846f14507d..2d56cfc09e 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -258,7 +258,7 @@ DECLARE_EVENT_CLASS(xe_sched_job,
 			     __field(u32, guc_state)
 			     __field(u32, flags)
 			     __field(int, error)
-			     __field(u64, fence)
+			     __field(struct dma_fence *, fence)
 			     __field(u64, batch_addr)
 			     ),
 
@@ -269,11 +269,11 @@ DECLARE_EVENT_CLASS(xe_sched_job,
 			   atomic_read(&job->q->guc->state);
 			   __entry->flags = job->q->flags;
 			   __entry->error = job->fence->error;
-			   __entry->fence = (unsigned long)job->fence;
+			   __entry->fence = job->fence;
 			   __entry->batch_addr = (u64)job->batch_addr[0];
 			   ),
 
-		    TP_printk("fence=0x%016llx, seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
+		    TP_printk("fence=%p, seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
 			      __entry->fence, __entry->seqno, __entry->guc_id,
 			      __entry->batch_addr, __entry->guc_state,
 			      __entry->flags, __entry->error)
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index e5d7d5e2be..f773673297 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -11,7 +11,8 @@
 #include <drm/ttm/ttm_placement.h>
 #include <drm/ttm/ttm_range_manager.h>
 
-#include "generated/xe_wa_oob.h"
+#include <generated/xe_wa_oob.h>
+
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_regs.h"
 #include "xe_bo.h"
@@ -19,6 +20,7 @@
 #include "xe_gt.h"
 #include "xe_mmio.h"
 #include "xe_res_cursor.h"
+#include "xe_sriov.h"
 #include "xe_ttm_stolen_mgr.h"
 #include "xe_ttm_vram_mgr.h"
 #include "xe_wa.h"
@@ -202,10 +204,17 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe)
 {
 	struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL);
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
-	u64 stolen_size, io_size, pgsize;
+	u64 stolen_size, io_size;
 	int err;
 
-	if (IS_DGFX(xe))
+	if (!mgr) {
+		drm_dbg_kms(&xe->drm, "Stolen mgr init failed\n");
+		return;
+	}
+
+	if (IS_SRIOV_VF(xe))
+		stolen_size = 0;
+	else if (IS_DGFX(xe))
 		stolen_size = detect_bar2_dgfx(xe, mgr);
 	else if (GRAPHICS_VERx100(xe) >= 1270)
 		stolen_size = detect_bar2_integrated(xe, mgr);
@@ -217,10 +226,6 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe)
 		return;
 	}
 
-	pgsize = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
-	if (pgsize < PAGE_SIZE)
-		pgsize = PAGE_SIZE;
-
 	/*
 	 * We don't try to attempt partial visible support for stolen vram,
 	 * since stolen is always at the end of vram, and the BAR size is pretty
@@ -231,7 +236,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe)
 		io_size = stolen_size;
 
 	err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size,
-				     io_size, pgsize);
+				     io_size, PAGE_SIZE);
 	if (err) {
 		drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err);
 		return;
@@ -294,7 +299,7 @@ static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe,
 	XE_WARN_ON(IS_DGFX(xe));
 
 	/* XXX: Require BO to be mapped to GGTT? */
-	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_CREATE_GGTT_BIT)))
+	if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_GGTT)))
 		return -EIO;
 
 	/* GGTT is always contiguously mapped */
diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
index 3e1fa0c832..9844a8edbf 100644
--- a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
@@ -73,7 +73,10 @@ static void xe_ttm_sys_mgr_del(struct ttm_resource_manager *man,
 static void xe_ttm_sys_mgr_debug(struct ttm_resource_manager *man,
 				 struct drm_printer *printer)
 {
-
+	/*
+	 * This function is called by debugfs entry and would require
+	 * pm_runtime_{get,put} wrappers around any operation.
+	 */
 }
 
 static const struct ttm_resource_manager_func xe_ttm_sys_mgr_func = {
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index 115ec745e5..fe3779fdba 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -91,7 +91,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
 
 	min_page_size = mgr->default_page_size;
 	if (tbo->page_alignment)
-		min_page_size = tbo->page_alignment << PAGE_SHIFT;
+		min_page_size = (u64)tbo->page_alignment << PAGE_SHIFT;
 
 	if (WARN_ON(min_page_size < mm->chunk_size)) {
 		err = -EINVAL;
@@ -196,7 +196,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man,
 	return 0;
 
 error_free_blocks:
-	drm_buddy_free_list(mm, &vres->blocks);
+	drm_buddy_free_list(mm, &vres->blocks, 0);
 	mutex_unlock(&mgr->lock);
 error_fini:
 	ttm_resource_fini(man, &vres->base);
@@ -214,7 +214,7 @@ static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man,
 	struct drm_buddy *mm = &mgr->mm;
 
 	mutex_lock(&mgr->lock);
-	drm_buddy_free_list(mm, &vres->blocks);
+	drm_buddy_free_list(mm, &vres->blocks, 0);
 	mgr->visible_avail += vres->used_visible_size;
 	mutex_unlock(&mgr->lock);
 
@@ -478,3 +478,15 @@ void xe_ttm_vram_get_used(struct ttm_resource_manager *man,
 	*used_visible = mgr->visible_size - mgr->visible_avail;
 	mutex_unlock(&mgr->lock);
 }
+
+u64 xe_ttm_vram_get_avail(struct ttm_resource_manager *man)
+{
+	struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man);
+	u64 avail;
+
+	mutex_lock(&mgr->lock);
+	avail =  mgr->mm.avail;
+	mutex_unlock(&mgr->lock);
+
+	return avail;
+}
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
index d184e19a92..cc76050e37 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
@@ -25,6 +25,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
 void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir,
 			      struct sg_table *sgt);
 
+u64 xe_ttm_vram_get_avail(struct ttm_resource_manager *man);
 u64 xe_ttm_vram_get_cpu_visible_size(struct ttm_resource_manager *man);
 void xe_ttm_vram_get_used(struct ttm_resource_manager *man,
 			  u64 *used, u64 *used_visible);
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index 53ccd338fd..d4e6fa9189 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -28,7 +28,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
 	/* Xe2 */
 
 	{ XE_RTP_NAME("Tuning: L3 cache"),
-	  XE_RTP_RULES(GRAPHICS_VERSION(2004)),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
 	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
 				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
 	},
@@ -37,13 +37,20 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
 	  XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
 				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
 	},
-
+	{ XE_RTP_NAME("Tuning: Compression Overfetch"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
+	  XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX)),
+	},
+	{ XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
+	  XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN))
+	},
 	{}
 };
 
 static const struct xe_rtp_entry_sr engine_tunings[] = {
 	{ XE_RTP_NAME("Tuning: Set Indirect State Override"),
-	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1271),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1274),
 		       ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE))
 	},
@@ -81,7 +88,7 @@ static const struct xe_rtp_entry_sr lrc_tunings[] = {
 	/* Xe_LPG */
 
 	{ XE_RTP_NAME("Tuning: L3 cache"),
-	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), ENGINE_CLASS(RENDER)),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
 				   REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
 	},
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index 25e1ddfd2f..4feb35c95a 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -7,8 +7,10 @@
 
 #include "xe_device.h"
 #include "xe_gsc.h"
+#include "xe_gsc_proxy.h"
 #include "xe_gt.h"
 #include "xe_guc.h"
+#include "xe_guc_db_mgr.h"
 #include "xe_guc_pc.h"
 #include "xe_guc_submit.h"
 #include "xe_huc.h"
@@ -36,7 +38,6 @@ int xe_uc_init(struct xe_uc *uc)
 	 * We call the GuC/HuC/GSC init functions even if GuC submission is off
 	 * to correctly move our tracking of the FW state to "disabled".
 	 */
-
 	ret = xe_guc_init(&uc->guc);
 	if (ret)
 		goto err;
@@ -50,7 +51,7 @@ int xe_uc_init(struct xe_uc *uc)
 		goto err;
 
 	if (!xe_device_uc_enabled(uc_to_xe(uc)))
-		return 0;
+		goto err;
 
 	ret = xe_wopcm_init(&uc->wopcm);
 	if (ret)
@@ -60,7 +61,7 @@ int xe_uc_init(struct xe_uc *uc)
 	if (ret)
 		goto err;
 
-	return 0;
+	ret = xe_guc_db_mgr_init(&uc->guc.dbm, ~0);
 
 err:
 	return ret;
@@ -88,6 +89,10 @@ int xe_uc_init_post_hwconfig(struct xe_uc *uc)
 	if (err)
 		return err;
 
+	err = xe_huc_init_post_hwconfig(&uc->huc);
+	if (err)
+		return err;
+
 	return xe_gsc_init_post_hwconfig(&uc->gsc);
 }
 
@@ -256,3 +261,16 @@ int xe_uc_suspend(struct xe_uc *uc)
 
 	return xe_guc_suspend(&uc->guc);
 }
+
+/**
+ * xe_uc_remove() - Clean up the UC structures before driver removal
+ * @uc: the UC object
+ *
+ * This function should only act on objects/structures that must be cleaned
+ * before the driver removal callback is complete and therefore can't be
+ * deferred to a drmm action.
+ */
+void xe_uc_remove(struct xe_uc *uc)
+{
+	xe_gsc_remove(&uc->gsc);
+}
diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h
index 5d5110c0c8..e4d4e3c99f 100644
--- a/drivers/gpu/drm/xe/xe_uc.h
+++ b/drivers/gpu/drm/xe/xe_uc.h
@@ -20,5 +20,6 @@ int xe_uc_stop(struct xe_uc *uc);
 int xe_uc_start(struct xe_uc *uc);
 int xe_uc_suspend(struct xe_uc *uc);
 int xe_uc_sanitize_reset(struct xe_uc *uc);
+void xe_uc_remove(struct xe_uc *uc);
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_uc_debugfs.c b/drivers/gpu/drm/xe/xe_uc_debugfs.c
index 0a39ec5a6e..78eb8db737 100644
--- a/drivers/gpu/drm/xe/xe_uc_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_uc_debugfs.c
@@ -3,6 +3,8 @@
  * Copyright © 2022 Intel Corporation
  */
 
+#include <linux/debugfs.h>
+
 #include <drm/drm_debugfs.h>
 
 #include "xe_gt.h"
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 9dff96dfe4..186f81640c 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -17,6 +17,7 @@
 #include "xe_map.h"
 #include "xe_mmio.h"
 #include "xe_module.h"
+#include "xe_sriov.h"
 #include "xe_uc_fw.h"
 
 /*
@@ -92,6 +93,7 @@ struct uc_fw_entry {
 		const char *path;
 		u16 major;
 		u16 minor;
+		u16 patch;
 		bool full_ver_required;
 	};
 };
@@ -102,14 +104,15 @@ struct fw_blobs_by_type {
 };
 
 #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver)			\
-	fw_def(METEORLAKE,	major_ver(i915,	guc,	mtl,	70, 7))		\
-	fw_def(DG2,		major_ver(i915,	guc,	dg2,	70, 5))		\
-	fw_def(DG1,		major_ver(i915,	guc,	dg1,	70, 5))		\
-	fw_def(ALDERLAKE_N,	major_ver(i915,	guc,	tgl,	70, 5))		\
-	fw_def(ALDERLAKE_P,	major_ver(i915,	guc,	adlp,	70, 5))		\
-	fw_def(ALDERLAKE_S,	major_ver(i915,	guc,	tgl,	70, 5))		\
-	fw_def(ROCKETLAKE,	major_ver(i915,	guc,	tgl,	70, 5))		\
-	fw_def(TIGERLAKE,	major_ver(i915,	guc,	tgl,	70, 5))
+	fw_def(LUNARLAKE,	major_ver(xe,	guc,	lnl,	70, 19, 2))	\
+	fw_def(METEORLAKE,	major_ver(i915,	guc,	mtl,	70, 19, 2))	\
+	fw_def(DG2,		major_ver(i915,	guc,	dg2,	70, 19, 2))	\
+	fw_def(DG1,		major_ver(i915,	guc,	dg1,	70, 19, 2))	\
+	fw_def(ALDERLAKE_N,	major_ver(i915,	guc,	tgl,	70, 19, 2))	\
+	fw_def(ALDERLAKE_P,	major_ver(i915,	guc,	adlp,	70, 19, 2))	\
+	fw_def(ALDERLAKE_S,	major_ver(i915,	guc,	tgl,	70, 19, 2))	\
+	fw_def(ROCKETLAKE,	major_ver(i915,	guc,	tgl,	70, 19, 2))	\
+	fw_def(TIGERLAKE,	major_ver(i915,	guc,	tgl,	70, 19, 2))
 
 #define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver)		\
 	fw_def(METEORLAKE,	no_ver(i915,	huc_gsc,	mtl))		\
@@ -121,24 +124,24 @@ struct fw_blobs_by_type {
 
 /* for the GSC FW we match the compatibility version and not the release one */
 #define XE_GSC_FIRMWARE_DEFS(fw_def, major_ver)		\
-	fw_def(METEORLAKE,	major_ver(i915,	gsc,	mtl,	1, 0))
+	fw_def(METEORLAKE,	major_ver(i915,	gsc,	mtl,	1, 0, 0))
 
 #define MAKE_FW_PATH(dir__, uc__, shortname__, version__)			\
 	__stringify(dir__) "/" __stringify(shortname__) "_" __stringify(uc__) version__ ".bin"
 
 #define fw_filename_mmp_ver(dir_, uc_, shortname_, a, b, c)			\
 	MAKE_FW_PATH(dir_, uc_, shortname_, "_" __stringify(a ## . ## b ## . ## c))
-#define fw_filename_major_ver(dir_, uc_, shortname_, a, b)			\
+#define fw_filename_major_ver(dir_, uc_, shortname_, a, b, c)			\
 	MAKE_FW_PATH(dir_, uc_, shortname_, "_" __stringify(a))
 #define fw_filename_no_ver(dir_, uc_, shortname_)				\
 	MAKE_FW_PATH(dir_, uc_, shortname_, "")
 
 #define uc_fw_entry_mmp_ver(dir_, uc_, shortname_, a, b, c)			\
 	{ fw_filename_mmp_ver(dir_, uc_, shortname_, a, b, c),			\
-	  a, b, true }
-#define uc_fw_entry_major_ver(dir_, uc_, shortname_, a, b)			\
-	{ fw_filename_major_ver(dir_, uc_, shortname_, a, b),			\
-	  a, b }
+	  a, b, c, true }
+#define uc_fw_entry_major_ver(dir_, uc_, shortname_, a, b, c)			\
+	{ fw_filename_major_ver(dir_, uc_, shortname_, a, b, c),		\
+	  a, b, c }
 #define uc_fw_entry_no_ver(dir_, uc_, shortname_)				\
 	{ fw_filename_no_ver(dir_, uc_, shortname_),				\
 	  0, 0 }
@@ -221,6 +224,7 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
 			uc_fw->path = entries[i].path;
 			uc_fw->versions.wanted.major = entries[i].major;
 			uc_fw->versions.wanted.minor = entries[i].minor;
+			uc_fw->versions.wanted.patch = entries[i].patch;
 			uc_fw->full_ver_required = entries[i].full_ver_required;
 
 			if (uc_fw->type == XE_UC_FW_TYPE_GSC)
@@ -293,36 +297,28 @@ static void uc_fw_fini(struct drm_device *drm, void *arg)
 	xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED);
 }
 
-static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
+static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
 {
 	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
 	struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
 	struct xe_uc_fw_version *compatibility = &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY];
 
 	xe_gt_assert(gt, uc_fw->type == XE_UC_FW_TYPE_GUC);
-	xe_gt_assert(gt, release->major >= 70);
-
-	if (release->major > 70 || release->minor >= 6) {
-		/* v70.6.0 adds CSS header support */
-		compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
-						 css->submission_version);
-		compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
-						 css->submission_version);
-		compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH,
-						 css->submission_version);
-	} else if (release->minor >= 3) {
-		/* v70.3.0 introduced v1.1.0 */
-		compatibility->major = 1;
-		compatibility->minor = 1;
-		compatibility->patch = 0;
-	} else {
-		/* v70.0.0 introduced v1.0.0 */
-		compatibility->major = 1;
-		compatibility->minor = 0;
-		compatibility->patch = 0;
+
+	/* We don't support GuC releases older than 70.19 */
+	if (release->major < 70 || (release->major == 70 && release->minor < 19)) {
+		xe_gt_err(gt, "Unsupported GuC v%u.%u! v70.19 or newer is required\n",
+			  release->major, release->minor);
+		return -EINVAL;
 	}
 
+	compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->submission_version);
+	compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->submission_version);
+	compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->submission_version);
+
 	uc_fw->private_data_size = css->private_data_size;
+
+	return 0;
 }
 
 int xe_uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw)
@@ -340,19 +336,22 @@ int xe_uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw)
 	 * Otherwise, at least the major version.
 	 */
 	if (wanted->major != found->major ||
-	    (uc_fw->full_ver_required && wanted->minor != found->minor)) {
-		drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
+	    (uc_fw->full_ver_required &&
+	     ((wanted->minor != found->minor) ||
+	      (wanted->patch != found->patch)))) {
+		drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u.%u != %u.%u.%u\n",
 			   xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
-			   found->major, found->minor,
-			   wanted->major, wanted->minor);
+			   found->major, found->minor, found->patch,
+			   wanted->major, wanted->minor, wanted->patch);
 		goto fail;
 	}
 
-	if (wanted->minor > found->minor) {
-		drm_notice(&xe->drm, "%s firmware (%u.%u) is recommended, but only (%u.%u) was found in %s\n",
+	if (wanted->minor > found->minor ||
+	    (wanted->minor == found->minor && wanted->patch > found->patch)) {
+		drm_notice(&xe->drm, "%s firmware (%u.%u.%u) is recommended, but only (%u.%u.%u) was found in %s\n",
 			   xe_uc_fw_type_repr(uc_fw->type),
-			   wanted->major, wanted->minor,
-			   found->major, found->minor,
+			   wanted->major, wanted->minor, wanted->patch,
+			   found->major, found->minor, found->patch,
 			   uc_fw->path);
 		drm_info(&xe->drm, "Consider updating your linux-firmware pkg or downloading from %s\n",
 			 XE_UC_FIRMWARE_URL);
@@ -418,7 +417,7 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t
 	release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->sw_version);
 
 	if (uc_fw->type == XE_UC_FW_TYPE_GUC)
-		guc_read_css_info(uc_fw, css);
+		return guc_read_css_info(uc_fw, css);
 
 	return 0;
 }
@@ -652,14 +651,28 @@ static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmwar
 	xe_assert(xe, !uc_fw->path);
 
 	uc_fw_auto_select(xe, uc_fw);
+
+	if (IS_SRIOV_VF(xe)) {
+		/* VF will support only firmwares that driver can autoselect */
+		xe_uc_fw_change_status(uc_fw, uc_fw->path ?
+				       XE_UC_FIRMWARE_PRELOADED :
+				       XE_UC_FIRMWARE_NOT_SUPPORTED);
+		return 0;
+	}
+
+	uc_fw_override(uc_fw);
+
 	xe_uc_fw_change_status(uc_fw, uc_fw->path ?
 			       XE_UC_FIRMWARE_SELECTED :
 			       XE_UC_FIRMWARE_NOT_SUPPORTED);
 
-	if (!xe_uc_fw_is_supported(uc_fw))
+	if (!xe_uc_fw_is_supported(uc_fw)) {
+		if (uc_fw->type == XE_UC_FW_TYPE_GUC) {
+			drm_err(&xe->drm, "No GuC firmware defined for platform\n");
+			return -ENOENT;
+		}
 		return 0;
-
-	uc_fw_override(uc_fw);
+	}
 
 	/* an empty path means the firmware is disabled */
 	if (!xe_device_uc_enabled(xe) || !(*uc_fw->path)) {
@@ -761,7 +774,8 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw)
 		return 0;
 
 	err = uc_fw_copy(uc_fw, fw->data, fw->size,
-			 XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_GGTT_BIT);
+			 XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT |
+			 XE_BO_FLAG_GGTT_INVALIDATE);
 
 	uc_fw_release(fw);
 
@@ -777,7 +791,8 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags)
 {
 	struct xe_device *xe = uc_fw_to_xe(uc_fw);
 	struct xe_gt *gt = uc_fw_to_gt(uc_fw);
-	u32 src_offset, dma_ctrl;
+	u64 src_offset;
+	u32 dma_ctrl;
 	int ret;
 
 	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h
index 85c20795d1..3507803879 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw.h
@@ -59,6 +59,8 @@ const char *xe_uc_fw_status_repr(enum xe_uc_fw_status status)
 		return "TRANSFERRED";
 	case XE_UC_FIRMWARE_RUNNING:
 		return "RUNNING";
+	case XE_UC_FIRMWARE_PRELOADED:
+		return "PRELOADED";
 	}
 	return "<invalid>";
 }
@@ -85,6 +87,7 @@ static inline int xe_uc_fw_status_to_error(enum xe_uc_fw_status status)
 	case XE_UC_FIRMWARE_LOADABLE:
 	case XE_UC_FIRMWARE_TRANSFERRED:
 	case XE_UC_FIRMWARE_RUNNING:
+	case XE_UC_FIRMWARE_PRELOADED:
 		return 0;
 	}
 	return -EINVAL;
@@ -134,7 +137,8 @@ static inline bool xe_uc_fw_is_available(struct xe_uc_fw *uc_fw)
 
 static inline bool xe_uc_fw_is_loadable(struct xe_uc_fw *uc_fw)
 {
-	return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_LOADABLE;
+	return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_LOADABLE &&
+		__xe_uc_fw_status(uc_fw) != XE_UC_FIRMWARE_PRELOADED;
 }
 
 static inline bool xe_uc_fw_is_loaded(struct xe_uc_fw *uc_fw)
@@ -144,7 +148,7 @@ static inline bool xe_uc_fw_is_loaded(struct xe_uc_fw *uc_fw)
 
 static inline bool xe_uc_fw_is_running(struct xe_uc_fw *uc_fw)
 {
-	return __xe_uc_fw_status(uc_fw) == XE_UC_FIRMWARE_RUNNING;
+	return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_RUNNING;
 }
 
 static inline bool xe_uc_fw_is_overridden(const struct xe_uc_fw *uc_fw)
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h
index ee914a5d85..0d8caa0e73 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_types.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
@@ -50,7 +50,8 @@ enum xe_uc_fw_status {
 	XE_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */
 	XE_UC_FIRMWARE_LOAD_FAIL, /* failed to xfer or init/auth the fw */
 	XE_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */
-	XE_UC_FIRMWARE_RUNNING /* init/auth done */
+	XE_UC_FIRMWARE_RUNNING, /* init/auth done */
+	XE_UC_FIRMWARE_PRELOADED, /* preloaded by the PF driver */
 };
 
 enum xe_uc_fw_type {
@@ -124,11 +125,14 @@ struct xe_uc_fw {
 
 	/** @versions: FW versions wanted and found */
 	struct {
-		/** @wanted: firmware version wanted by platform */
+		/** @versions.wanted: firmware version wanted by platform */
 		struct xe_uc_fw_version wanted;
-		/** @wanted_type: type of firmware version wanted (release vs compatibility) */
+		/**
+		 * @versions.wanted_type: type of firmware version wanted
+		 * (release vs compatibility)
+		 */
 		enum xe_uc_fw_version_types wanted_type;
-		/** @found: fw versions found in firmware blob */
+		/** @versions.found: fw versions found in firmware blob */
 		struct xe_uc_fw_version found[XE_UC_FW_VER_TYPE_COUNT];
 	} versions;
 
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index a2024247be..4aa3943e6f 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -13,17 +13,20 @@
 #include <drm/ttm/ttm_execbuf_util.h>
 #include <drm/ttm/ttm_tt.h>
 #include <drm/xe_drm.h>
+#include <linux/ascii85.h>
 #include <linux/delay.h>
 #include <linux/kthread.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
 
+#include <generated/xe_wa_oob.h>
+
+#include "regs/xe_gtt_defs.h"
 #include "xe_assert.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_drm_client.h"
 #include "xe_exec_queue.h"
-#include "xe_gt.h"
 #include "xe_gt_pagefault.h"
 #include "xe_gt_tlb_invalidation.h"
 #include "xe_migrate.h"
@@ -34,8 +37,8 @@
 #include "xe_res_cursor.h"
 #include "xe_sync.h"
 #include "xe_trace.h"
-#include "generated/xe_wa_oob.h"
 #include "xe_wa.h"
+#include "xe_hmm.h"
 
 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
 {
@@ -63,113 +66,14 @@ int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
 
 int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
 {
-	struct xe_userptr *userptr = &uvma->userptr;
 	struct xe_vma *vma = &uvma->vma;
 	struct xe_vm *vm = xe_vma_vm(vma);
 	struct xe_device *xe = vm->xe;
-	const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT;
-	struct page **pages;
-	bool in_kthread = !current->mm;
-	unsigned long notifier_seq;
-	int pinned, ret, i;
-	bool read_only = xe_vma_read_only(vma);
 
 	lockdep_assert_held(&vm->lock);
 	xe_assert(xe, xe_vma_is_userptr(vma));
-retry:
-	if (vma->gpuva.flags & XE_VMA_DESTROYED)
-		return 0;
-
-	notifier_seq = mmu_interval_read_begin(&userptr->notifier);
-	if (notifier_seq == userptr->notifier_seq)
-		return 0;
-
-	pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
-	if (!pages)
-		return -ENOMEM;
-
-	if (userptr->sg) {
-		dma_unmap_sgtable(xe->drm.dev,
-				  userptr->sg,
-				  read_only ? DMA_TO_DEVICE :
-				  DMA_BIDIRECTIONAL, 0);
-		sg_free_table(userptr->sg);
-		userptr->sg = NULL;
-	}
-
-	pinned = ret = 0;
-	if (in_kthread) {
-		if (!mmget_not_zero(userptr->notifier.mm)) {
-			ret = -EFAULT;
-			goto mm_closed;
-		}
-		kthread_use_mm(userptr->notifier.mm);
-	}
-
-	while (pinned < num_pages) {
-		ret = get_user_pages_fast(xe_vma_userptr(vma) +
-					  pinned * PAGE_SIZE,
-					  num_pages - pinned,
-					  read_only ? 0 : FOLL_WRITE,
-					  &pages[pinned]);
-		if (ret < 0)
-			break;
-
-		pinned += ret;
-		ret = 0;
-	}
-
-	if (in_kthread) {
-		kthread_unuse_mm(userptr->notifier.mm);
-		mmput(userptr->notifier.mm);
-	}
-mm_closed:
-	if (ret)
-		goto out;
-
-	ret = sg_alloc_table_from_pages_segment(&userptr->sgt, pages,
-						pinned, 0,
-						(u64)pinned << PAGE_SHIFT,
-						xe_sg_segment_size(xe->drm.dev),
-						GFP_KERNEL);
-	if (ret) {
-		userptr->sg = NULL;
-		goto out;
-	}
-	userptr->sg = &userptr->sgt;
-
-	ret = dma_map_sgtable(xe->drm.dev, userptr->sg,
-			      read_only ? DMA_TO_DEVICE :
-			      DMA_BIDIRECTIONAL,
-			      DMA_ATTR_SKIP_CPU_SYNC |
-			      DMA_ATTR_NO_KERNEL_MAPPING);
-	if (ret) {
-		sg_free_table(userptr->sg);
-		userptr->sg = NULL;
-		goto out;
-	}
-
-	for (i = 0; i < pinned; ++i) {
-		if (!read_only) {
-			lock_page(pages[i]);
-			set_page_dirty(pages[i]);
-			unlock_page(pages[i]);
-		}
-
-		mark_page_accessed(pages[i]);
-	}
-
-out:
-	release_pages(pages, pinned);
-	kvfree(pages);
-
-	if (!(ret < 0)) {
-		userptr->notifier_seq = notifier_seq;
-		if (xe_vma_userptr_check_repin(uvma) == -EAGAIN)
-			goto retry;
-	}
 
-	return ret < 0 ? ret : 0;
+	return xe_hmm_userptr_populate_range(uvma, false);
 }
 
 static bool preempt_fences_waiting(struct xe_vm *vm)
@@ -480,17 +384,53 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
 	return 0;
 }
 
+/**
+ * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
+ * @vm: The vm for which we are rebinding.
+ * @exec: The struct drm_exec with the locked GEM objects.
+ * @num_fences: The number of fences to reserve for the operation, not
+ * including rebinds and validations.
+ *
+ * Validates all evicted gem objects and rebinds their vmas. Note that
+ * rebindings may cause evictions and hence the validation-rebind
+ * sequence is rerun until there are no more objects to validate.
+ *
+ * Return: 0 on success, negative error code on error. In particular,
+ * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
+ * the drm_exec transaction needs to be restarted.
+ */
+int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
+			  unsigned int num_fences)
+{
+	struct drm_gem_object *obj;
+	unsigned long index;
+	int ret;
+
+	do {
+		ret = drm_gpuvm_validate(&vm->gpuvm, exec);
+		if (ret)
+			return ret;
+
+		ret = xe_vm_rebind(vm, false);
+		if (ret)
+			return ret;
+	} while (!list_empty(&vm->gpuvm.evict.list));
+
+	drm_exec_for_each_locked_object(exec, index, obj) {
+		ret = dma_resv_reserve_fences(obj->resv, num_fences);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
 				 bool *done)
 {
 	int err;
 
-	/*
-	 * 1 fence for each preempt fence plus a fence for each tile from a
-	 * possible rebind
-	 */
-	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues +
-				   vm->xe->info.tile_count);
+	err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
 	if (err)
 		return err;
 
@@ -505,7 +445,7 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
 		return 0;
 	}
 
-	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues);
+	err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
 	if (err)
 		return err;
 
@@ -513,7 +453,13 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
 	if (err)
 		return err;
 
-	return drm_gpuvm_validate(&vm->gpuvm, exec);
+	/*
+	 * Add validation and rebinding to the locking loop since both can
+	 * cause evictions which may require blocing dma_resv locks.
+	 * The fence reservation here is intended for the new preempt fences
+	 * we attach at the end of the rebind work.
+	 */
+	return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
 }
 
 static void preempt_rebind_work_func(struct work_struct *w)
@@ -638,6 +584,10 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
 	if (!mmu_notifier_range_blockable(range))
 		return false;
 
+	vm_dbg(&xe_vma_vm(vma)->xe->drm,
+	       "NOTIFIER: addr=0x%016llx, range=0x%016llx",
+		xe_vma_start(vma), xe_vma_size(vma));
+
 	down_write(&vm->userptr.notifier_lock);
 	mmu_interval_set_seq(mni, cur_seq);
 
@@ -801,6 +751,7 @@ static void xe_vma_free(struct xe_vma *vma)
 
 #define VMA_CREATE_FLAG_READ_ONLY	BIT(0)
 #define VMA_CREATE_FLAG_IS_NULL		BIT(1)
+#define VMA_CREATE_FLAG_DUMPABLE	BIT(2)
 
 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 				    struct xe_bo *bo,
@@ -813,6 +764,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 	u8 id;
 	bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
 	bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
+	bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
 
 	xe_assert(vm->xe, start < end);
 	xe_assert(vm->xe, end < vm->size);
@@ -847,6 +799,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 	vma->gpuva.va.range = end - start + 1;
 	if (read_only)
 		vma->gpuva.flags |= XE_VMA_READ_ONLY;
+	if (dumpable)
+		vma->gpuva.flags |= XE_VMA_DUMPABLE;
 
 	for_each_tile(tile, vm->xe, id)
 		vma->tile_mask |= 0x1 << id;
@@ -903,8 +857,6 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 static void xe_vma_destroy_late(struct xe_vma *vma)
 {
 	struct xe_vm *vm = xe_vma_vm(vma);
-	struct xe_device *xe = vm->xe;
-	bool read_only = xe_vma_read_only(vma);
 
 	if (vma->ufence) {
 		xe_sync_ufence_put(vma->ufence);
@@ -912,16 +864,11 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
 	}
 
 	if (xe_vma_is_userptr(vma)) {
-		struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
+		struct xe_userptr_vma *uvma = to_userptr_vma(vma);
+		struct xe_userptr *userptr = &uvma->userptr;
 
-		if (userptr->sg) {
-			dma_unmap_sgtable(xe->drm.dev,
-					  userptr->sg,
-					  read_only ? DMA_TO_DEVICE :
-					  DMA_BIDIRECTIONAL, 0);
-			sg_free_table(userptr->sg);
-			userptr->sg = NULL;
-		}
+		if (userptr->sg)
+			xe_hmm_userptr_free_sg(uvma);
 
 		/*
 		 * Since userptr pages are not pinned, we can't remove
@@ -990,35 +937,26 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
 }
 
 /**
- * xe_vm_prepare_vma() - drm_exec utility to lock a vma
+ * xe_vm_lock_vma() - drm_exec utility to lock a vma
  * @exec: The drm_exec object we're currently locking for.
  * @vma: The vma for witch we want to lock the vm resv and any attached
  * object's resv.
- * @num_shared: The number of dma-fence slots to pre-allocate in the
- * objects' reservation objects.
  *
  * Return: 0 on success, negative error code on error. In particular
  * may return -EDEADLK on WW transaction contention and -EINTR if
  * an interruptible wait is terminated by a signal.
  */
-int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
-		      unsigned int num_shared)
+int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
 {
 	struct xe_vm *vm = xe_vma_vm(vma);
 	struct xe_bo *bo = xe_vma_bo(vma);
 	int err;
 
 	XE_WARN_ON(!vm);
-	if (num_shared)
-		err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
-	else
-		err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
-	if (!err && bo && !bo->vm) {
-		if (num_shared)
-			err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);
-		else
-			err = drm_exec_lock_obj(exec, &bo->ttm.base);
-	}
+
+	err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
+	if (!err && bo && !bo->vm)
+		err = drm_exec_lock_obj(exec, &bo->ttm.base);
 
 	return err;
 }
@@ -1030,7 +968,7 @@ static void xe_vma_destroy_unlocked(struct xe_vma *vma)
 
 	drm_exec_init(&exec, 0, 0);
 	drm_exec_until_all_locked(&exec) {
-		err = xe_vm_prepare_vma(&exec, vma, 0);
+		err = xe_vm_lock_vma(&exec, vma);
 		drm_exec_retry_on_contention(&exec);
 		if (XE_WARN_ON(err))
 			break;
@@ -1065,7 +1003,9 @@ static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
 	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
 	lockdep_assert_held(&vm->lock);
 
+	mutex_lock(&vm->snap_mutex);
 	err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
+	mutex_unlock(&vm->snap_mutex);
 	XE_WARN_ON(err);	/* Shouldn't be possible */
 
 	return err;
@@ -1076,7 +1016,9 @@ static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
 	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
 	lockdep_assert_held(&vm->lock);
 
+	mutex_lock(&vm->snap_mutex);
 	drm_gpuva_remove(&vma->gpuva);
+	mutex_unlock(&vm->snap_mutex);
 	if (vm->usm.last_fault_vma == vma)
 		vm->usm.last_fault_vma = NULL;
 }
@@ -1095,7 +1037,7 @@ static struct drm_gpuva_op *xe_vm_op_alloc(void)
 
 static void xe_vm_free(struct drm_gpuvm *gpuvm);
 
-static struct drm_gpuvm_ops gpuvm_ops = {
+static const struct drm_gpuvm_ops gpuvm_ops = {
 	.op_alloc = xe_vm_op_alloc,
 	.vm_bo_validate = xe_gpuvm_validate,
 	.vm_free = xe_vm_free,
@@ -1231,8 +1173,6 @@ static const struct xe_pt_ops xelp_pt_ops = {
 	.pde_encode_bo = xelp_pde_encode_bo,
 };
 
-static void vm_destroy_work_func(struct work_struct *w);
-
 /**
  * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the
  * given tile and vm.
@@ -1303,6 +1243,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	vm->flags = flags;
 
 	init_rwsem(&vm->lock);
+	mutex_init(&vm->snap_mutex);
 
 	INIT_LIST_HEAD(&vm->rebind_list);
 
@@ -1311,8 +1252,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	init_rwsem(&vm->userptr.notifier_lock);
 	spin_lock_init(&vm->userptr.invalidated_lock);
 
-	INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
-
 	INIT_LIST_HEAD(&vm->preempt.exec_queues);
 	vm->preempt.min_run_period_ms = 10;	/* FIXME: Wire up to uAPI */
 
@@ -1322,7 +1261,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	vm->pt_ops = &xelp_pt_ops;
 
 	if (!(flags & XE_VM_FLAG_MIGRATION))
-		xe_device_mem_access_get(xe);
+		xe_pm_runtime_get_noresume(xe);
 
 	vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
 	if (!vm_resv_obj) {
@@ -1367,9 +1306,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 		vm->batch_invalidate_tlb = true;
 	}
 
-	if (flags & XE_VM_FLAG_LR_MODE) {
+	if (vm->flags & XE_VM_FLAG_LR_MODE) {
 		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
-		vm->flags |= XE_VM_FLAG_LR_MODE;
 		vm->batch_invalidate_tlb = false;
 	}
 
@@ -1428,11 +1366,12 @@ err_close:
 	return ERR_PTR(err);
 
 err_no_resv:
+	mutex_destroy(&vm->snap_mutex);
 	for_each_tile(tile, xe, id)
 		xe_range_fence_tree_fini(&vm->rftree[id]);
 	kfree(vm);
 	if (!(flags & XE_VM_FLAG_MIGRATION))
-		xe_device_mem_access_put(xe);
+		xe_pm_runtime_put(xe);
 	return ERR_PTR(err);
 }
 
@@ -1532,6 +1471,16 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 		xe->usm.num_vm_in_fault_mode--;
 	else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
 		xe->usm.num_vm_in_non_fault_mode--;
+
+	if (vm->usm.asid) {
+		void *lookup;
+
+		xe_assert(xe, xe->info.has_asid);
+		xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
+
+		lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
+		xe_assert(xe, lookup == vm);
+	}
 	mutex_unlock(&xe->usm.lock);
 
 	for_each_tile(tile, xe, id)
@@ -1540,28 +1489,23 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	xe_vm_put(vm);
 }
 
-static void vm_destroy_work_func(struct work_struct *w)
+static void xe_vm_free(struct drm_gpuvm *gpuvm)
 {
-	struct xe_vm *vm =
-		container_of(w, struct xe_vm, destroy_work);
+	struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
 	struct xe_device *xe = vm->xe;
 	struct xe_tile *tile;
 	u8 id;
-	void *lookup;
 
 	/* xe_vm_close_and_put was not called? */
 	xe_assert(xe, !vm->size);
 
-	if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
-		xe_device_mem_access_put(xe);
+	if (xe_vm_in_preempt_fence_mode(vm))
+		flush_work(&vm->preempt.rebind_work);
 
-		if (xe->info.has_asid && vm->usm.asid) {
-			mutex_lock(&xe->usm.lock);
-			lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
-			xe_assert(xe, lookup == vm);
-			mutex_unlock(&xe->usm.lock);
-		}
-	}
+	mutex_destroy(&vm->snap_mutex);
+
+	if (!(vm->flags & XE_VM_FLAG_MIGRATION))
+		xe_pm_runtime_put(xe);
 
 	for_each_tile(tile, xe, id)
 		XE_WARN_ON(vm->pt_root[id]);
@@ -1570,14 +1514,6 @@ static void vm_destroy_work_func(struct work_struct *w)
 	kfree(vm);
 }
 
-static void xe_vm_free(struct drm_gpuvm *gpuvm)
-{
-	struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm);
-
-	/* To destroy the VM we need to be able to sleep */
-	queue_work(system_unbound_wq, &vm->destroy_work);
-}
-
 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
 {
 	struct xe_vm *vm;
@@ -1674,7 +1610,7 @@ next:
 		xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence;
 	if (last_op) {
 		for (i = 0; i < num_syncs; i++)
-			xe_sync_entry_signal(&syncs[i], NULL, fence);
+			xe_sync_entry_signal(&syncs[i], fence);
 	}
 
 	return fence;
@@ -1748,7 +1684,7 @@ next:
 
 	if (last_op) {
 		for (i = 0; i < num_syncs; i++)
-			xe_sync_entry_signal(&syncs[i], NULL,
+			xe_sync_entry_signal(&syncs[i],
 					     cf ? &cf->base : fence);
 	}
 
@@ -1809,7 +1745,7 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
 		fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm);
 		if (last_op) {
 			for (i = 0; i < num_syncs; i++)
-				xe_sync_entry_signal(&syncs[i], NULL, fence);
+				xe_sync_entry_signal(&syncs[i], fence);
 		}
 	}
 
@@ -2010,7 +1946,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
 	int err;
 
-	xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type));
+	xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type));
 
 	if (!xe_vma_has_no_bo(vma)) {
 		err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]);
@@ -2030,7 +1966,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 				struct dma_fence *fence =
 					xe_exec_queue_last_fence_get(wait_exec_queue, vm);
 
-				xe_sync_entry_signal(&syncs[i], NULL, fence);
+				xe_sync_entry_signal(&syncs[i], fence);
 				dma_fence_put(fence);
 			}
 		}
@@ -2162,7 +2098,12 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
 
 		if (__op->op == DRM_GPUVA_OP_MAP) {
+			op->map.immediate =
+				flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
+			op->map.read_only =
+				flags & DRM_XE_VM_BIND_FLAG_READONLY;
 			op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
+			op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
 			op->map.pat_index = pat_index;
 		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
 			op->prefetch.region = prefetch_region;
@@ -2328,6 +2269,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 				   struct xe_sync_entry *syncs, u32 num_syncs,
 				   struct list_head *ops_list, bool last)
 {
+	struct xe_device *xe = vm->xe;
 	struct xe_vma_op *last_op = NULL;
 	struct drm_gpuva_op *__op;
 	int err = 0;
@@ -2354,8 +2296,12 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 		switch (op->base.op) {
 		case DRM_GPUVA_OP_MAP:
 		{
+			flags |= op->map.read_only ?
+				VMA_CREATE_FLAG_READ_ONLY : 0;
 			flags |= op->map.is_null ?
 				VMA_CREATE_FLAG_IS_NULL : 0;
+			flags |= op->map.dumpable ?
+				VMA_CREATE_FLAG_DUMPABLE : 0;
 
 			vma = new_vma(vm, &op->base.map, op->map.pat_index,
 				      flags);
@@ -2380,6 +2326,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 				flags |= op->base.remap.unmap->va->flags &
 					DRM_GPUVA_SPARSE ?
 					VMA_CREATE_FLAG_IS_NULL : 0;
+				flags |= op->base.remap.unmap->va->flags &
+					XE_VMA_DUMPABLE ?
+					VMA_CREATE_FLAG_DUMPABLE : 0;
 
 				vma = new_vma(vm, op->base.remap.prev,
 					      old->pat_index, flags);
@@ -2401,6 +2350,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 						xe_vma_end(vma) -
 						xe_vma_start(old);
 					op->remap.start = xe_vma_end(vma);
+					vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
+					       (ULL)op->remap.start,
+					       (ULL)op->remap.range);
 				}
 			}
 
@@ -2411,6 +2363,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 				flags |= op->base.remap.unmap->va->flags &
 					DRM_GPUVA_SPARSE ?
 					VMA_CREATE_FLAG_IS_NULL : 0;
+				flags |= op->base.remap.unmap->va->flags &
+					XE_VMA_DUMPABLE ?
+					VMA_CREATE_FLAG_DUMPABLE : 0;
 
 				vma = new_vma(vm, op->base.remap.next,
 					      old->pat_index, flags);
@@ -2431,6 +2386,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 					op->remap.range -=
 						xe_vma_end(old) -
 						xe_vma_start(vma);
+					vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
+					       (ULL)op->remap.start,
+					       (ULL)op->remap.range);
 				}
 			}
 			break;
@@ -2473,7 +2431,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
 
 	lockdep_assert_held_write(&vm->lock);
 
-	err = xe_vm_prepare_vma(exec, vma, 1);
+	err = xe_vm_lock_vma(exec, vma);
 	if (err)
 		return err;
 
@@ -2484,7 +2442,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
 	case DRM_GPUVA_OP_MAP:
 		err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
 				 op->syncs, op->num_syncs,
-				 !xe_vm_in_fault_mode(vm),
+				 op->map.immediate || !xe_vm_in_fault_mode(vm),
 				 op->flags & XE_VMA_OP_FIRST,
 				 op->flags & XE_VMA_OP_LAST);
 		break;
@@ -2759,7 +2717,10 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
 	return 0;
 }
 
-#define SUPPORTED_FLAGS	(DRM_XE_VM_BIND_FLAG_NULL | \
+#define SUPPORTED_FLAGS	\
+	(DRM_XE_VM_BIND_FLAG_READONLY | \
+	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
+	 DRM_XE_VM_BIND_FLAG_NULL | \
 	 DRM_XE_VM_BIND_FLAG_DUMPABLE)
 #define XE_64K_PAGE_MASK 0xffffull
 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
@@ -2892,7 +2853,7 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
 		return PTR_ERR(fence);
 
 	for (i = 0; i < num_syncs; i++)
-		xe_sync_entry_signal(&syncs[i], NULL, fence);
+		xe_sync_entry_signal(&syncs[i], fence);
 
 	xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
 				     fence);
@@ -3003,7 +2964,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 			goto put_obj;
 		}
 
-		if (bos[i]->flags & XE_BO_INTERNAL_64K) {
+		if (bos[i]->flags & XE_BO_FLAG_INTERNAL_64K) {
 			if (XE_IOCTL_DBG(xe, obj_offset &
 					 XE_64K_PAGE_MASK) ||
 			    XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) ||
@@ -3195,6 +3156,10 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
 	xe_assert(xe, !xe_vma_is_null(vma));
 	trace_xe_vma_invalidate(vma);
 
+	vm_dbg(&xe_vma_vm(vma)->xe->drm,
+	       "INVALIDATE: addr=0x%016llx, range=0x%016llx",
+		xe_vma_start(vma), xe_vma_size(vma));
+
 	/* Check that we don't race with page-table updates */
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
 		if (xe_vma_is_userptr(vma)) {
@@ -3284,3 +3249,175 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
 
 	return 0;
 }
+
+struct xe_vm_snapshot {
+	unsigned long num_snaps;
+	struct {
+		u64 ofs, bo_ofs;
+		unsigned long len;
+		struct xe_bo *bo;
+		void *data;
+		struct mm_struct *mm;
+	} snap[];
+};
+
+struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
+{
+	unsigned long num_snaps = 0, i;
+	struct xe_vm_snapshot *snap = NULL;
+	struct drm_gpuva *gpuva;
+
+	if (!vm)
+		return NULL;
+
+	mutex_lock(&vm->snap_mutex);
+	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
+		if (gpuva->flags & XE_VMA_DUMPABLE)
+			num_snaps++;
+	}
+
+	if (num_snaps)
+		snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
+	if (!snap) {
+		snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
+		goto out_unlock;
+	}
+
+	snap->num_snaps = num_snaps;
+	i = 0;
+	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
+		struct xe_vma *vma = gpuva_to_vma(gpuva);
+		struct xe_bo *bo = vma->gpuva.gem.obj ?
+			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
+
+		if (!(gpuva->flags & XE_VMA_DUMPABLE))
+			continue;
+
+		snap->snap[i].ofs = xe_vma_start(vma);
+		snap->snap[i].len = xe_vma_size(vma);
+		if (bo) {
+			snap->snap[i].bo = xe_bo_get(bo);
+			snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
+		} else if (xe_vma_is_userptr(vma)) {
+			struct mm_struct *mm =
+				to_userptr_vma(vma)->userptr.notifier.mm;
+
+			if (mmget_not_zero(mm))
+				snap->snap[i].mm = mm;
+			else
+				snap->snap[i].data = ERR_PTR(-EFAULT);
+
+			snap->snap[i].bo_ofs = xe_vma_userptr(vma);
+		} else {
+			snap->snap[i].data = ERR_PTR(-ENOENT);
+		}
+		i++;
+	}
+
+out_unlock:
+	mutex_unlock(&vm->snap_mutex);
+	return snap;
+}
+
+void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
+{
+	if (IS_ERR_OR_NULL(snap))
+		return;
+
+	for (int i = 0; i < snap->num_snaps; i++) {
+		struct xe_bo *bo = snap->snap[i].bo;
+		struct iosys_map src;
+		int err;
+
+		if (IS_ERR(snap->snap[i].data))
+			continue;
+
+		snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
+		if (!snap->snap[i].data) {
+			snap->snap[i].data = ERR_PTR(-ENOMEM);
+			goto cleanup_bo;
+		}
+
+		if (bo) {
+			dma_resv_lock(bo->ttm.base.resv, NULL);
+			err = ttm_bo_vmap(&bo->ttm, &src);
+			if (!err) {
+				xe_map_memcpy_from(xe_bo_device(bo),
+						   snap->snap[i].data,
+						   &src, snap->snap[i].bo_ofs,
+						   snap->snap[i].len);
+				ttm_bo_vunmap(&bo->ttm, &src);
+			}
+			dma_resv_unlock(bo->ttm.base.resv);
+		} else {
+			void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
+
+			kthread_use_mm(snap->snap[i].mm);
+			if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
+				err = 0;
+			else
+				err = -EFAULT;
+			kthread_unuse_mm(snap->snap[i].mm);
+
+			mmput(snap->snap[i].mm);
+			snap->snap[i].mm = NULL;
+		}
+
+		if (err) {
+			kvfree(snap->snap[i].data);
+			snap->snap[i].data = ERR_PTR(err);
+		}
+
+cleanup_bo:
+		xe_bo_put(bo);
+		snap->snap[i].bo = NULL;
+	}
+}
+
+void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
+{
+	unsigned long i, j;
+
+	if (IS_ERR_OR_NULL(snap)) {
+		drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
+		return;
+	}
+
+	for (i = 0; i < snap->num_snaps; i++) {
+		drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
+
+		if (IS_ERR(snap->snap[i].data)) {
+			drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs,
+				   PTR_ERR(snap->snap[i].data));
+			continue;
+		}
+
+		drm_printf(p, "[%llx].data: ", snap->snap[i].ofs);
+
+		for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
+			u32 *val = snap->snap[i].data + j;
+			char dumped[ASCII85_BUFSZ];
+
+			drm_puts(p, ascii85_encode(*val, dumped));
+		}
+
+		drm_puts(p, "\n");
+	}
+}
+
+void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
+{
+	unsigned long i;
+
+	if (IS_ERR_OR_NULL(snap))
+		return;
+
+	for (i = 0; i < snap->num_snaps; i++) {
+		if (!IS_ERR(snap->snap[i].data))
+			kvfree(snap->snap[i].data);
+		xe_bo_put(snap->snap[i].bo);
+		if (snap->snap[i].mm)
+			mmput(snap->snap[i].mm);
+	}
+	kvfree(snap);
+}
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 40188a28c5..306cd0934a 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -211,8 +211,6 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
 
 int xe_vm_invalidate_vma(struct xe_vma *vma);
 
-extern struct ttm_device_funcs xe_ttm_funcs;
-
 static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm)
 {
 	xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
@@ -244,8 +242,10 @@ bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end);
 
 int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id);
 
-int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
-		      unsigned int num_shared);
+int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma);
+
+int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
+			  unsigned int num_fences);
 
 /**
  * xe_vm_resv() - Return's the vm's reservation object
@@ -273,3 +273,8 @@ static inline void vm_dbg(const struct drm_device *dev,
 { /* noop */ }
 #endif
 #endif
+
+struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm);
+void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap);
+void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p);
+void xe_vm_snapshot_free(struct xe_vm_snapshot *snap);
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 4027da3a10..72a100671e 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -32,6 +32,7 @@ struct xe_vm;
 #define XE_VMA_PTE_1G		(DRM_GPUVA_USERBITS << 7)
 #define XE_VMA_PTE_64K		(DRM_GPUVA_USERBITS << 8)
 #define XE_VMA_PTE_COMPACT	(DRM_GPUVA_USERBITS << 9)
+#define XE_VMA_DUMPABLE		(DRM_GPUVA_USERBITS << 10)
 
 /** struct xe_userptr - User pointer */
 struct xe_userptr {
@@ -163,6 +164,11 @@ struct xe_vm {
 	 * VM
 	 */
 	struct rw_semaphore lock;
+	/**
+	 * @snap_mutex: Mutex used to guard insertions and removals from gpuva,
+	 * so we can take a snapshot safely from devcoredump.
+	 */
+	struct mutex snap_mutex;
 
 	/**
 	 * @rebind_list: list of VMAs that need rebinding. Protected by the
@@ -172,42 +178,11 @@ struct xe_vm {
 	struct list_head rebind_list;
 
 	/**
-	 * @destroy_work: worker to destroy VM, needed as a dma_fence signaling
-	 * from an irq context can be last put and the destroy needs to be able
-	 * to sleep.
-	 */
-	struct work_struct destroy_work;
-
-	/**
 	 * @rftree: range fence tree to track updates to page table structure.
 	 * Used to implement conflict tracking between independent bind engines.
 	 */
 	struct xe_range_fence_tree rftree[XE_MAX_TILES_PER_DEVICE];
 
-	/** @async_ops: async VM operations (bind / unbinds) */
-	struct {
-		/** @list: list of pending async VM ops */
-		struct list_head pending;
-		/** @work: worker to execute async VM ops */
-		struct work_struct work;
-		/** @lock: protects list of pending async VM ops and fences */
-		spinlock_t lock;
-		/** @fence: fence state */
-		struct {
-			/** @context: context of async fence */
-			u64 context;
-			/** @seqno: seqno of async fence */
-			u32 seqno;
-		} fence;
-		/** @error: error state for async VM ops */
-		int error;
-		/**
-		 * @munmap_rebind_inflight: an munmap style VM bind is in the
-		 * middle of a set of ops which requires a rebind at the end.
-		 */
-		bool munmap_rebind_inflight;
-	} async_ops;
-
 	const struct xe_pt_ops *pt_ops;
 
 	/** @userptr: user pointer state */
@@ -294,8 +269,14 @@ struct xe_vm {
 struct xe_vma_op_map {
 	/** @vma: VMA to map */
 	struct xe_vma *vma;
+	/** @immediate: Immediate bind */
+	bool immediate;
+	/** @read_only: Read only */
+	bool read_only;
 	/** @is_null: is NULL binding */
 	bool is_null;
+	/** @dumpable: whether BO is dumped on GPU hang */
+	bool dumpable;
 	/** @pat_index: The pat index to use for this operation. */
 	u16 pat_index;
 };
diff --git a/drivers/gpu/drm/xe/xe_vram_freq.c b/drivers/gpu/drm/xe/xe_vram_freq.c
new file mode 100644
index 0000000000..3e21ddc6e6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vram_freq.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+#include <linux/sysfs.h>
+#include <drm/drm_managed.h>
+
+#include "xe_gt_types.h"
+#include "xe_pcode.h"
+#include "xe_pcode_api.h"
+#include "xe_tile.h"
+#include "xe_tile_sysfs.h"
+#include "xe_vram_freq.h"
+
+/**
+ * DOC: Xe VRAM freq
+ *
+ * Provides sysfs entries for vram frequency in tile
+ *
+ * device/tile#/memory/freq0/max_freq - This is maximum frequency. This value is read-only as it
+ *					is the fixed fuse point P0. It is not the system
+ *					configuration.
+ * device/tile#/memory/freq0/min_freq - This is minimum frequency. This value is read-only as it
+ *					is the fixed fuse point PN. It is not the system
+ *					configuration.
+ */
+
+static struct xe_tile *dev_to_tile(struct device *dev)
+{
+	return kobj_to_tile(dev->kobj.parent);
+}
+
+static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	struct xe_tile *tile = dev_to_tile(dev);
+	struct xe_gt *gt = tile->primary_gt;
+	u32 val, mbox;
+	int err;
+
+	mbox = REG_FIELD_PREP(PCODE_MB_COMMAND, PCODE_FREQUENCY_CONFIG)
+		| REG_FIELD_PREP(PCODE_MB_PARAM1, PCODE_MBOX_FC_SC_READ_FUSED_P0)
+		| REG_FIELD_PREP(PCODE_MB_PARAM2, PCODE_MBOX_DOMAIN_HBM);
+
+	err = xe_pcode_read(gt, mbox, &val, NULL);
+	if (err)
+		return err;
+
+	/* data_out - Fused P0 for domain ID in units of 50 MHz */
+	val *= 50;
+
+	return sysfs_emit(buf, "%u\n", val);
+}
+static DEVICE_ATTR_RO(max_freq);
+
+static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	struct xe_tile *tile = dev_to_tile(dev);
+	struct xe_gt *gt = tile->primary_gt;
+	u32 val, mbox;
+	int err;
+
+	mbox = REG_FIELD_PREP(PCODE_MB_COMMAND, PCODE_FREQUENCY_CONFIG)
+		| REG_FIELD_PREP(PCODE_MB_PARAM1, PCODE_MBOX_FC_SC_READ_FUSED_PN)
+		| REG_FIELD_PREP(PCODE_MB_PARAM2, PCODE_MBOX_DOMAIN_HBM);
+
+	err = xe_pcode_read(gt, mbox, &val, NULL);
+	if (err)
+		return err;
+
+	/* data_out - Fused Pn for domain ID in units of 50 MHz */
+	val *= 50;
+
+	return sysfs_emit(buf, "%u\n", val);
+}
+static DEVICE_ATTR_RO(min_freq);
+
+static struct attribute *freq_attrs[] = {
+	&dev_attr_max_freq.attr,
+	&dev_attr_min_freq.attr,
+	NULL
+};
+
+static const struct attribute_group freq_group_attrs = {
+	.name = "freq0",
+	.attrs = freq_attrs,
+};
+
+static void vram_freq_sysfs_fini(struct drm_device *drm, void *arg)
+{
+	struct kobject *kobj = arg;
+
+	sysfs_remove_group(kobj, &freq_group_attrs);
+	kobject_put(kobj);
+}
+
+/**
+ * xe_vram_freq_sysfs_init - Initialize vram frequency sysfs component
+ * @tile: Xe Tile object
+ *
+ * It needs to be initialized after the main tile component is ready
+ *
+ * Returns: 0 on success, negative error code on error.
+ */
+int xe_vram_freq_sysfs_init(struct xe_tile *tile)
+{
+	struct xe_device *xe = tile_to_xe(tile);
+	struct kobject *kobj;
+	int err;
+
+	if (xe->info.platform != XE_PVC)
+		return 0;
+
+	kobj = kobject_create_and_add("memory", tile->sysfs);
+	if (!kobj)
+		return -ENOMEM;
+
+	err = sysfs_create_group(kobj, &freq_group_attrs);
+	if (err) {
+		kobject_put(kobj);
+		return err;
+	}
+
+	return drmm_add_action_or_reset(&xe->drm, vram_freq_sysfs_fini, kobj);
+}
diff --git a/drivers/gpu/drm/xe/xe_vram_freq.h b/drivers/gpu/drm/xe/xe_vram_freq.h
new file mode 100644
index 0000000000..bf726bc588
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vram_freq.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_VRAM_FREQ_H_
+#define _XE_VRAM_FREQ_H_
+
+struct xe_tile;
+
+int xe_vram_freq_sysfs_init(struct xe_tile *tile);
+
+#endif /* _XE_VRAM_FREQ_H_ */
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 5f61dd87c5..dd214d95e4 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -9,7 +9,8 @@
 #include <kunit/visibility.h>
 #include <linux/compiler_types.h>
 
-#include "generated/xe_wa_oob.h"
+#include <generated/xe_wa_oob.h>
+
 #include "regs/xe_engine_regs.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_regs.h"
@@ -125,13 +126,6 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 
 	/* DG2 */
 
-	{ XE_RTP_NAME("16010515920"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10),
-		       GRAPHICS_STEP(A0, B0),
-		       ENGINE_CLASS(VIDEO_DECODE)),
-	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS)),
-	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
-	},
 	{ XE_RTP_NAME("22010523718"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10)),
 	  XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE, CG3DDISCFEG_CLKGATE_DIS))
@@ -140,61 +134,6 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G10)),
 	  XE_RTP_ACTIONS(SET(SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS))
 	},
-	{ XE_RTP_NAME("14012362059"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
-	  XE_RTP_ACTIONS(SET(XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB))
-	},
-	{ XE_RTP_NAME("14012362059"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)),
-	  XE_RTP_ACTIONS(SET(XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB))
-	},
-	{ XE_RTP_NAME("14010948348"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
-	  XE_RTP_ACTIONS(SET(UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS))
-	},
-	{ XE_RTP_NAME("14011037102"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
-	  XE_RTP_ACTIONS(SET(UNSLCGCTL9444, LTCDD_CLKGATE_DIS))
-	},
-	{ XE_RTP_NAME("14011371254"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
-	  XE_RTP_ACTIONS(SET(XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS))
-	},
-	{ XE_RTP_NAME("14011431319"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
-	  XE_RTP_ACTIONS(SET(UNSLCGCTL9440,
-			     GAMTLBOACS_CLKGATE_DIS |
-			     GAMTLBVDBOX7_CLKGATE_DIS | GAMTLBVDBOX6_CLKGATE_DIS |
-			     GAMTLBVDBOX5_CLKGATE_DIS | GAMTLBVDBOX4_CLKGATE_DIS |
-			     GAMTLBVDBOX3_CLKGATE_DIS | GAMTLBVDBOX2_CLKGATE_DIS |
-			     GAMTLBVDBOX1_CLKGATE_DIS | GAMTLBVDBOX0_CLKGATE_DIS |
-			     GAMTLBKCR_CLKGATE_DIS | GAMTLBGUC_CLKGATE_DIS |
-			     GAMTLBBLT_CLKGATE_DIS),
-			 SET(UNSLCGCTL9444,
-			     GAMTLBGFXA0_CLKGATE_DIS | GAMTLBGFXA1_CLKGATE_DIS |
-			     GAMTLBCOMPA0_CLKGATE_DIS | GAMTLBCOMPA1_CLKGATE_DIS |
-			     GAMTLBCOMPB0_CLKGATE_DIS | GAMTLBCOMPB1_CLKGATE_DIS |
-			     GAMTLBCOMPC0_CLKGATE_DIS | GAMTLBCOMPC1_CLKGATE_DIS |
-			     GAMTLBCOMPD0_CLKGATE_DIS | GAMTLBCOMPD1_CLKGATE_DIS |
-			     GAMTLBMERT_CLKGATE_DIS |
-			     GAMTLBVEBOX3_CLKGATE_DIS | GAMTLBVEBOX2_CLKGATE_DIS |
-			     GAMTLBVEBOX1_CLKGATE_DIS | GAMTLBVEBOX0_CLKGATE_DIS))
-	},
-	{ XE_RTP_NAME("14010569222"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
-	  XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE, GAMEDIA_CLKGATE_DIS))
-	},
-	{ XE_RTP_NAME("14011028019"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
-	  XE_RTP_ACTIONS(SET(SSMCGCTL9530, RTFUNIT_CLKGATE_DIS))
-	},
-	{ XE_RTP_NAME("14010680813"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
-	  XE_RTP_ACTIONS(SET(XEHP_GAMSTLB_CTRL,
-			     CONTROL_BLOCK_CLKGATE_DIS |
-			     EGRESS_BLOCK_CLKGATE_DIS |
-			     TAG_BLOCK_CLKGATE_DIS))
-	},
 	{ XE_RTP_NAME("14014830051"),
 	  XE_RTP_RULES(PLATFORM(DG2)),
 	  XE_RTP_ACTIONS(CLR(SARB_CHICKEN1, COMP_CKN_IN))
@@ -212,10 +151,6 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 			     INVALIDATION_BROADCAST_MODE_DIS |
 			     GLOBAL_INVALIDATION_MODE))
 	},
-	{ XE_RTP_NAME("14010648519"),
-	  XE_RTP_RULES(PLATFORM(DG2)),
-	  XE_RTP_ACTIONS(SET(XEHP_L3NODEARBCFG, XEHP_LNESPARE))
-	},
 
 	/* PVC */
 
@@ -238,11 +173,11 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 	  XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE))
 	},
 	{ XE_RTP_NAME("14018575942"),
-	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274)),
 	  XE_RTP_ACTIONS(SET(COMP_MOD_CTRL, FORCE_MISS_FTLB))
 	},
 	{ XE_RTP_NAME("22016670082"),
-	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274)),
 	  XE_RTP_ACTIONS(SET(SQCNT1, ENFORCE_RAR))
 	},
 
@@ -293,6 +228,28 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
 	},
 
+	/* Xe2_HPM */
+
+	{ XE_RTP_NAME("16021867713"),
+	  XE_RTP_RULES(MEDIA_VERSION(1301),
+		       ENGINE_CLASS(VIDEO_DECODE)),
+	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+	},
+	{ XE_RTP_NAME("14020316580"),
+	  XE_RTP_RULES(MEDIA_VERSION(1301)),
+	  XE_RTP_ACTIONS(CLR(PG_ENABLE,
+			     VD0_HCP_POWERGATE_ENABLE |
+			     VD0_MFXVDENC_POWERGATE_ENABLE |
+			     VD2_HCP_POWERGATE_ENABLE |
+			     VD2_MFXVDENC_POWERGATE_ENABLE)),
+	},
+	{ XE_RTP_NAME("14019449301"),
+	  XE_RTP_RULES(MEDIA_VERSION(1301), ENGINE_CLASS(VIDEO_DECODE)),
+	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)),
+	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+	},
+
 	{}
 };
 
@@ -377,13 +334,6 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 			     POLYGON_TRIFAN_LINELOOP_DISABLE))
 	},
 	{ XE_RTP_NAME("22012826095, 22013059131"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0),
-		       FUNC(xe_rtp_match_first_render_or_compute)),
-	  XE_RTP_ACTIONS(FIELD_SET(LSC_CHICKEN_BIT_0_UDW,
-				   MAXREQS_PER_BANK,
-				   REG_FIELD_PREP(MAXREQS_PER_BANK, 2)))
-	},
-	{ XE_RTP_NAME("22012826095, 22013059131"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G11),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(FIELD_SET(LSC_CHICKEN_BIT_0_UDW,
@@ -391,59 +341,26 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 				   REG_FIELD_PREP(MAXREQS_PER_BANK, 2)))
 	},
 	{ XE_RTP_NAME("22013059131"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0),
-		       FUNC(xe_rtp_match_first_render_or_compute)),
-	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT))
-	},
-	{ XE_RTP_NAME("22013059131"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G11),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT))
 	},
-	{ XE_RTP_NAME("14010918519"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0),
-		       FUNC(xe_rtp_match_first_render_or_compute)),
-	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW,
-			     FORCE_SLM_FENCE_SCOPE_TO_TILE |
-			     FORCE_UGM_FENCE_SCOPE_TO_TILE,
-			     /*
-			      * Ignore read back as it always returns 0 in these
-			      * steps
-			      */
-			     .read_mask = 0))
-	},
 	{ XE_RTP_NAME("14015227452"),
 	  XE_RTP_RULES(PLATFORM(DG2),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE))
 	},
-	{ XE_RTP_NAME("16015675438"),
-	  XE_RTP_RULES(PLATFORM(DG2),
-		       FUNC(xe_rtp_match_first_render_or_compute)),
-	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2(RENDER_RING_BASE),
-			     PERF_FIX_BALANCING_CFE_DISABLE))
-	},
 	{ XE_RTP_NAME("18028616096"),
 	  XE_RTP_RULES(PLATFORM(DG2),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3))
 	},
-	{ XE_RTP_NAME("16011620976, 22015475538"),
+	{ XE_RTP_NAME("22015475538"),
 	  XE_RTP_RULES(PLATFORM(DG2),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8))
 	},
 	{ XE_RTP_NAME("22012654132"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0),
-		       FUNC(xe_rtp_match_first_render_or_compute)),
-	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC,
-			     /*
-			      * Register can't be read back for verification on
-			      * DG2 due to Wa_14012342262
-			      */
-			     .read_mask = 0))
-	},
-	{ XE_RTP_NAME("22012654132"),
 	  XE_RTP_RULES(SUBPLATFORM(DG2, G11),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC,
@@ -461,68 +378,11 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_READ_SUPPRESSION))
 	},
-	{ XE_RTP_NAME("14013392000"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0),
-		       ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, ENABLE_LARGE_GRF_MODE))
-	},
-	{ XE_RTP_NAME("14012419201"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0),
-		       ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4,
-			     DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX))
-	},
-	{ XE_RTP_NAME("14012419201"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0),
-		       ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4,
-			     DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX))
-	},
-	{ XE_RTP_NAME("1308578152"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0),
-		       ENGINE_CLASS(RENDER),
-		       FUNC(xe_rtp_match_first_gslice_fused_off)),
-	  XE_RTP_ACTIONS(CLR(CS_DEBUG_MODE1(RENDER_RING_BASE),
-			     REPLAY_MODE_GRANULARITY))
-	},
 	{ XE_RTP_NAME("22010960976, 14013347512"),
 	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(CLR(XEHP_HDC_CHICKEN0,
 			     LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK))
 	},
-	{ XE_RTP_NAME("1608949956, 14010198302"),
-	  XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(ROW_CHICKEN,
-			     MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE))
-	},
-	{ XE_RTP_NAME("22010430635"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0),
-		       ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4,
-			     DISABLE_GRF_CLEAR))
-	},
-	{ XE_RTP_NAME("14013202645"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0),
-		       ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY))
-	},
-	{ XE_RTP_NAME("14013202645"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0),
-		       ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY))
-	},
-	{ XE_RTP_NAME("22012532006"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0),
-		       ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7,
-			     DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA))
-	},
-	{ XE_RTP_NAME("22012532006"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0),
-		       ENGINE_CLASS(RENDER)),
-	  XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7,
-			     DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA))
-	},
 	{ XE_RTP_NAME("14015150844"),
 	  XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(XEHP_HDC_CHICKEN0, DIS_ATOMIC_CHAINING_TYPED_WRITES,
@@ -539,10 +399,10 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 	  XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE))
 	},
-	{ XE_RTP_NAME("16015675438"),
-	  XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)),
-	  XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2(RENDER_RING_BASE),
-			     PERF_FIX_BALANCING_CFE_DISABLE))
+	{ XE_RTP_NAME("18020744125"),
+	  XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute),
+		       ENGINE_CLASS(COMPUTE)),
+	  XE_RTP_ACTIONS(SET(RING_HWSTAM(RENDER_RING_BASE), ~0))
 	},
 	{ XE_RTP_NAME("14014999345"),
 	  XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COMPUTE),
@@ -553,7 +413,7 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 	/* Xe_LPG */
 
 	{ XE_RTP_NAME("14017856879"),
-	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(ROW_CHICKEN3, DIS_FIX_EOT1_FLUSH))
 	},
@@ -563,6 +423,11 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 	  XE_RTP_ACTIONS(SET(XEHP_HDC_CHICKEN0, DIS_ATOMIC_CHAINING_TYPED_WRITES,
 			     XE_RTP_NOCHECK))
 	},
+	{ XE_RTP_NAME("14020495402"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_TDL_SVHS_GATING))
+	},
 
 	/* Xe2_LPG */
 
@@ -580,8 +445,12 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN5, DISABLE_SAMPLE_G_PERFORMANCE))
 	},
-	{ XE_RTP_NAME("16021540221"),
-	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+	{ XE_RTP_NAME("14020338487"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS))
+	},
+	{ XE_RTP_NAME("18034896535, 16021540221"), /* 16021540221: GRAPHICS_STEP(A0, B0) */
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004),
 		       FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH))
 	},
@@ -612,6 +481,68 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 			     PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS,
 			     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
 	},
+	{ XE_RTP_NAME("16018610683"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, SLM_WMTP_RESTORE))
+	},
+
+	/* Xe2_HPG */
+
+	{ XE_RTP_NAME("16018712365"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS))
+	},
+	{ XE_RTP_NAME("16018737384"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS))
+	},
+	{ XE_RTP_NAME("14019988906"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD))
+	},
+	{ XE_RTP_NAME("14019877138"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
+	},
+	{ XE_RTP_NAME("14020338487"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS))
+	},
+	{ XE_RTP_NAME("18032247524"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE))
+	},
+	{ XE_RTP_NAME("14018471104"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL))
+	},
+	/*
+	 * Although this workaround isn't required for the RCS, disabling these
+	 * reports has no impact for our driver or the GuC, so we go ahead and
+	 * apply this to all engines for simplicity.
+	 */
+	{ XE_RTP_NAME("16021639441"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2001)),
+	  XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0),
+			     GHWSP_CSB_REPORT_DIS |
+			     PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS,
+			     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+	},
+	{ XE_RTP_NAME("14019811474"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2001),
+		       FUNC(xe_rtp_match_first_render_or_compute)),
+	  XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, WR_REQ_CHAINING_DIS))
+	},
+
+	/* Xe2_HPM */
+
+	{ XE_RTP_NAME("16021639441"),
+	  XE_RTP_RULES(MEDIA_VERSION(1301)),
+	  XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0),
+			     GHWSP_CSB_REPORT_DIS |
+			     PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS,
+			     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
+	},
 
 	{}
 };
@@ -652,21 +583,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
 
 	/* DG2 */
 
-	{ XE_RTP_NAME("16011186671"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)),
-	  XE_RTP_ACTIONS(CLR(VFLSKPD, DIS_MULT_MISS_RD_SQUASH),
-			 SET(VFLSKPD, DIS_OVER_FETCH_CACHE))
-	},
-	{ XE_RTP_NAME("14010469329"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
-	  XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3,
-			     XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE))
-	},
-	{ XE_RTP_NAME("14010698770, 22010613112, 22010465075"),
-	  XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
-	  XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3,
-			     DISABLE_CPS_AWARE_COLOR_PIPE))
-	},
 	{ XE_RTP_NAME("16013271637"),
 	  XE_RTP_RULES(PLATFORM(DG2)),
 	  XE_RTP_ACTIONS(SET(XEHP_SLICE_COMMON_ECO_CHICKEN1,
@@ -705,9 +621,13 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
 	/* Xe_LPG */
 
 	{ XE_RTP_NAME("18019271663"),
-	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274)),
 	  XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE))
 	},
+	{ XE_RTP_NAME("14019877138"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
+	},
 
 	/* Xe2_LPG */
 
@@ -739,6 +659,29 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
 	  XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD))
 	},
+	{ XE_RTP_NAME("16020183090"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+		       ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(INSTPM(RENDER_RING_BASE), ENABLE_SEMAPHORE_POLL_BIT))
+	},
+	{ XE_RTP_NAME("18033852989"),
+	  XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST))
+	},
+
+	/* Xe2_HPG */
+	{ XE_RTP_NAME("15010599737"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN))
+	},
+	{ XE_RTP_NAME("14019386621"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE))
+	},
+	{ XE_RTP_NAME("14020756599"),
+	  XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
+	  XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS))
+	},
 
 	{}
 };
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 727bdc4292..12fe88796a 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -1,17 +1,9 @@
 22012773006	GRAPHICS_VERSION_RANGE(1200, 1250)
-16011759253	SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)
 14014475959	GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)
 		PLATFORM(DG2)
 22011391025	PLATFORM(DG2)
-14012197797	PLATFORM(DG2), GRAPHICS_STEP(A0, B0)
-16011777198	SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0)
-		SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)
-22012727170	SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0)
-		SUBPLATFORM(DG2, G11)
+22012727170	SUBPLATFORM(DG2, G11)
 22012727685	SUBPLATFORM(DG2, G11)
-16015675438	PLATFORM(PVC)
-		SUBPLATFORM(DG2, G10)
-		SUBPLATFORM(DG2, G12)
 18020744125	PLATFORM(PVC)
 1509372804	PLATFORM(PVC), GRAPHICS_STEP(A0, C0)
 1409600907	GRAPHICS_VERSION_RANGE(1200, 1250)
@@ -22,3 +14,16 @@
 14019821291	MEDIA_VERSION_RANGE(1300, 2000)
 14015076503	MEDIA_VERSION(1300)
 16020292621	GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)
+14018913170	GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)
+		MEDIA_VERSION(2000), GRAPHICS_STEP(A0, A1)
+		GRAPHICS_VERSION_RANGE(1270, 1274)
+		MEDIA_VERSION(1300)
+		PLATFORM(DG2)
+14018094691	GRAPHICS_VERSION(2004)
+14019882105	GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)
+18024947630	GRAPHICS_VERSION(2001)
+		GRAPHICS_VERSION(2004)
+		MEDIA_VERSION(2000)
+16022287689	GRAPHICS_VERSION(2001)
+		GRAPHICS_VERSION(2004)
+13011645652	GRAPHICS_VERSION(2004)
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
index a75eeba7bf..f697213392 100644
--- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -148,7 +148,7 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
 
 		if (q) {
 			if (q->ops->reset_status(q)) {
-				drm_info(&xe->drm, "exec gueue reset detected\n");
+				drm_info(&xe->drm, "exec queue reset detected\n");
 				err = -EIO;
 				break;
 			}
diff --git a/drivers/gpu/drm/xe/xe_wopcm_types.h b/drivers/gpu/drm/xe/xe_wopcm_types.h
index 486d850c40..99d34837c4 100644
--- a/drivers/gpu/drm/xe/xe_wopcm_types.h
+++ b/drivers/gpu/drm/xe/xe_wopcm_types.h
@@ -16,9 +16,9 @@ struct xe_wopcm {
 	u32 size;
 	/** @guc: GuC WOPCM Region info */
 	struct {
-		/** @base: GuC WOPCM base which is offset from WOPCM base */
+		/** @guc.base: GuC WOPCM base which is offset from WOPCM base */
 		u32 base;
-		/** @size: Size of the GuC WOPCM region */
+		/** @guc.size: Size of the GuC WOPCM region */
 		u32 size;
 	} guc;
 };