Diffstat (limited to 'drivers/gpu/drm/xe')
-rw-r--r--  drivers/gpu/drm/xe/.kunitconfig | 5
-rw-r--r--  drivers/gpu/drm/xe/Kconfig | 3
-rw-r--r--  drivers/gpu/drm/xe/Kconfig.debug | 1
-rw-r--r--  drivers/gpu/drm/xe/Makefile | 42
-rw-r--r--  drivers/gpu/drm/xe/abi/gsc_proxy_commands_abi.h | 44
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h | 174
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h | 3
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_messages_abi.h | 2
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h | 79
-rw-r--r--  drivers/gpu/drm/xe/abi/guc_relay_communication_abi.h | 118
-rw-r--r--  drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 10
-rw-r--r--  drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h | 3
-rw-r--r--  drivers/gpu/drm/xe/display/xe_display.c (renamed from drivers/gpu/drm/xe/xe_display.c) | 0
-rw-r--r--  drivers/gpu/drm/xe/display/xe_display.h (renamed from drivers/gpu/drm/xe/xe_display.h) | 0
-rw-r--r--  drivers/gpu/drm/xe/display/xe_plane_initial.c | 67
-rw-r--r--  drivers/gpu/drm/xe/instructions/xe_mi_commands.h | 3
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_engine_regs.h | 6
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_gt_regs.h | 27
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_lrc_layout.h | 9
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_pcode_regs.h | 21
-rw-r--r--  drivers/gpu/drm/xe/tests/Makefile | 7
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_guc_db_mgr_test.c | 201
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_guc_relay_test.c | 522
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_kunit_helpers.c | 90
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_kunit_helpers.h | 17
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_mocs.c | 36
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_mocs_test.c | 1
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_mocs_test.h | 1
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_pci.c | 3
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_pci_test.c | 5
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_pci_test.h | 2
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_rtp_test.c | 10
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_test_mod.c | 10
-rw-r--r--  drivers/gpu/drm/xe/tests/xe_wa_test.c | 16
-rw-r--r--  drivers/gpu/drm/xe/xe_bb.c | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_bo.c | 123
-rw-r--r--  drivers/gpu/drm/xe/xe_bo.h | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_bo_types.h | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_debugfs.c | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_devcoredump.c | 55
-rw-r--r--  drivers/gpu/drm/xe/xe_devcoredump.h | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_devcoredump_types.h | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_device.c | 107
-rw-r--r--  drivers/gpu/drm/xe/xe_device.h | 10
-rw-r--r--  drivers/gpu/drm/xe/xe_device_types.h | 169
-rw-r--r--  drivers/gpu/drm/xe/xe_drm_client.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_exec.c | 18
-rw-r--r--  drivers/gpu/drm/xe/xe_exec_queue.c | 121
-rw-r--r--  drivers/gpu/drm/xe/xe_exec_queue.h | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_exec_queue_types.h | 49
-rw-r--r--  drivers/gpu/drm/xe/xe_execlist.c | 8
-rw-r--r--  drivers/gpu/drm/xe/xe_ggtt.c | 81
-rw-r--r--  drivers/gpu/drm/xe/xe_ggtt.h | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_gsc.c | 71
-rw-r--r--  drivers/gpu/drm/xe/xe_gsc.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_gsc_proxy.c | 537
-rw-r--r--  drivers/gpu/drm/xe/xe_gsc_proxy.h | 20
-rw-r--r--  drivers/gpu/drm/xe/xe_gsc_submit.c | 20
-rw-r--r--  drivers/gpu/drm/xe/xe_gsc_submit.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_gsc_types.h | 33
-rw-r--r--  drivers/gpu/drm/xe/xe_gt.c | 92
-rw-r--r--  drivers/gpu/drm/xe/xe_gt.h | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_idle.c | 9
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_mcr.c | 17
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_pagefault.c | 41
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_printk.h | 44
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_sriov_printk.h | 34
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 14
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_topology.c | 4
-rw-r--r--  drivers/gpu/drm/xe/xe_gt_types.h | 121
-rw-r--r--  drivers/gpu/drm/xe/xe_guc.c | 117
-rw-r--r--  drivers/gpu/drm/xe/xe_guc.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ads.c | 4
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ct.c | 265
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ct.h | 12
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_ct_types.h | 24
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_db_mgr.c | 266
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_db_mgr.h | 22
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_fwif.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_hwconfig.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_hxg_helpers.h | 108
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_log.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_pc.c | 89
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_pc.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_relay.c | 941
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_relay.h | 37
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_relay_types.h | 36
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit.c | 87
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit.h | 4
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit_types.h | 18
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_types.h | 47
-rw-r--r--  drivers/gpu/drm/xe/xe_heci_gsc.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_huc.c | 19
-rw-r--r--  drivers/gpu/drm/xe/xe_huc.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine.c | 144
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c | 38
-rw-r--r--  drivers/gpu/drm/xe/xe_hw_engine_types.h | 82
-rw-r--r--  drivers/gpu/drm/xe/xe_hwmon.c | 32
-rw-r--r--  drivers/gpu/drm/xe/xe_irq.c | 136
-rw-r--r--  drivers/gpu/drm/xe/xe_lrc.c | 38
-rw-r--r--  drivers/gpu/drm/xe/xe_lrc_types.h | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_memirq.c | 430
-rw-r--r--  drivers/gpu/drm/xe/xe_memirq.h | 26
-rw-r--r--  drivers/gpu/drm/xe/xe_memirq_types.h | 37
-rw-r--r--  drivers/gpu/drm/xe/xe_migrate.c | 39
-rw-r--r--  drivers/gpu/drm/xe/xe_mmio.c | 9
-rw-r--r--  drivers/gpu/drm/xe/xe_mocs.c | 27
-rw-r--r--  drivers/gpu/drm/xe/xe_pat.c | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_pci.c | 10
-rw-r--r--  drivers/gpu/drm/xe/xe_pcode.c | 115
-rw-r--r--  drivers/gpu/drm/xe/xe_pcode.h | 6
-rw-r--r--  drivers/gpu/drm/xe/xe_pcode_api.h | 7
-rw-r--r--  drivers/gpu/drm/xe/xe_pm.c | 76
-rw-r--r--  drivers/gpu/drm/xe/xe_pm.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_preempt_fence.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_pt.c | 22
-rw-r--r--  drivers/gpu/drm/xe/xe_query.c | 50
-rw-r--r--  drivers/gpu/drm/xe/xe_reg_sr.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_reg_whitelist.c | 8
-rw-r--r--  drivers/gpu/drm/xe/xe_ring_ops.c | 78
-rw-r--r--  drivers/gpu/drm/xe/xe_sched_job.c | 38
-rw-r--r--  drivers/gpu/drm/xe/xe_sched_job.h | 5
-rw-r--r--  drivers/gpu/drm/xe/xe_sched_job_types.h | 11
-rw-r--r--  drivers/gpu/drm/xe/xe_sriov.c | 32
-rw-r--r--  drivers/gpu/drm/xe/xe_sriov.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_sriov_types.h | 12
-rw-r--r--  drivers/gpu/drm/xe/xe_tile_sysfs.c | 3
-rw-r--r--  drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 8
-rw-r--r--  drivers/gpu/drm/xe/xe_tuning.c | 9
-rw-r--r--  drivers/gpu/drm/xe/xe_uc.c | 33
-rw-r--r--  drivers/gpu/drm/xe/xe_uc.h | 1
-rw-r--r--  drivers/gpu/drm/xe/xe_uc_fw.c | 60
-rw-r--r--  drivers/gpu/drm/xe/xe_uc_fw_types.h | 9
-rw-r--r--  drivers/gpu/drm/xe/xe_vm.c | 306
-rw-r--r--  drivers/gpu/drm/xe/xe_vm.h | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_vm_types.h | 32
-rw-r--r--  drivers/gpu/drm/xe/xe_vram_freq.c | 130
-rw-r--r--  drivers/gpu/drm/xe/xe_vram_freq.h | 13
-rw-r--r--  drivers/gpu/drm/xe/xe_wa.c | 191
-rw-r--r--  drivers/gpu/drm/xe/xe_wa_oob.rules | 12
-rw-r--r--  drivers/gpu/drm/xe/xe_wait_user_fence.c | 2
-rw-r--r--  drivers/gpu/drm/xe/xe_wopcm_types.h | 4
142 files changed, 6605 insertions, 1232 deletions
diff --git a/drivers/gpu/drm/xe/.kunitconfig b/drivers/gpu/drm/xe/.kunitconfig
index 9590eac91a..ad4b9b4a9f 100644
--- a/drivers/gpu/drm/xe/.kunitconfig
+++ b/drivers/gpu/drm/xe/.kunitconfig
@@ -11,3 +11,8 @@ CONFIG_DRM_XE_DISPLAY=n
CONFIG_EXPERT=y
CONFIG_FB=y
CONFIG_DRM_XE_KUNIT_TEST=y
+CONFIG_LOCK_DEBUGGING_SUPPORT=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_LOCKDEP=y
+CONFIG_DEBUG_LOCKDEP=y
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index e36ae1f0d8..1a556d087e 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
config DRM_XE
tristate "Intel Xe Graphics"
- depends on DRM && PCI && MMU && (m || (y && KUNIT=y)) && 64BIT
+ depends on DRM && PCI && MMU && (m || (y && KUNIT=y))
select INTERVAL_TREE
# we need shmfs for the swappable backing store, and in particular
# the shmem_readpage() which depends upon tmpfs
@@ -10,6 +10,7 @@ config DRM_XE
select DRM_BUDDY
select DRM_EXEC
select DRM_KMS_HELPER
+ select DRM_KUNIT_TEST_HELPERS if DRM_XE_KUNIT_TEST != n
select DRM_PANEL
select DRM_SUBALLOC_HELPER
select DRM_DISPLAY_DP_HELPER
diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
index 549065f57a..df02e5d17d 100644
--- a/drivers/gpu/drm/xe/Kconfig.debug
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -76,7 +76,6 @@ config DRM_XE_KUNIT_TEST
depends on DRM_XE && KUNIT && DEBUG_FS
default KUNIT_ALL_TESTS
select DRM_EXPORT_FOR_TESTS if m
- select DRM_KUNIT_TEST_HELPERS
help
Choose this option to allow the driver to perform selftests under
the kunit framework
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 1c9e6906b0..c29a850859 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -77,6 +77,7 @@ xe-y += xe_bb.o \
xe_ggtt.o \
xe_gpu_scheduler.o \
xe_gsc.o \
+ xe_gsc_proxy.o \
xe_gsc_submit.o \
xe_gt.o \
xe_gt_ccs_mode.o \
@@ -93,6 +94,7 @@ xe-y += xe_bb.o \
xe_guc.o \
xe_guc_ads.o \
xe_guc_ct.o \
+ xe_guc_db_mgr.o \
xe_guc_debugfs.o \
xe_guc_hwconfig.o \
xe_guc_log.o \
@@ -138,6 +140,7 @@ xe-y += xe_bb.o \
xe_uc_debugfs.o \
xe_uc_fw.o \
xe_vm.o \
+ xe_vram_freq.o \
xe_wait_user_fence.o \
xe_wa.o \
xe_wopcm.o
@@ -146,18 +149,25 @@ xe-y += xe_bb.o \
xe-$(CONFIG_HWMON) += xe_hwmon.o
# graphics virtualization (SR-IOV) support
-xe-y += xe_sriov.o
+xe-y += \
+ xe_guc_relay.o \
+ xe_memirq.o \
+ xe_sriov.o
xe-$(CONFIG_PCI_IOV) += \
xe_lmtt.o \
xe_lmtt_2l.o \
xe_lmtt_ml.o
+# include helpers for tests even when XE is built-in
+ifdef CONFIG_DRM_XE_KUNIT_TEST
+xe-y += tests/xe_kunit_helpers.o
+endif
+
# i915 Display compat #defines and #includes
subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \
-I$(srctree)/$(src)/display/ext \
-I$(srctree)/$(src)/compat-i915-headers \
- -I$(srctree)/drivers/gpu/drm/xe/display/ \
-I$(srctree)/drivers/gpu/drm/i915/display/ \
-Ddrm_i915_gem_object=xe_bo \
-Ddrm_i915_private=xe_device
@@ -177,17 +187,17 @@ $(obj)/i915-display/%.o: $(srctree)/drivers/gpu/drm/i915/display/%.c FORCE
# Display code specific to xe
xe-$(CONFIG_DRM_XE_DISPLAY) += \
- xe_display.o \
- display/xe_fb_pin.o \
- display/xe_hdcp_gsc.o \
- display/xe_plane_initial.o \
- display/xe_display_rps.o \
+ display/ext/i915_irq.o \
+ display/ext/i915_utils.o \
+ display/intel_fb_bo.o \
+ display/intel_fbdev_fb.o \
+ display/xe_display.o \
display/xe_display_misc.o \
+ display/xe_display_rps.o \
display/xe_dsb_buffer.o \
- display/intel_fbdev_fb.o \
- display/intel_fb_bo.o \
- display/ext/i915_irq.o \
- display/ext/i915_utils.o
+ display/xe_fb_pin.o \
+ display/xe_hdcp_gsc.o \
+ display/xe_plane_initial.o
# SOC code shared with i915
xe-$(CONFIG_DRM_XE_DISPLAY) += \
@@ -214,8 +224,6 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
i915-display/intel_ddi.o \
i915-display/intel_ddi_buf_trans.o \
i915-display/intel_display.o \
- i915-display/intel_display_debugfs.o \
- i915-display/intel_display_debugfs_params.o \
i915-display/intel_display_device.o \
i915-display/intel_display_driver.o \
i915-display/intel_display_irq.o \
@@ -259,7 +267,6 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
i915-display/intel_modeset_setup.o \
i915-display/intel_modeset_verify.o \
i915-display/intel_panel.o \
- i915-display/intel_pipe_crc.o \
i915-display/intel_pmdemand.o \
i915-display/intel_pps.o \
i915-display/intel_psr.o \
@@ -286,6 +293,13 @@ ifeq ($(CONFIG_DRM_FBDEV_EMULATION),y)
xe-$(CONFIG_DRM_XE_DISPLAY) += i915-display/intel_fbdev.o
endif
+ifeq ($(CONFIG_DEBUG_FS),y)
+ xe-$(CONFIG_DRM_XE_DISPLAY) += \
+ i915-display/intel_display_debugfs.o \
+ i915-display/intel_display_debugfs_params.o \
+ i915-display/intel_pipe_crc.o
+endif
+
obj-$(CONFIG_DRM_XE) += xe.o
obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/
diff --git a/drivers/gpu/drm/xe/abi/gsc_proxy_commands_abi.h b/drivers/gpu/drm/xe/abi/gsc_proxy_commands_abi.h
new file mode 100644
index 0000000000..80bbf06a3e
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/gsc_proxy_commands_abi.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _ABI_GSC_PROXY_COMMANDS_ABI_H
+#define _ABI_GSC_PROXY_COMMANDS_ABI_H
+
+#include <linux/types.h>
+
+/* Heci client ID for proxy commands */
+#define HECI_MEADDRESS_PROXY 10
+
+/* FW-defined proxy header */
+struct xe_gsc_proxy_header {
+ /*
+ * hdr:
+ * Bits 0-7: type of the proxy message (see enum xe_gsc_proxy_type)
+ * Bits 8-15: rsvd
+ * Bits 16-31: length in bytes of the payload following the proxy header
+ */
+ u32 hdr;
+#define GSC_PROXY_TYPE GENMASK(7, 0)
+#define GSC_PROXY_PAYLOAD_LENGTH GENMASK(31, 16)
+
+ u32 source; /* Source of the Proxy message */
+ u32 destination; /* Destination of the Proxy message */
+#define GSC_PROXY_ADDRESSING_KMD 0x10000
+#define GSC_PROXY_ADDRESSING_GSC 0x20000
+#define GSC_PROXY_ADDRESSING_CSME 0x30000
+
+ u32 status; /* Command status */
+} __packed;
+
+/* FW-defined proxy types */
+enum xe_gsc_proxy_type {
+ GSC_PROXY_MSG_TYPE_PROXY_INVALID = 0,
+ GSC_PROXY_MSG_TYPE_PROXY_QUERY = 1,
+ GSC_PROXY_MSG_TYPE_PROXY_PAYLOAD = 2,
+ GSC_PROXY_MSG_TYPE_PROXY_END = 3,
+ GSC_PROXY_MSG_TYPE_PROXY_NOTIFICATION = 4,
+};
+
+#endif
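
For reference, the hdr layout documented above is meant to be packed and unpacked with the FIELD_PREP()/FIELD_GET() helpers from <linux/bitfield.h>. A minimal sketch follows; the two helper functions are illustrative only and not part of this patch:

	#include <linux/bitfield.h>

	/* Build the first dword of a proxy header for a given type and payload size. */
	static u32 gsc_proxy_hdr_pack(enum xe_gsc_proxy_type type, u32 payload_len)
	{
		return FIELD_PREP(GSC_PROXY_TYPE, type) |
		       FIELD_PREP(GSC_PROXY_PAYLOAD_LENGTH, payload_len);
	}

	/* Extract the payload length (in bytes) announced by a received header. */
	static u32 gsc_proxy_hdr_payload_len(const struct xe_gsc_proxy_header *header)
	{
		return FIELD_GET(GSC_PROXY_PAYLOAD_LENGTH, header->hdr);
	}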
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
new file mode 100644
index 0000000000..5496a58908
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _GUC_ACTIONS_PF_ABI_H
+#define _GUC_ACTIONS_PF_ABI_H
+
+#include "guc_communication_ctb_abi.h"
+
+/**
+ * DOC: GUC2PF_RELAY_FROM_VF
+ *
+ * This message is used by the GuC firmware to forward a VF2PF `Relay Message`_
+ * received from the Virtual Function (VF) driver to this Physical Function (PF)
+ * driver.
+ *
+ * This message is always sent as `CTB HXG Message`_.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | MBZ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION = _`XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF` = 0x5100 |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | **VFID** - source VF identifier |
+ * +---+-------+--------------------------------------------------------------+
+ * | 2 | 31:0 | **RELAY_ID** - VF/PF message ID |
+ * +---+-------+-----------------+--------------------------------------------+
+ * | 3 | 31:0 | **RELAY_DATA1** | |
+ * +---+-------+-----------------+ |
+ * |...| | | [Embedded `Relay Message`_] |
+ * +---+-------+-----------------+ |
+ * | n | 31:0 | **RELAY_DATAx** | |
+ * +---+-------+-----------------+--------------------------------------------+
+ */
+#define XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF 0x5100
+
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN (GUC_HXG_EVENT_MSG_MIN_LEN + 2u)
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_MAX_LEN \
+ (GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN + GUC_RELAY_MSG_MAX_LEN)
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_0_MBZ GUC_HXG_EVENT_MSG_0_DATA0
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_1_VFID GUC_HXG_EVENT_MSG_n_DATAn
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_2_RELAY_ID GUC_HXG_EVENT_MSG_n_DATAn
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_3_RELAY_DATA1 GUC_HXG_EVENT_MSG_n_DATAn
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_n_RELAY_DATAx GUC_HXG_EVENT_MSG_n_DATAn
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_NUM_RELAY_DATA GUC_RELAY_MSG_MAX_LEN
+
+/**
+ * DOC: PF2GUC_RELAY_TO_VF
+ *
+ * This H2G message is used by the Physical Function (PF) driver to send embedded
+ * VF2PF `Relay Message`_ to the VF.
+ *
+ * This action message must be sent over CTB as `CTB HXG Message`_.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = `GUC_HXG_TYPE_FAST_REQUEST`_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | MBZ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION = _`XE_GUC_ACTION_PF2GUC_RELAY_TO_VF` = 0x5101 |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | **VFID** - target VF identifier |
+ * +---+-------+--------------------------------------------------------------+
+ * | 2 | 31:0 | **RELAY_ID** - VF/PF message ID |
+ * +---+-------+-----------------+--------------------------------------------+
+ * | 3 | 31:0 | **RELAY_DATA1** | |
+ * +---+-------+-----------------+ |
+ * |...| | | [Embedded `Relay Message`_] |
+ * +---+-------+-----------------+ |
+ * | n | 31:0 | **RELAY_DATAx** | |
+ * +---+-------+-----------------+--------------------------------------------+
+ */
+#define XE_GUC_ACTION_PF2GUC_RELAY_TO_VF 0x5101
+
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 2u)
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_MAX_LEN \
+ (PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN + GUC_RELAY_MSG_MAX_LEN)
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_1_VFID GUC_HXG_REQUEST_MSG_n_DATAn
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_2_RELAY_ID GUC_HXG_REQUEST_MSG_n_DATAn
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_3_RELAY_DATA1 GUC_HXG_REQUEST_MSG_n_DATAn
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_n_RELAY_DATAx GUC_HXG_REQUEST_MSG_n_DATAn
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_NUM_RELAY_DATA GUC_RELAY_MSG_MAX_LEN
+
+/**
+ * DOC: GUC2VF_RELAY_FROM_PF
+ *
+ * This message is used by the GuC firmware to deliver `Relay Message`_ from the
+ * Physical Function (PF) driver to this Virtual Function (VF) driver.
+ * See `GuC Relay Communication`_ for details.
+ *
+ * This message is always sent over CTB.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | MBZ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION = _`XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF` = 0x5102 |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | **RELAY_ID** - VF/PF message ID |
+ * +---+-------+-----------------+--------------------------------------------+
+ * | 2 | 31:0 | **RELAY_DATA1** | |
+ * +---+-------+-----------------+ |
+ * |...| | | [Embedded `Relay Message`_] |
+ * +---+-------+-----------------+ |
+ * | n | 31:0 | **RELAY_DATAx** | |
+ * +---+-------+-----------------+--------------------------------------------+
+ */
+#define XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF 0x5102
+
+#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN (GUC_HXG_EVENT_MSG_MIN_LEN + 1u)
+#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_MAX_LEN \
+ (GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN + GUC_RELAY_MSG_MAX_LEN)
+#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_0_MBZ GUC_HXG_EVENT_MSG_0_DATA0
+#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_1_RELAY_ID GUC_HXG_EVENT_MSG_n_DATAn
+#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_n_RELAY_DATAx GUC_HXG_EVENT_MSG_n_DATAn
+#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_NUM_RELAY_DATA GUC_RELAY_MSG_MAX_LEN
+
+/**
+ * DOC: VF2GUC_RELAY_TO_PF
+ *
+ * This message is used by the Virtual Function (VF) drivers to communicate with
+ * the Physical Function (PF) driver and send `Relay Message`_ to the PF driver.
+ * See `GuC Relay Communication`_ for details.
+ *
+ * This message must be sent over CTB.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ or GUC_HXG_TYPE_FAST_REQUEST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | MBZ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION = _`XE_GUC_ACTION_VF2GUC_RELAY_TO_PF` = 0x5103 |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | **RELAY_ID** - VF/PF message ID |
+ * +---+-------+-----------------+--------------------------------------------+
+ * | 2 | 31:0 | **RELAY_DATA1** | |
+ * +---+-------+-----------------+ |
+ * |...| | | [Embedded `Relay Message`_] |
+ * +---+-------+-----------------+ |
+ * | n | 31:0 | **RELAY_DATAx** | |
+ * +---+-------+-----------------+--------------------------------------------+
+ */
+#define XE_GUC_ACTION_VF2GUC_RELAY_TO_PF 0x5103
+
+#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 1u)
+#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_MAX_LEN \
+ (VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN + GUC_RELAY_MSG_MAX_LEN)
+#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0
+#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_1_RELAY_ID GUC_HXG_REQUEST_MSG_n_DATAn
+#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_n_RELAY_DATAx GUC_HXG_REQUEST_MSG_n_DATAn
+#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_NUM_RELAY_DATA GUC_RELAY_MSG_MAX_LEN
+
+#endif
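
To show how the length limits and field macros above fit together on the PF side, here is a hypothetical receive-path sketch (assuming <linux/bitfield.h>; the driver's actual entry point added later in this series is xe_guc_relay_process_guc2pf()):

	/* Validate a GUC2PF_RELAY_FROM_VF event and pull out the routing fields. */
	static int pf_handle_relay_from_vf(const u32 *msg, u32 len)
	{
		u32 vfid, relay_id;

		if (len < GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN)
			return -EPROTO;
		if (len > GUC2PF_RELAY_FROM_VF_EVENT_MSG_MAX_LEN)
			return -EMSGSIZE;

		vfid = FIELD_GET(GUC2PF_RELAY_FROM_VF_EVENT_MSG_1_VFID, msg[1]);
		relay_id = FIELD_GET(GUC2PF_RELAY_FROM_VF_EVENT_MSG_2_RELAY_ID, msg[2]);

		/* msg[3]..msg[len - 1] carry the embedded Relay Message for (vfid, relay_id) */
		return 0;
	}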
diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
index 0b1146d0c9..8f86a16dc5 100644
--- a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
@@ -81,12 +81,13 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
#define GUC_CTB_HDR_LEN 1u
#define GUC_CTB_MSG_MIN_LEN GUC_CTB_HDR_LEN
-#define GUC_CTB_MSG_MAX_LEN 256u
+#define GUC_CTB_MSG_MAX_LEN (GUC_CTB_MSG_MIN_LEN + GUC_CTB_MAX_DWORDS)
#define GUC_CTB_MSG_0_FENCE (0xffffu << 16)
#define GUC_CTB_MSG_0_FORMAT (0xfu << 12)
#define GUC_CTB_FORMAT_HXG 0u
#define GUC_CTB_MSG_0_RESERVED (0xfu << 8)
#define GUC_CTB_MSG_0_NUM_DWORDS (0xffu << 0)
+#define GUC_CTB_MAX_DWORDS 255
/**
* DOC: CTB HXG Message
diff --git a/drivers/gpu/drm/xe/abi/guc_messages_abi.h b/drivers/gpu/drm/xe/abi/guc_messages_abi.h
index 29e414c82d..534a39db77 100644
--- a/drivers/gpu/drm/xe/abi/guc_messages_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_messages_abi.h
@@ -24,6 +24,7 @@
* | | 30:28 | **TYPE** - message type |
* | | | - _`GUC_HXG_TYPE_REQUEST` = 0 |
* | | | - _`GUC_HXG_TYPE_EVENT` = 1 |
+ * | | | - _`GUC_HXG_TYPE_FAST_REQUEST` = 2 |
* | | | - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3 |
* | | | - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5 |
* | | | - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6 |
@@ -46,6 +47,7 @@
#define GUC_HXG_MSG_0_TYPE (0x7u << 28)
#define GUC_HXG_TYPE_REQUEST 0u
#define GUC_HXG_TYPE_EVENT 1u
+#define GUC_HXG_TYPE_FAST_REQUEST 2u
#define GUC_HXG_TYPE_NO_RESPONSE_BUSY 3u
#define GUC_HXG_TYPE_NO_RESPONSE_RETRY 5u
#define GUC_HXG_TYPE_RESPONSE_FAILURE 6u
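
The new GUC_HXG_TYPE_FAST_REQUEST slots into the same header layout as the existing types; composing an HXG header dword with it could look like the sketch below. The helper name is illustrative; the masks are the GUC_HXG_* definitions above plus GUC_HXG_REQUEST_MSG_0_ACTION, which is used by the tests later in this series. A FAST_REQUEST is a request for which the sender does not wait for a RESPONSE_SUCCESS reply.

	static u32 hxg_fast_request_header(u32 action)
	{
		return FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
		       FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_FAST_REQUEST) |
		       FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, action);
	}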
diff --git a/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h
new file mode 100644
index 0000000000..747e428de4
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_RELAY_ACTIONS_ABI_H_
+#define _ABI_GUC_RELAY_ACTIONS_ABI_H_
+
+/**
+ * DOC: GuC Relay Debug Actions
+ *
+ * This range of action codes is reserved for debugging purposes only and should
+ * be used only on debug builds. These actions may not be supported by the
+ * production drivers. Their definitions could be changed in the future.
+ *
+ * _`GUC_RELAY_ACTION_DEBUG_ONLY_START` = 0xDEB0
+ * _`GUC_RELAY_ACTION_DEBUG_ONLY_END` = 0xDEFF
+ */
+
+#define GUC_RELAY_ACTION_DEBUG_ONLY_START 0xDEB0
+#define GUC_RELAY_ACTION_DEBUG_ONLY_END 0xDEFF
+
+/**
+ * DOC: VFXPF_TESTLOOP
+ *
+ * This `Relay Message`_ is used to selftest the `GuC Relay Communication`_.
+ *
+ * The following opcodes are defined:
+ * VFXPF_TESTLOOP_OPCODE_NOP_ will return no data.
+ * VFXPF_TESTLOOP_OPCODE_BUSY_ will reply with BUSY response first.
+ * VFXPF_TESTLOOP_OPCODE_RETRY_ will reply with RETRY response instead.
+ * VFXPF_TESTLOOP_OPCODE_ECHO_ will return same data as received.
+ * VFXPF_TESTLOOP_OPCODE_FAIL_ will always fail with error.
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ or GUC_HXG_TYPE_FAST_REQUEST_ |
+ * | | | or GUC_HXG_TYPE_EVENT_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:16 | **OPCODE** |
+ * | | | - _`VFXPF_TESTLOOP_OPCODE_NOP` = 0x0 |
+ * | | | - _`VFXPF_TESTLOOP_OPCODE_BUSY` = 0xB |
+ * | | | - _`VFXPF_TESTLOOP_OPCODE_RETRY` = 0xD |
+ * | | | - _`VFXPF_TESTLOOP_OPCODE_ECHO` = 0xE |
+ * | | | - _`VFXPF_TESTLOOP_OPCODE_FAIL` = 0xF |
+ * | +-------+--------------------------------------------------------------+
+ * | | 15:0 | ACTION = _`IOV_ACTION_SELFTEST_RELAY` |
+ * +---+-------+--------------------------------------------------------------+
+ * | 1 | 31:0 | **DATA1** = optional, depends on **OPCODE**: |
+ * | | | for VFXPF_TESTLOOP_OPCODE_BUSY_: time in ms for reply |
+ * | | | for VFXPF_TESTLOOP_OPCODE_FAIL_: expected error |
+ * | | | for VFXPF_TESTLOOP_OPCODE_ECHO_: payload |
+ * +---+-------+--------------------------------------------------------------+
+ * |...| 31:0 | **DATAn** = only for **OPCODE** VFXPF_TESTLOOP_OPCODE_ECHO_ |
+ * +---+-------+--------------------------------------------------------------+
+ *
+ * +---+-------+--------------------------------------------------------------+
+ * | | Bits | Description |
+ * +===+=======+==============================================================+
+ * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ |
+ * | +-------+--------------------------------------------------------------+
+ * | | 27:0 | DATA0 = MBZ |
+ * +---+-------+--------------------------------------------------------------+
+ * |...| 31:0 | DATAn = only for **OPCODE** VFXPF_TESTLOOP_OPCODE_ECHO_ |
+ * +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_RELAY_ACTION_VFXPF_TESTLOOP (GUC_RELAY_ACTION_DEBUG_ONLY_START + 1)
+#define VFXPF_TESTLOOP_OPCODE_NOP 0x0
+#define VFXPF_TESTLOOP_OPCODE_BUSY 0xB
+#define VFXPF_TESTLOOP_OPCODE_RETRY 0xD
+#define VFXPF_TESTLOOP_OPCODE_ECHO 0xE
+#define VFXPF_TESTLOOP_OPCODE_FAIL 0xF
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_relay_communication_abi.h b/drivers/gpu/drm/xe/abi/guc_relay_communication_abi.h
new file mode 100644
index 0000000000..f92625f047
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_relay_communication_abi.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_RELAY_COMMUNICATION_ABI_H
+#define _ABI_GUC_RELAY_COMMUNICATION_ABI_H
+
+#include <linux/build_bug.h>
+
+#include "guc_actions_sriov_abi.h"
+#include "guc_communication_ctb_abi.h"
+#include "guc_messages_abi.h"
+
+/**
+ * DOC: GuC Relay Communication
+ *
+ * The communication between Virtual Function (VF) drivers and Physical Function
+ * (PF) drivers is based on the GuC firmware acting as a proxy (relay) agent.
+ *
+ * To communicate with the PF driver, VF's drivers use `VF2GUC_RELAY_TO_PF`_
+ * action that takes the `Relay Message`_ as opaque payload and requires the
+ * relay message identifier (RID) as additional parameter.
+ *
+ * This identifier is used by the drivers to match related messages.
+ *
+ * The GuC forwards this `Relay Message`_ and its identifier to the PF driver
+ * in `GUC2PF_RELAY_FROM_VF`_ action. This event message additionally contains
+ * the identifier of the origin VF (VFID).
+ *
+ * Likewise, to communicate with the VF drivers, the PF driver uses the
+ * `PF2GUC_RELAY_TO_VF`_ action that, in addition to the `Relay Message`_
+ * and the relay message identifier (RID), also takes the target VF identifier.
+ *
+ * The GuC uses this target VFID from the message to select where to send the
+ * `GUC2VF_RELAY_FROM_PF`_ with the embedded `Relay Message`_ with response::
+ *
+ * VF GuC PF
+ * | | |
+ * [ ] VF2GUC_RELAY_TO_PF | |
+ * [ ]---------------------------> [ ] |
+ * [ ] { rid, msg } [ ] |
+ * [ ] [ ] GUC2PF_RELAY_FROM_VF |
+ * [ ] [ ]---------------------------> [ ]
+ * [ ] | { VFID, rid, msg } [ ]
+ * [ ] | [ ]
+ * [ ] | PF2GUC_RELAY_TO_VF [ ]
+ * [ ] [ ] <---------------------------[ ]
+ * [ ] [ ] { VFID, rid, reply } |
+ * [ ] GUC2VF_RELAY_FROM_PF [ ] |
+ * [ ] <---------------------------[ ] |
+ * | { rid, reply } | |
+ * | | |
+ *
+ * It is also possible that PF driver will initiate communication with the
+ * selected VF driver. The same GuC action messages will be used::
+ *
+ * VF GuC PF
+ * | | |
+ * | | PF2GUC_RELAY_TO_VF [ ]
+ * | [ ] <---------------------------[ ]
+ * | [ ] { VFID, rid, msg } [ ]
+ * | GUC2VF_RELAY_FROM_PF [ ] [ ]
+ * [ ] <---------------------------[ ] [ ]
+ * [ ] { rid, msg } | [ ]
+ * [ ] | [ ]
+ * [ ] VF2GUC_RELAY_TO_PF | [ ]
+ * [ ]---------------------------> [ ] [ ]
+ * | { rid, reply } [ ] [ ]
+ * | [ ] GUC2PF_RELAY_FROM_VF [ ]
+ * | [ ]---------------------------> [ ]
+ * | | { VFID, rid, reply } |
+ * | | |
+ */
+
+/**
+ * DOC: Relay Message
+ *
+ * The `Relay Message`_ is used by Physical Function (PF) driver and Virtual
+ * Function (VF) drivers to communicate using `GuC Relay Communication`_.
+ *
+ * Format of the `Relay Message`_ follows format of the generic `HXG Message`_.
+ *
+ * +--------------------------------------------------------------------------+
+ * | `Relay Message`_ |
+ * +==========================================================================+
+ * | `HXG Message`_ |
+ * +--------------------------------------------------------------------------+
+ *
+ * Maximum length of the `Relay Message`_ is limited by the maximum length of
+ * the `CTB HXG Message`_ and format of the `GUC2PF_RELAY_FROM_VF`_ message.
+ */
+
+#define GUC_RELAY_MSG_MIN_LEN GUC_HXG_MSG_MIN_LEN
+#define GUC_RELAY_MSG_MAX_LEN \
+ (GUC_CTB_MAX_DWORDS - GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN)
+
+static_assert(PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN >
+ VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN);
+
+/**
+ * DOC: Relay Error Codes
+ *
+ * The `GuC Relay Communication`_ can be used to pass `Relay Message`_ between
+ * drivers that run on different Operating Systems. To help in troubleshooting,
+ * `GuC Relay Communication`_ uses error codes that mostly match errno values.
+ */
+
+#define GUC_RELAY_ERROR_UNDISCLOSED 0
+#define GUC_RELAY_ERROR_OPERATION_NOT_PERMITTED 1 /* EPERM */
+#define GUC_RELAY_ERROR_PERMISSION_DENIED 13 /* EACCES */
+#define GUC_RELAY_ERROR_INVALID_ARGUMENT 22 /* EINVAL */
+#define GUC_RELAY_ERROR_INVALID_REQUEST_CODE 56 /* EBADRQC */
+#define GUC_RELAY_ERROR_NO_DATA_AVAILABLE 61 /* ENODATA */
+#define GUC_RELAY_ERROR_PROTOCOL_ERROR 71 /* EPROTO */
+#define GUC_RELAY_ERROR_MESSAGE_SIZE 90 /* EMSGSIZE */
+
+#endif
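
Because the relay error codes above are chosen to mostly match errno values, a receiver can translate them into negative errnos almost one-to-one. A minimal sketch of such a mapping (the fallback value for unknown or undisclosed errors is an assumption, not dictated by this header):

	#include <linux/errno.h>

	static int relay_error_to_errno(u32 error)
	{
		switch (error) {
		case GUC_RELAY_ERROR_OPERATION_NOT_PERMITTED:
			return -EPERM;
		case GUC_RELAY_ERROR_PERMISSION_DENIED:
			return -EACCES;
		case GUC_RELAY_ERROR_INVALID_ARGUMENT:
			return -EINVAL;
		case GUC_RELAY_ERROR_INVALID_REQUEST_CODE:
			return -EBADRQC;
		case GUC_RELAY_ERROR_NO_DATA_AVAILABLE:
			return -ENODATA;
		case GUC_RELAY_ERROR_PROTOCOL_ERROR:
			return -EPROTO;
		case GUC_RELAY_ERROR_MESSAGE_SIZE:
			return -EMSGSIZE;
		case GUC_RELAY_ERROR_UNDISCLOSED:
		default:
			return -EREMOTEIO;	/* assumed fallback for opaque remote failures */
		}
	}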
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
index d9a81e6a4b..854a7bb535 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
@@ -163,18 +163,18 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev)
#include "intel_wakeref.h"
-static inline bool intel_runtime_pm_get(struct xe_runtime_pm *pm)
+static inline intel_wakeref_t intel_runtime_pm_get(struct xe_runtime_pm *pm)
{
struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
if (xe_pm_runtime_get(xe) < 0) {
xe_pm_runtime_put(xe);
- return false;
+ return 0;
}
- return true;
+ return 1;
}
-static inline bool intel_runtime_pm_get_if_in_use(struct xe_runtime_pm *pm)
+static inline intel_wakeref_t intel_runtime_pm_get_if_in_use(struct xe_runtime_pm *pm)
{
struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
@@ -188,7 +188,7 @@ static inline void intel_runtime_pm_put_unchecked(struct xe_runtime_pm *pm)
xe_pm_runtime_put(xe);
}
-static inline void intel_runtime_pm_put(struct xe_runtime_pm *pm, bool wakeref)
+static inline void intel_runtime_pm_put(struct xe_runtime_pm *pm, intel_wakeref_t wakeref)
{
if (wakeref)
intel_runtime_pm_put_unchecked(pm);
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h
index 888e7a87a9..bd233007c1 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h
@@ -19,6 +19,9 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe,
int err;
u32 flags = XE_BO_CREATE_PINNED_BIT | XE_BO_CREATE_STOLEN_BIT;
+ if (align)
+ size = ALIGN(size, align);
+
bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe),
NULL, size, start, end,
ttm_bo_type_kernel, flags);
diff --git a/drivers/gpu/drm/xe/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c
index 6ec375c1c4..6ec375c1c4 100644
--- a/drivers/gpu/drm/xe/xe_display.c
+++ b/drivers/gpu/drm/xe/display/xe_display.c
diff --git a/drivers/gpu/drm/xe/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h
index 710e56180b..710e56180b 100644
--- a/drivers/gpu/drm/xe/xe_display.h
+++ b/drivers/gpu/drm/xe/display/xe_display.h
diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c
index ccf83c12b5..866d1dd6ee 100644
--- a/drivers/gpu/drm/xe/display/xe_plane_initial.c
+++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c
@@ -10,6 +10,7 @@
#include "i915_drv.h"
#include "intel_atomic_plane.h"
+#include "intel_crtc.h"
#include "intel_display.h"
#include "intel_display_types.h"
#include "intel_fb.h"
@@ -18,19 +19,20 @@
#include "intel_plane_initial.h"
static bool
-intel_reuse_initial_plane_obj(struct drm_i915_private *i915,
- const struct intel_initial_plane_config *plane_config,
+intel_reuse_initial_plane_obj(struct intel_crtc *this,
+ const struct intel_initial_plane_config plane_configs[],
struct drm_framebuffer **fb)
{
+ struct drm_i915_private *i915 = to_i915(this->base.dev);
struct intel_crtc *crtc;
for_each_intel_crtc(&i915->drm, crtc) {
- struct intel_crtc_state *crtc_state =
- to_intel_crtc_state(crtc->base.state);
struct intel_plane *plane =
to_intel_plane(crtc->base.primary);
- struct intel_plane_state *plane_state =
+ const struct intel_plane_state *plane_state =
to_intel_plane_state(plane->base.state);
+ const struct intel_crtc_state *crtc_state =
+ to_intel_crtc_state(crtc->base.state);
if (!crtc_state->uapi.active)
continue;
@@ -38,7 +40,7 @@ intel_reuse_initial_plane_obj(struct drm_i915_private *i915,
if (!plane_state->ggtt_vma)
continue;
- if (intel_plane_ggtt_offset(plane_state) == plane_config->base) {
+ if (plane_configs[this->pipe].base == plane_configs[crtc->pipe].base) {
*fb = plane_state->hw.fb;
return true;
}
@@ -178,10 +180,10 @@ err_bo:
static void
intel_find_initial_plane_obj(struct intel_crtc *crtc,
- struct intel_initial_plane_config *plane_config)
+ struct intel_initial_plane_config plane_configs[])
{
- struct drm_device *dev = crtc->base.dev;
- struct drm_i915_private *dev_priv = to_i915(dev);
+ struct intel_initial_plane_config *plane_config =
+ &plane_configs[crtc->pipe];
struct intel_plane *plane =
to_intel_plane(crtc->base.primary);
struct intel_plane_state *plane_state =
@@ -201,7 +203,7 @@ intel_find_initial_plane_obj(struct intel_crtc *crtc,
if (intel_alloc_initial_plane_obj(crtc, plane_config))
fb = &plane_config->fb->base;
- else if (!intel_reuse_initial_plane_obj(dev_priv, plane_config, &fb))
+ else if (!intel_reuse_initial_plane_obj(crtc, plane_configs, &fb))
goto nofb;
plane_state->uapi.rotation = plane_config->rotation;
@@ -267,25 +269,36 @@ static void plane_config_fini(struct intel_initial_plane_config *plane_config)
}
}
-void intel_crtc_initial_plane_config(struct intel_crtc *crtc)
+void intel_initial_plane_config(struct drm_i915_private *i915)
{
- struct xe_device *xe = to_xe_device(crtc->base.dev);
- struct intel_initial_plane_config plane_config = {};
+ struct intel_initial_plane_config plane_configs[I915_MAX_PIPES] = {};
+ struct intel_crtc *crtc;
- /*
- * Note that reserving the BIOS fb up front prevents us
- * from stuffing other stolen allocations like the ring
- * on top. This prevents some ugliness at boot time, and
- * can even allow for smooth boot transitions if the BIOS
- * fb is large enough for the active pipe configuration.
- */
- xe->display.funcs.display->get_initial_plane_config(crtc, &plane_config);
+ for_each_intel_crtc(&i915->drm, crtc) {
+ struct intel_initial_plane_config *plane_config =
+ &plane_configs[crtc->pipe];
- /*
- * If the fb is shared between multiple heads, we'll
- * just get the first one.
- */
- intel_find_initial_plane_obj(crtc, &plane_config);
+ if (!to_intel_crtc_state(crtc->base.state)->uapi.active)
+ continue;
- plane_config_fini(&plane_config);
+ /*
+ * Note that reserving the BIOS fb up front prevents us
+ * from stuffing other stolen allocations like the ring
+ * on top. This prevents some ugliness at boot time, and
+ * can even allow for smooth boot transitions if the BIOS
+ * fb is large enough for the active pipe configuration.
+ */
+ i915->display.funcs.display->get_initial_plane_config(crtc, plane_config);
+
+ /*
+ * If the fb is shared between multiple heads, we'll
+ * just get the first one.
+ */
+ intel_find_initial_plane_obj(crtc, plane_configs);
+
+ if (i915->display.funcs.display->fixup_initial_plane_config(crtc, plane_config))
+ intel_crtc_wait_for_next_vblank(crtc);
+
+ plane_config_fini(plane_config);
+ }
}
diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
index 1cfa96167f..c74ceb550d 100644
--- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
@@ -56,6 +56,9 @@
#define MI_FLUSH_IMM_QW REG_FIELD_PREP(MI_FLUSH_DW_LEN_DW, 5 - 2)
#define MI_FLUSH_DW_USE_GTT REG_BIT(2)
+#define MI_LOAD_REGISTER_MEM (__MI_INSTR(0x29) | XE_INSTR_NUM_DW(4))
+#define MI_LRM_USE_GGTT REG_BIT(22)
+
#define MI_BATCH_BUFFER_START __MI_INSTR(0x31)
#endif
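
The new MI_LOAD_REGISTER_MEM opcode is declared as a 4-dword instruction (XE_INSTR_NUM_DW(4)): header, destination register offset, and a 64-bit source address. Emitting it into a batch might look like the following sketch; the dw/i naming and the GGTT address are illustrative, and the dword layout follows the conventional LRM encoding rather than anything defined in this header:

	dw[i++] = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT;
	dw[i++] = reg.addr;			/* destination MMIO register */
	dw[i++] = lower_32_bits(ggtt_addr);	/* source address in GGTT */
	dw[i++] = upper_32_bits(ggtt_addr);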
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 81b8362e93..deddc8be48 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -75,12 +75,17 @@
#define FF_THREAD_MODE(base) XE_REG((base) + 0xa0)
#define FF_TESSELATION_DOP_GATE_DISABLE BIT(19)
+#define RING_INT_SRC_RPT_PTR(base) XE_REG((base) + 0xa4)
#define RING_IMR(base) XE_REG((base) + 0xa8)
+#define RING_INT_STATUS_RPT_PTR(base) XE_REG((base) + 0xac)
#define RING_EIR(base) XE_REG((base) + 0xb0)
#define RING_EMR(base) XE_REG((base) + 0xb4)
#define RING_ESR(base) XE_REG((base) + 0xb8)
+#define INSTPM(base) XE_REG((base) + 0xc0, XE_REG_OPTION_MASKED)
+#define ENABLE_SEMAPHORE_POLL_BIT REG_BIT(13)
+
#define RING_CMD_CCTL(base) XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED)
/*
* CMD_CCTL read/write fields take a MOCS value and _not_ a table index.
@@ -136,6 +141,7 @@
#define TAIL_ADDR 0x001FFFF8
#define RING_CTX_TIMESTAMP(base) XE_REG((base) + 0x3a8)
+#define CSBE_DEBUG_STATUS(base) XE_REG((base) + 0x3fc)
#define RING_FORCE_TO_NONPRIV(base, i) XE_REG(((base) + 0x4d0) + (i) * 4)
#define RING_FORCE_TO_NONPRIV_DENY REG_BIT(30)
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 1dd361046b..15ac2d284d 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -144,8 +144,12 @@
#define GSCPSMI_BASE XE_REG(0x880c)
+#define CCCHKNREG1 XE_REG_MCR(0x8828)
+#define ENCOMPPERFFIX REG_BIT(18)
+
/* Fuse readout registers for GT */
#define XEHP_FUSE4 XE_REG(0x9114)
+#define CFEG_WMTP_DISABLE REG_BIT(20)
#define CCS_EN_MASK REG_GENMASK(19, 16)
#define GT_L3_EXC_MASK REG_GENMASK(6, 4)
@@ -288,6 +292,9 @@
#define XEHP_L3NODEARBCFG XE_REG_MCR(0xb0b4)
#define XEHP_LNESPARE REG_BIT(19)
+#define L3SQCREG3 XE_REG_MCR(0xb108)
+#define COMPPWOVERFETCHEN REG_BIT(28)
+
#define XEHP_L3SQCREG5 XE_REG_MCR(0xb158)
#define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0)
@@ -344,6 +351,9 @@
#define ROW_CHICKEN3 XE_REG_MCR(0xe49c, XE_REG_OPTION_MASKED)
#define DIS_FIX_EOT1_FLUSH REG_BIT(9)
+#define TDL_TSL_CHICKEN XE_REG_MCR(0xe4c4, XE_REG_OPTION_MASKED)
+#define SLM_WMTP_RESTORE REG_BIT(11)
+
#define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED)
#define UGM_BACKUP_MODE REG_BIT(13)
#define MDQ_ARBITRATION_MODE REG_BIT(12)
@@ -430,6 +440,15 @@
#define VOLTAGE_MASK REG_GENMASK(10, 0)
#define GT_INTR_DW(x) XE_REG(0x190018 + ((x) * 4))
+#define INTR_GSC REG_BIT(31)
+#define INTR_GUC REG_BIT(25)
+#define INTR_MGUC REG_BIT(24)
+#define INTR_BCS8 REG_BIT(23)
+#define INTR_BCS(x) REG_BIT(15 - (x))
+#define INTR_CCS(x) REG_BIT(4 + (x))
+#define INTR_RCS0 REG_BIT(0)
+#define INTR_VECS(x) REG_BIT(31 - (x))
+#define INTR_VCS(x) REG_BIT(x)
#define RENDER_COPY_INTR_ENABLE XE_REG(0x190030)
#define VCS_VECS_INTR_ENABLE XE_REG(0x190034)
@@ -446,6 +465,7 @@
#define INTR_ENGINE_CLASS(x) REG_FIELD_GET(GENMASK(18, 16), x)
#define INTR_ENGINE_INTR(x) REG_FIELD_GET(GENMASK(15, 0), x)
#define OTHER_GUC_INSTANCE 0
+#define OTHER_GSC_HECI2_INSTANCE 3
#define OTHER_GSC_INSTANCE 6
#define IIR_REG_SELECTOR(x) XE_REG(0x190070 + ((x) * 4))
@@ -454,6 +474,7 @@
#define VCS0_VCS1_INTR_MASK XE_REG(0x1900a8)
#define VCS2_VCS3_INTR_MASK XE_REG(0x1900ac)
#define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0)
+#define HECI2_RSVD_INTR_MASK XE_REG(0x1900e4)
#define GUC_SG_INTR_MASK XE_REG(0x1900e8)
#define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec)
#define GUNIT_GSC_INTR_MASK XE_REG(0x1900f4)
@@ -469,10 +490,4 @@
#define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3)
#define GT_RENDER_USER_INTERRUPT REG_BIT(0)
-#define PVC_GT0_PACKAGE_ENERGY_STATUS XE_REG(0x281004)
-#define PVC_GT0_PACKAGE_RAPL_LIMIT XE_REG(0x281008)
-#define PVC_GT0_PACKAGE_POWER_SKU_UNIT XE_REG(0x281068)
-#define PVC_GT0_PLATFORM_ENERGY_STATUS XE_REG(0x28106c)
-#define PVC_GT0_PACKAGE_POWER_SKU XE_REG(0x281080)
-
#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
index 4be81abc86..1825d8f79d 100644
--- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
+++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
@@ -14,4 +14,13 @@
#define CTX_PDP0_UDW (0x30 + 1)
#define CTX_PDP0_LDW (0x32 + 1)
+#define CTX_LRM_INT_MASK_ENABLE 0x50
+#define CTX_INT_MASK_ENABLE_REG (CTX_LRM_INT_MASK_ENABLE + 1)
+#define CTX_INT_MASK_ENABLE_PTR (CTX_LRM_INT_MASK_ENABLE + 2)
+#define CTX_LRI_INT_REPORT_PTR 0x55
+#define CTX_INT_STATUS_REPORT_REG (CTX_LRI_INT_REPORT_PTR + 1)
+#define CTX_INT_STATUS_REPORT_PTR (CTX_LRI_INT_REPORT_PTR + 2)
+#define CTX_INT_SRC_REPORT_REG (CTX_LRI_INT_REPORT_PTR + 3)
+#define CTX_INT_SRC_REPORT_PTR (CTX_LRI_INT_REPORT_PTR + 4)
+
#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h
new file mode 100644
index 0000000000..3dae858508
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_PCODE_REGS_H_
+#define _XE_PCODE_REGS_H_
+
+#include "regs/xe_reg_defs.h"
+
+/*
+ * This file contains addresses of PCODE registers visible through GT MMIO space.
+ */
+
+#define PVC_GT0_PACKAGE_ENERGY_STATUS XE_REG(0x281004)
+#define PVC_GT0_PACKAGE_RAPL_LIMIT XE_REG(0x281008)
+#define PVC_GT0_PACKAGE_POWER_SKU_UNIT XE_REG(0x281068)
+#define PVC_GT0_PLATFORM_ENERGY_STATUS XE_REG(0x28106c)
+#define PVC_GT0_PACKAGE_POWER_SKU XE_REG(0x281080)
+
+#endif /* _XE_PCODE_REGS_H_ */
diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile
index 39d8a08922..9d1d88af8b 100644
--- a/drivers/gpu/drm/xe/tests/Makefile
+++ b/drivers/gpu/drm/xe/tests/Makefile
@@ -1,10 +1,15 @@
# SPDX-License-Identifier: GPL-2.0
+# "live" kunit tests
obj-$(CONFIG_DRM_XE_KUNIT_TEST) += \
xe_bo_test.o \
xe_dma_buf_test.o \
xe_migrate_test.o \
- xe_mocs_test.o \
+ xe_mocs_test.o
+
+# Normal kunit tests
+obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_test.o
+xe_test-y = xe_test_mod.o \
xe_pci_test.o \
xe_rtp_test.o \
xe_wa_test.o
diff --git a/drivers/gpu/drm/xe/tests/xe_guc_db_mgr_test.c b/drivers/gpu/drm/xe/tests/xe_guc_db_mgr_test.c
new file mode 100644
index 0000000000..a87a7b4b04
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_guc_db_mgr_test.c
@@ -0,0 +1,201 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+#include "xe_device.h"
+#include "xe_kunit_helpers.h"
+
+static int guc_dbm_test_init(struct kunit *test)
+{
+ struct xe_guc_db_mgr *dbm;
+
+ xe_kunit_helper_xe_device_test_init(test);
+ dbm = &xe_device_get_gt(test->priv, 0)->uc.guc.dbm;
+
+ mutex_init(dbm_mutex(dbm));
+ test->priv = dbm;
+ return 0;
+}
+
+static void test_empty(struct kunit *test)
+{
+ struct xe_guc_db_mgr *dbm = test->priv;
+
+ KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, 0), 0);
+ KUNIT_ASSERT_EQ(test, dbm->count, 0);
+
+ mutex_lock(dbm_mutex(dbm));
+ KUNIT_EXPECT_LT(test, xe_guc_db_mgr_reserve_id_locked(dbm), 0);
+ mutex_unlock(dbm_mutex(dbm));
+
+ KUNIT_EXPECT_LT(test, xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0);
+}
+
+static void test_default(struct kunit *test)
+{
+ struct xe_guc_db_mgr *dbm = test->priv;
+
+ KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, ~0), 0);
+ KUNIT_ASSERT_EQ(test, dbm->count, GUC_NUM_DOORBELLS);
+}
+
+static const unsigned int guc_dbm_params[] = {
+ GUC_NUM_DOORBELLS / 64,
+ GUC_NUM_DOORBELLS / 32,
+ GUC_NUM_DOORBELLS / 8,
+ GUC_NUM_DOORBELLS,
+};
+
+static void uint_param_get_desc(const unsigned int *p, char *desc)
+{
+ snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%u", *p);
+}
+
+KUNIT_ARRAY_PARAM(guc_dbm, guc_dbm_params, uint_param_get_desc);
+
+static void test_size(struct kunit *test)
+{
+ const unsigned int *p = test->param_value;
+ struct xe_guc_db_mgr *dbm = test->priv;
+ unsigned int n;
+ int id;
+
+ KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, *p), 0);
+ KUNIT_ASSERT_EQ(test, dbm->count, *p);
+
+ mutex_lock(dbm_mutex(dbm));
+ for (n = 0; n < *p; n++) {
+ KUNIT_EXPECT_GE(test, id = xe_guc_db_mgr_reserve_id_locked(dbm), 0);
+ KUNIT_EXPECT_LT(test, id, dbm->count);
+ }
+ KUNIT_EXPECT_LT(test, xe_guc_db_mgr_reserve_id_locked(dbm), 0);
+ mutex_unlock(dbm_mutex(dbm));
+
+ mutex_lock(dbm_mutex(dbm));
+ for (n = 0; n < *p; n++)
+ xe_guc_db_mgr_release_id_locked(dbm, n);
+ mutex_unlock(dbm_mutex(dbm));
+}
+
+static void test_reuse(struct kunit *test)
+{
+ const unsigned int *p = test->param_value;
+ struct xe_guc_db_mgr *dbm = test->priv;
+ unsigned int n;
+
+ KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, *p), 0);
+
+ mutex_lock(dbm_mutex(dbm));
+ for (n = 0; n < *p; n++)
+ KUNIT_EXPECT_GE(test, xe_guc_db_mgr_reserve_id_locked(dbm), 0);
+ KUNIT_EXPECT_LT(test, xe_guc_db_mgr_reserve_id_locked(dbm), 0);
+ mutex_unlock(dbm_mutex(dbm));
+
+ mutex_lock(dbm_mutex(dbm));
+ for (n = 0; n < *p; n++) {
+ xe_guc_db_mgr_release_id_locked(dbm, n);
+ KUNIT_EXPECT_EQ(test, xe_guc_db_mgr_reserve_id_locked(dbm), n);
+ }
+ KUNIT_EXPECT_LT(test, xe_guc_db_mgr_reserve_id_locked(dbm), 0);
+ mutex_unlock(dbm_mutex(dbm));
+
+ mutex_lock(dbm_mutex(dbm));
+ for (n = 0; n < *p; n++)
+ xe_guc_db_mgr_release_id_locked(dbm, n);
+ mutex_unlock(dbm_mutex(dbm));
+}
+
+static void test_range_overlap(struct kunit *test)
+{
+ const unsigned int *p = test->param_value;
+ struct xe_guc_db_mgr *dbm = test->priv;
+ int id1, id2, id3;
+ unsigned int n;
+
+ KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, ~0), 0);
+ KUNIT_ASSERT_LE(test, *p, dbm->count);
+
+ KUNIT_ASSERT_GE(test, id1 = xe_guc_db_mgr_reserve_range(dbm, *p, 0), 0);
+ for (n = 0; n < dbm->count - *p; n++) {
+ KUNIT_ASSERT_GE(test, id2 = xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0);
+ KUNIT_ASSERT_NE(test, id2, id1);
+ KUNIT_ASSERT_NE_MSG(test, id2 < id1, id2 > id1 + *p - 1,
+ "id1=%d id2=%d", id1, id2);
+ }
+ KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0);
+ xe_guc_db_mgr_release_range(dbm, 0, dbm->count);
+
+ if (*p >= 1) {
+ KUNIT_ASSERT_GE(test, id1 = xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0);
+ KUNIT_ASSERT_GE(test, id2 = xe_guc_db_mgr_reserve_range(dbm, *p - 1, 0), 0);
+ KUNIT_ASSERT_NE(test, id2, id1);
+ KUNIT_ASSERT_NE_MSG(test, id1 < id2, id1 > id2 + *p - 2,
+ "id1=%d id2=%d", id1, id2);
+ for (n = 0; n < dbm->count - *p; n++) {
+ KUNIT_ASSERT_GE(test, id3 = xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0);
+ KUNIT_ASSERT_NE(test, id3, id1);
+ KUNIT_ASSERT_NE(test, id3, id2);
+ KUNIT_ASSERT_NE_MSG(test, id3 < id2, id3 > id2 + *p - 2,
+ "id3=%d id2=%d", id3, id2);
+ }
+ KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0);
+ xe_guc_db_mgr_release_range(dbm, 0, dbm->count);
+ }
+}
+
+static void test_range_compact(struct kunit *test)
+{
+ const unsigned int *p = test->param_value;
+ struct xe_guc_db_mgr *dbm = test->priv;
+ unsigned int n;
+
+ KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, ~0), 0);
+ KUNIT_ASSERT_NE(test, *p, 0);
+ KUNIT_ASSERT_LE(test, *p, dbm->count);
+ if (dbm->count % *p)
+ kunit_skip(test, "must be divisible");
+
+ KUNIT_ASSERT_GE(test, xe_guc_db_mgr_reserve_range(dbm, *p, 0), 0);
+ for (n = 1; n < dbm->count / *p; n++)
+ KUNIT_ASSERT_GE(test, xe_guc_db_mgr_reserve_range(dbm, *p, 0), 0);
+ KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0);
+ xe_guc_db_mgr_release_range(dbm, 0, dbm->count);
+}
+
+static void test_range_spare(struct kunit *test)
+{
+ const unsigned int *p = test->param_value;
+ struct xe_guc_db_mgr *dbm = test->priv;
+ int id;
+
+ KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, ~0), 0);
+ KUNIT_ASSERT_LE(test, *p, dbm->count);
+
+ KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, *p, dbm->count), 0);
+ KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, *p, dbm->count - *p + 1), 0);
+ KUNIT_ASSERT_EQ(test, id = xe_guc_db_mgr_reserve_range(dbm, *p, dbm->count - *p), 0);
+ KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, 1, dbm->count - *p), 0);
+ xe_guc_db_mgr_release_range(dbm, id, *p);
+}
+
+static struct kunit_case guc_dbm_test_cases[] = {
+ KUNIT_CASE(test_empty),
+ KUNIT_CASE(test_default),
+ KUNIT_CASE_PARAM(test_size, guc_dbm_gen_params),
+ KUNIT_CASE_PARAM(test_reuse, guc_dbm_gen_params),
+ KUNIT_CASE_PARAM(test_range_overlap, guc_dbm_gen_params),
+ KUNIT_CASE_PARAM(test_range_compact, guc_dbm_gen_params),
+ KUNIT_CASE_PARAM(test_range_spare, guc_dbm_gen_params),
+ {}
+};
+
+static struct kunit_suite guc_dbm_suite = {
+ .name = "guc_dbm",
+ .test_cases = guc_dbm_test_cases,
+ .init = guc_dbm_test_init,
+};
+
+kunit_test_suites(&guc_dbm_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_guc_relay_test.c b/drivers/gpu/drm/xe/tests/xe_guc_relay_test.c
new file mode 100644
index 0000000000..13701451b9
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_guc_relay_test.c
@@ -0,0 +1,522 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <kunit/static_stub.h>
+#include <kunit/test.h>
+#include <kunit/test-bug.h>
+
+#include "xe_device.h"
+#include "xe_kunit_helpers.h"
+#include "xe_pci_test.h"
+
+#define TEST_RID 1234
+#define TEST_VFID 5
+#define TEST_LEN 6
+#define TEST_ACTION 0xa
+#define TEST_DATA(n) (0xd0 + (n))
+
+static int replacement_relay_get_totalvfs(struct xe_guc_relay *relay)
+{
+ return TEST_VFID;
+}
+
+static int relay_test_init(struct kunit *test)
+{
+ struct xe_pci_fake_data fake = {
+ .sriov_mode = XE_SRIOV_MODE_PF,
+ .platform = XE_TIGERLAKE, /* some random platform */
+ .subplatform = XE_SUBPLATFORM_NONE,
+ };
+ struct xe_guc_relay *relay;
+ struct xe_device *xe;
+
+ test->priv = &fake;
+ xe_kunit_helper_xe_device_test_init(test);
+
+ xe = test->priv;
+ KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0);
+
+ relay = &xe_device_get_gt(xe, 0)->uc.guc.relay;
+ kunit_activate_static_stub(test, relay_get_totalvfs,
+ replacement_relay_get_totalvfs);
+
+ KUNIT_ASSERT_EQ(test, xe_guc_relay_init(relay), 0);
+ KUNIT_EXPECT_TRUE(test, relay_is_ready(relay));
+ relay->last_rid = TEST_RID - 1;
+
+ test->priv = relay;
+ return 0;
+}
+
+static const u32 TEST_MSG[TEST_LEN] = {
+ FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
+ FIELD_PREP_CONST(GUC_HXG_EVENT_MSG_0_ACTION, TEST_ACTION) |
+ FIELD_PREP_CONST(GUC_HXG_EVENT_MSG_0_DATA0, TEST_DATA(0)),
+ TEST_DATA(1), TEST_DATA(2), TEST_DATA(3), TEST_DATA(4),
+};
+
+static int replacement_xe_guc_ct_send_recv_always_fails(struct xe_guc_ct *ct,
+ const u32 *msg, u32 len,
+ u32 *response_buffer)
+{
+ struct kunit *test = kunit_get_current_test();
+
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ct);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, msg);
+ KUNIT_ASSERT_GE(test, len, GUC_HXG_MSG_MIN_LEN);
+
+ return -ECOMM;
+}
+
+static int replacement_xe_guc_ct_send_recv_expects_pf2guc_relay(struct xe_guc_ct *ct,
+ const u32 *msg, u32 len,
+ u32 *response_buffer)
+{
+ struct kunit *test = kunit_get_current_test();
+
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ct);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, msg);
+ KUNIT_ASSERT_GE(test, len, PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN);
+ KUNIT_ASSERT_EQ(test, len, PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN + TEST_LEN);
+ KUNIT_EXPECT_EQ(test, GUC_HXG_ORIGIN_HOST, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]));
+ KUNIT_EXPECT_EQ(test, GUC_HXG_TYPE_REQUEST, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]));
+ KUNIT_EXPECT_EQ(test, XE_GUC_ACTION_PF2GUC_RELAY_TO_VF,
+ FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]));
+ KUNIT_EXPECT_EQ(test, TEST_VFID,
+ FIELD_GET(PF2GUC_RELAY_TO_VF_REQUEST_MSG_1_VFID, msg[1]));
+ KUNIT_EXPECT_EQ(test, TEST_RID,
+ FIELD_GET(PF2GUC_RELAY_TO_VF_REQUEST_MSG_2_RELAY_ID, msg[2]));
+ KUNIT_EXPECT_MEMEQ(test, TEST_MSG, msg + PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN,
+ sizeof(u32) * TEST_LEN);
+ return 0;
+}
+
+static const u32 test_guc2pf[GUC2PF_RELAY_FROM_VF_EVENT_MSG_MAX_LEN] = {
+ /* transport */
+ FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC) |
+ FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
+ FIELD_PREP_CONST(GUC_HXG_EVENT_MSG_0_ACTION, XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF),
+ FIELD_PREP_CONST(GUC2PF_RELAY_FROM_VF_EVENT_MSG_1_VFID, TEST_VFID),
+ FIELD_PREP_CONST(GUC2PF_RELAY_FROM_VF_EVENT_MSG_2_RELAY_ID, TEST_RID),
+ /* payload */
+ FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS),
+};
+
+static const u32 test_guc2vf[GUC2VF_RELAY_FROM_PF_EVENT_MSG_MAX_LEN] = {
+ /* transport */
+ FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC) |
+ FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
+ FIELD_PREP_CONST(GUC_HXG_EVENT_MSG_0_ACTION, XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF),
+ FIELD_PREP_CONST(GUC2VF_RELAY_FROM_PF_EVENT_MSG_1_RELAY_ID, TEST_RID),
+ /* payload */
+ FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS),
+};
+
+static void pf_rejects_guc2pf_too_short(struct kunit *test)
+{
+ const u32 len = GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN - 1;
+ struct xe_guc_relay *relay = test->priv;
+ const u32 *msg = test_guc2pf;
+
+ KUNIT_ASSERT_EQ(test, -EPROTO, xe_guc_relay_process_guc2pf(relay, msg, len));
+}
+
+static void pf_rejects_guc2pf_too_long(struct kunit *test)
+{
+ const u32 len = GUC2PF_RELAY_FROM_VF_EVENT_MSG_MAX_LEN + 1;
+ struct xe_guc_relay *relay = test->priv;
+ const u32 *msg = test_guc2pf;
+
+ KUNIT_ASSERT_EQ(test, -EMSGSIZE, xe_guc_relay_process_guc2pf(relay, msg, len));
+}
+
+static void pf_rejects_guc2pf_no_payload(struct kunit *test)
+{
+ const u32 len = GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN;
+ struct xe_guc_relay *relay = test->priv;
+ const u32 *msg = test_guc2pf;
+
+ KUNIT_ASSERT_EQ(test, -EPROTO, xe_guc_relay_process_guc2pf(relay, msg, len));
+}
+
+static void pf_fails_no_payload(struct kunit *test)
+{
+ struct xe_guc_relay *relay = test->priv;
+ const u32 msg = 0;
+
+ KUNIT_ASSERT_EQ(test, -EPROTO, relay_process_msg(relay, TEST_VFID, TEST_RID, &msg, 0));
+}
+
+static void pf_fails_bad_origin(struct kunit *test)
+{
+ struct xe_guc_relay *relay = test->priv;
+ static const u32 msg[] = {
+ FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC) |
+ FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS),
+ };
+ u32 len = ARRAY_SIZE(msg);
+
+ KUNIT_ASSERT_EQ(test, -EPROTO, relay_process_msg(relay, TEST_VFID, TEST_RID, msg, len));
+}
+
+static void pf_fails_bad_type(struct kunit *test)
+{
+ struct xe_guc_relay *relay = test->priv;
+ const u32 msg[] = {
+ FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, 4), /* only 4 is undefined */
+ };
+ u32 len = ARRAY_SIZE(msg);
+
+ KUNIT_ASSERT_EQ(test, -EBADRQC, relay_process_msg(relay, TEST_VFID, TEST_RID, msg, len));
+}
+
+static void pf_txn_reports_error(struct kunit *test)
+{
+ struct xe_guc_relay *relay = test->priv;
+ struct relay_transaction *txn;
+
+ txn = __relay_get_transaction(relay, false, TEST_VFID, TEST_RID,
+ TEST_MSG, TEST_LEN, NULL, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, txn);
+
+ kunit_activate_static_stub(test, xe_guc_ct_send_recv,
+ replacement_xe_guc_ct_send_recv_always_fails);
+ KUNIT_EXPECT_EQ(test, -ECOMM, relay_send_transaction(relay, txn));
+
+ relay_release_transaction(relay, txn);
+}
+
+static void pf_txn_sends_pf2guc(struct kunit *test)
+{
+ struct xe_guc_relay *relay = test->priv;
+ struct relay_transaction *txn;
+
+ txn = __relay_get_transaction(relay, false, TEST_VFID, TEST_RID,
+ TEST_MSG, TEST_LEN, NULL, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, txn);
+
+ kunit_activate_static_stub(test, xe_guc_ct_send_recv,
+ replacement_xe_guc_ct_send_recv_expects_pf2guc_relay);
+ KUNIT_ASSERT_EQ(test, 0, relay_send_transaction(relay, txn));
+
+ relay_release_transaction(relay, txn);
+}
+
+static void pf_sends_pf2guc(struct kunit *test)
+{
+ struct xe_guc_relay *relay = test->priv;
+
+ kunit_activate_static_stub(test, xe_guc_ct_send_recv,
+ replacement_xe_guc_ct_send_recv_expects_pf2guc_relay);
+ KUNIT_ASSERT_EQ(test, 0,
+ xe_guc_relay_send_to_vf(relay, TEST_VFID,
+ TEST_MSG, TEST_LEN, NULL, 0));
+}
+
+static int replacement_xe_guc_ct_send_recv_loopback_relay(struct xe_guc_ct *ct,
+ const u32 *msg, u32 len,
+ u32 *response_buffer)
+{
+ struct kunit *test = kunit_get_current_test();
+ struct xe_guc_relay *relay = test->priv;
+ u32 *reply = kunit_kzalloc(test, len * sizeof(u32), GFP_KERNEL);
+ int (*guc2relay)(struct xe_guc_relay *, const u32 *, u32);
+ u32 action;
+ int err;
+
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ct);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, msg);
+ KUNIT_ASSERT_GE(test, len, GUC_HXG_MSG_MIN_LEN);
+ KUNIT_ASSERT_EQ(test, GUC_HXG_TYPE_REQUEST,
+ FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]));
+ KUNIT_ASSERT_GE(test, len, GUC_HXG_REQUEST_MSG_MIN_LEN);
+ KUNIT_ASSERT_NOT_NULL(test, reply);
+
+ switch (FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0])) {
+ case XE_GUC_ACTION_PF2GUC_RELAY_TO_VF:
+ KUNIT_ASSERT_GE(test, len, PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN);
+ action = XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF;
+ guc2relay = xe_guc_relay_process_guc2pf;
+ break;
+ case XE_GUC_ACTION_VF2GUC_RELAY_TO_PF:
+ KUNIT_ASSERT_GE(test, len, VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN);
+ action = XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF;
+ guc2relay = xe_guc_relay_process_guc2vf;
+ break;
+ default:
+ KUNIT_FAIL(test, "bad RELAY action %#x", msg[0]);
+ return -EINVAL;
+ }
+
+ reply[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
+ FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION, action);
+ memcpy(reply + 1, msg + 1, sizeof(u32) * (len - 1));
+
+ err = guc2relay(relay, reply, len);
+ KUNIT_EXPECT_EQ(test, err, 0);
+
+ return err;
+}
+
+static void test_requires_relay_testloop(struct kunit *test)
+{
+ /*
+ * The debug relay action GUC_RELAY_ACTION_VFXPF_TESTLOOP is available
+ * only on builds with CONFIG_DRM_XE_DEBUG_SRIOV enabled.
+ * See "kunit.py --kconfig_add" option if it's missing.
+ */
+ if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV))
+ kunit_skip(test, "requires %s\n", __stringify(CONFIG_DRM_XE_DEBUG_SRIOV));
+}
+
+static void pf_loopback_nop(struct kunit *test)
+{
+ struct xe_guc_relay *relay = test->priv;
+ u32 request[] = {
+ FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VFXPF_TESTLOOP) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, VFXPF_TESTLOOP_OPCODE_NOP),
+ };
+ u32 response[GUC_HXG_RESPONSE_MSG_MIN_LEN];
+ int ret;
+
+ test_requires_relay_testloop(test);
+
+ kunit_activate_static_stub(test, relay_kick_worker, relay_process_incoming_action);
+ kunit_activate_static_stub(test, xe_guc_ct_send_recv,
+ replacement_xe_guc_ct_send_recv_loopback_relay);
+ ret = xe_guc_relay_send_to_vf(relay, TEST_VFID,
+ request, ARRAY_SIZE(request),
+ response, ARRAY_SIZE(response));
+ KUNIT_ASSERT_EQ(test, ret, GUC_HXG_RESPONSE_MSG_MIN_LEN);
+ KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, response[0]),
+ GUC_HXG_ORIGIN_HOST);
+ KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_MSG_0_TYPE, response[0]),
+ GUC_HXG_TYPE_RESPONSE_SUCCESS);
+ KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, response[0]), 0);
+}
+
+static void pf_loopback_echo(struct kunit *test)
+{
+ struct xe_guc_relay *relay = test->priv;
+ u32 request[] = {
+ FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VFXPF_TESTLOOP) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, VFXPF_TESTLOOP_OPCODE_ECHO),
+ TEST_DATA(1), TEST_DATA(2), TEST_DATA(3), TEST_DATA(4),
+ };
+ u32 response[ARRAY_SIZE(request)];
+ unsigned int n;
+ int ret;
+
+ test_requires_relay_testloop(test);
+
+ kunit_activate_static_stub(test, relay_kick_worker, relay_process_incoming_action);
+ kunit_activate_static_stub(test, xe_guc_ct_send_recv,
+ replacement_xe_guc_ct_send_recv_loopback_relay);
+ ret = xe_guc_relay_send_to_vf(relay, TEST_VFID,
+ request, ARRAY_SIZE(request),
+ response, ARRAY_SIZE(response));
+ KUNIT_ASSERT_EQ(test, ret, ARRAY_SIZE(response));
+ KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, response[0]),
+ GUC_HXG_ORIGIN_HOST);
+ KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_MSG_0_TYPE, response[0]),
+ GUC_HXG_TYPE_RESPONSE_SUCCESS);
+ KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, response[0]),
+ ARRAY_SIZE(response));
+ for (n = GUC_HXG_RESPONSE_MSG_MIN_LEN; n < ret; n++)
+ KUNIT_EXPECT_EQ(test, request[n], response[n]);
+}
+
+static void pf_loopback_fail(struct kunit *test)
+{
+ struct xe_guc_relay *relay = test->priv;
+ u32 request[] = {
+ FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VFXPF_TESTLOOP) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, VFXPF_TESTLOOP_OPCODE_FAIL),
+ };
+ u32 response[GUC_HXG_RESPONSE_MSG_MIN_LEN];
+ int ret;
+
+ test_requires_relay_testloop(test);
+
+ kunit_activate_static_stub(test, relay_kick_worker, relay_process_incoming_action);
+ kunit_activate_static_stub(test, xe_guc_ct_send_recv,
+ replacement_xe_guc_ct_send_recv_loopback_relay);
+ ret = xe_guc_relay_send_to_vf(relay, TEST_VFID,
+ request, ARRAY_SIZE(request),
+ response, ARRAY_SIZE(response));
+ KUNIT_ASSERT_EQ(test, ret, -EREMOTEIO);
+}
+
+static void pf_loopback_busy(struct kunit *test)
+{
+ struct xe_guc_relay *relay = test->priv;
+ u32 request[] = {
+ FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VFXPF_TESTLOOP) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, VFXPF_TESTLOOP_OPCODE_BUSY),
+ TEST_DATA(0xb),
+ };
+ u32 response[GUC_HXG_RESPONSE_MSG_MIN_LEN];
+ int ret;
+
+ test_requires_relay_testloop(test);
+
+ kunit_activate_static_stub(test, relay_testonly_nop, relay_process_incoming_action);
+ kunit_activate_static_stub(test, relay_kick_worker, relay_process_incoming_action);
+ kunit_activate_static_stub(test, xe_guc_ct_send_recv,
+ replacement_xe_guc_ct_send_recv_loopback_relay);
+ ret = xe_guc_relay_send_to_vf(relay, TEST_VFID,
+ request, ARRAY_SIZE(request),
+ response, ARRAY_SIZE(response));
+ KUNIT_ASSERT_EQ(test, ret, GUC_HXG_RESPONSE_MSG_MIN_LEN);
+}
+
+static void pf_loopback_retry(struct kunit *test)
+{
+ struct xe_guc_relay *relay = test->priv;
+ u32 request[] = {
+ FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VFXPF_TESTLOOP) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, VFXPF_TESTLOOP_OPCODE_RETRY),
+ TEST_DATA(0xd), TEST_DATA(0xd),
+ };
+ u32 response[GUC_HXG_RESPONSE_MSG_MIN_LEN];
+ int ret;
+
+ test_requires_relay_testloop(test);
+
+ kunit_activate_static_stub(test, relay_kick_worker, relay_process_incoming_action);
+ kunit_activate_static_stub(test, xe_guc_ct_send_recv,
+ replacement_xe_guc_ct_send_recv_loopback_relay);
+ ret = xe_guc_relay_send_to_vf(relay, TEST_VFID,
+ request, ARRAY_SIZE(request),
+ response, ARRAY_SIZE(response));
+ KUNIT_ASSERT_EQ(test, ret, GUC_HXG_RESPONSE_MSG_MIN_LEN);
+}
+
+static struct kunit_case pf_relay_test_cases[] = {
+ KUNIT_CASE(pf_rejects_guc2pf_too_short),
+ KUNIT_CASE(pf_rejects_guc2pf_too_long),
+ KUNIT_CASE(pf_rejects_guc2pf_no_payload),
+ KUNIT_CASE(pf_fails_no_payload),
+ KUNIT_CASE(pf_fails_bad_origin),
+ KUNIT_CASE(pf_fails_bad_type),
+ KUNIT_CASE(pf_txn_reports_error),
+ KUNIT_CASE(pf_txn_sends_pf2guc),
+ KUNIT_CASE(pf_sends_pf2guc),
+ KUNIT_CASE(pf_loopback_nop),
+ KUNIT_CASE(pf_loopback_echo),
+ KUNIT_CASE(pf_loopback_fail),
+ KUNIT_CASE_SLOW(pf_loopback_busy),
+ KUNIT_CASE_SLOW(pf_loopback_retry),
+ {}
+};
+
+static struct kunit_suite pf_relay_suite = {
+ .name = "pf_relay",
+ .test_cases = pf_relay_test_cases,
+ .init = relay_test_init,
+};
+
+static void vf_rejects_guc2vf_too_short(struct kunit *test)
+{
+ const u32 len = GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN - 1;
+ struct xe_guc_relay *relay = test->priv;
+ const u32 *msg = test_guc2vf;
+
+ KUNIT_ASSERT_EQ(test, -EPROTO, xe_guc_relay_process_guc2vf(relay, msg, len));
+}
+
+static void vf_rejects_guc2vf_too_long(struct kunit *test)
+{
+ const u32 len = GUC2VF_RELAY_FROM_PF_EVENT_MSG_MAX_LEN + 1;
+ struct xe_guc_relay *relay = test->priv;
+ const u32 *msg = test_guc2vf;
+
+ KUNIT_ASSERT_EQ(test, -EMSGSIZE, xe_guc_relay_process_guc2vf(relay, msg, len));
+}
+
+static void vf_rejects_guc2vf_no_payload(struct kunit *test)
+{
+ const u32 len = GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN;
+ struct xe_guc_relay *relay = test->priv;
+ const u32 *msg = test_guc2vf;
+
+ KUNIT_ASSERT_EQ(test, -EPROTO, xe_guc_relay_process_guc2vf(relay, msg, len));
+}
+
+static struct kunit_case vf_relay_test_cases[] = {
+ KUNIT_CASE(vf_rejects_guc2vf_too_short),
+ KUNIT_CASE(vf_rejects_guc2vf_too_long),
+ KUNIT_CASE(vf_rejects_guc2vf_no_payload),
+ {}
+};
+
+static struct kunit_suite vf_relay_suite = {
+ .name = "vf_relay",
+ .test_cases = vf_relay_test_cases,
+ .init = relay_test_init,
+};
+
+static void xe_drops_guc2pf_if_not_ready(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ struct xe_guc_relay *relay = &xe_device_get_gt(xe, 0)->uc.guc.relay;
+ const u32 *msg = test_guc2pf;
+ u32 len = GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN + GUC_RELAY_MSG_MIN_LEN;
+
+ KUNIT_ASSERT_EQ(test, -ENODEV, xe_guc_relay_process_guc2pf(relay, msg, len));
+}
+
+static void xe_drops_guc2vf_if_not_ready(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ struct xe_guc_relay *relay = &xe_device_get_gt(xe, 0)->uc.guc.relay;
+ const u32 *msg = test_guc2vf;
+ u32 len = GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN + GUC_RELAY_MSG_MIN_LEN;
+
+ KUNIT_ASSERT_EQ(test, -ENODEV, xe_guc_relay_process_guc2vf(relay, msg, len));
+}
+
+static void xe_rejects_send_if_not_ready(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ struct xe_guc_relay *relay = &xe_device_get_gt(xe, 0)->uc.guc.relay;
+ u32 msg[GUC_RELAY_MSG_MIN_LEN];
+ u32 len = ARRAY_SIZE(msg);
+
+ KUNIT_ASSERT_EQ(test, -ENODEV, xe_guc_relay_send_to_pf(relay, msg, len, NULL, 0));
+ KUNIT_ASSERT_EQ(test, -ENODEV, relay_send_to(relay, TEST_VFID, msg, len, NULL, 0));
+}
+
+static struct kunit_case no_relay_test_cases[] = {
+ KUNIT_CASE(xe_drops_guc2pf_if_not_ready),
+ KUNIT_CASE(xe_drops_guc2vf_if_not_ready),
+ KUNIT_CASE(xe_rejects_send_if_not_ready),
+ {}
+};
+
+static struct kunit_suite no_relay_suite = {
+ .name = "no_relay",
+ .test_cases = no_relay_test_cases,
+ .init = xe_kunit_helper_xe_device_test_init,
+};
+
+kunit_test_suites(&no_relay_suite,
+ &pf_relay_suite,
+ &vf_relay_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c
new file mode 100644
index 0000000000..fefe79b3b7
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <kunit/test.h>
+#include <kunit/static_stub.h>
+#include <kunit/visibility.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_kunit_helpers.h>
+
+#include "tests/xe_kunit_helpers.h"
+#include "tests/xe_pci_test.h"
+#include "xe_device_types.h"
+
+/**
+ * xe_kunit_helper_alloc_xe_device - Allocate a &xe_device for a KUnit test.
+ * @test: the &kunit where this &xe_device will be used
+ * @dev: The parent device object
+ *
+ * This function allocates an xe_device using drm_kunit_helper_alloc_drm_device().
+ * The xe_device allocation is managed by the test.
+ *
+ * @dev should be allocated using drm_kunit_helper_alloc_device().
+ *
+ * This function uses KUNIT_ASSERT to detect any allocation failures.
+ *
+ * Return: A pointer to the new &xe_device.
+ */
+struct xe_device *xe_kunit_helper_alloc_xe_device(struct kunit *test,
+ struct device *dev)
+{
+ struct xe_device *xe;
+
+ xe = drm_kunit_helper_alloc_drm_device(test, dev,
+ struct xe_device,
+ drm, DRIVER_GEM);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe);
+ return xe;
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_kunit_helper_alloc_xe_device);
+
+static void kunit_action_restore_priv(void *priv)
+{
+ struct kunit *test = kunit_get_current_test();
+
+ test->priv = priv;
+}
+
+/**
+ * xe_kunit_helper_xe_device_test_init - Prepare a &xe_device for a KUnit test.
+ * @test: the &kunit where this fake &xe_device will be used
+ *
+ * This function allocates and initializes a fake &xe_device and stores its
+ * pointer as &kunit.priv to allow the test code to access it.
+ *
+ * This function can be used directly as a custom implementation of
+ * &kunit_suite.init.
+ *
+ * It is possible to prepare a specific variant of the fake &xe_device by
+ * pointing &kunit.priv at a struct xe_pci_fake_data filled with the desired
+ * parameters prior to calling this function.
+ *
+ * This function uses KUNIT_ASSERT to detect any failures.
+ *
+ * Return: Always 0.
+ */
+int xe_kunit_helper_xe_device_test_init(struct kunit *test)
+{
+ struct xe_device *xe;
+ struct device *dev;
+ int err;
+
+ dev = drm_kunit_helper_alloc_device(test);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+
+ xe = xe_kunit_helper_alloc_xe_device(test, dev);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe);
+
+ err = xe_pci_fake_device_init(xe);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ err = kunit_add_action_or_reset(test, kunit_action_restore_priv, test->priv);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ test->priv = xe;
+ return 0;
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_kunit_helper_xe_device_test_init);
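The two helpers above are intended to be wired into a suite's init path. A minimal sketch of a suite consuming them is shown below; the suite and case names are illustrative assumptions, not part of this patch:

/* Illustrative only: a hypothetical suite built on the new helpers. */
#include <kunit/test.h>

#include "tests/xe_kunit_helpers.h"
#include "xe_device_types.h"

static void fake_device_smoke(struct kunit *test)
{
	/* xe_kunit_helper_xe_device_test_init() stored the fake device here. */
	struct xe_device *xe = test->priv;

	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe);
	kunit_info(test, "running on fake platform %d\n", xe->info.platform);
}

static struct kunit_case fake_device_cases[] = {
	KUNIT_CASE(fake_device_smoke),
	{}
};

static struct kunit_suite fake_device_suite = {
	.name = "fake_device_example",
	.test_cases = fake_device_cases,
	/* Reuse the helper as the suite's init, as the relay tests above do. */
	.init = xe_kunit_helper_xe_device_test_init,
};

kunit_test_suite(fake_device_suite);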
diff --git a/drivers/gpu/drm/xe/tests/xe_kunit_helpers.h b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.h
new file mode 100644
index 0000000000..067a1babf0
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_KUNIT_HELPERS_H_
+#define _XE_KUNIT_HELPERS_H_
+
+struct device;
+struct kunit;
+struct xe_device;
+
+struct xe_device *xe_kunit_helper_alloc_xe_device(struct kunit *test,
+ struct device *dev);
+int xe_kunit_helper_xe_device_test_init(struct kunit *test);
+
+#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c
index 7dd34f94e8..df5c36b70a 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs.c
+++ b/drivers/gpu/drm/xe/tests/xe_mocs.c
@@ -128,3 +128,39 @@ void xe_live_mocs_kernel_kunit(struct kunit *test)
xe_call_for_each_device(mocs_kernel_test_run_device);
}
EXPORT_SYMBOL_IF_KUNIT(xe_live_mocs_kernel_kunit);
+
+static int mocs_reset_test_run_device(struct xe_device *xe)
+{
+ /* Check the mocs setup is retained over GT reset */
+
+ struct live_mocs mocs;
+ struct xe_gt *gt;
+ unsigned int flags;
+ int id;
+ struct kunit *test = xe_cur_kunit();
+
+ for_each_gt(gt, xe, id) {
+ flags = live_mocs_init(&mocs, gt);
+ kunit_info(test, "mocs_reset_test before reset\n");
+ if (flags & HAS_GLOBAL_MOCS)
+ read_mocs_table(gt, &mocs.table);
+ if (flags & HAS_LNCF_MOCS)
+ read_l3cc_table(gt, &mocs.table);
+
+ xe_gt_reset_async(gt);
+ flush_work(&gt->reset.worker);
+
+ kunit_info(test, "mocs_reset_test after reset\n");
+ if (flags & HAS_GLOBAL_MOCS)
+ read_mocs_table(gt, &mocs.table);
+ if (flags & HAS_LNCF_MOCS)
+ read_l3cc_table(gt, &mocs.table);
+ }
+ return 0;
+}
+
+void xe_live_mocs_reset_kunit(struct kunit *test)
+{
+ xe_call_for_each_device(mocs_reset_test_run_device);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_live_mocs_reset_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.c b/drivers/gpu/drm/xe/tests/xe_mocs_test.c
index 421b819fd4..ee40f31e1e 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.c
@@ -9,6 +9,7 @@
static struct kunit_case xe_mocs_tests[] = {
KUNIT_CASE(xe_live_mocs_kernel_kunit),
+ KUNIT_CASE(xe_live_mocs_reset_kunit),
{}
};
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.h b/drivers/gpu/drm/xe/tests/xe_mocs_test.h
index 7faa3575e6..e7699d4954 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.h
@@ -9,5 +9,6 @@
struct kunit;
void xe_live_mocs_kernel_kunit(struct kunit *test);
+void xe_live_mocs_reset_kunit(struct kunit *test);
#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c
index 602793644f..f62809ca8b 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci.c
@@ -156,6 +156,9 @@ int xe_pci_fake_device_init(struct xe_device *xe)
return -ENODEV;
done:
+ xe->sriov.__mode = data && data->sriov_mode ?
+ data->sriov_mode : XE_SRIOV_MODE_NONE;
+
kunit_activate_static_stub(test, read_gmdid, fake_read_gmdid);
xe_info_init_early(xe, desc, subplatform_desc);
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c
index 171e4180f1..a6705a5363 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c
@@ -64,8 +64,3 @@ static struct kunit_suite xe_pci_test_suite = {
};
kunit_test_suite(xe_pci_test_suite);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("xe_pci kunit test");
-MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h
index 811ffe5bd9..f40dcec839 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h
@@ -9,6 +9,7 @@
#include <linux/types.h>
#include "xe_platform_types.h"
+#include "xe_sriov_types.h"
struct xe_device;
struct xe_graphics_desc;
@@ -23,6 +24,7 @@ void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn);
void xe_call_for_each_media_ip(xe_media_fn xe_fn);
struct xe_pci_fake_data {
+ enum xe_sriov_mode sriov_mode;
enum xe_platform platform;
enum xe_subplatform subplatform;
u32 graphics_verx100;
diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
index 4a69728976..06759d7547 100644
--- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
@@ -15,6 +15,7 @@
#include "regs/xe_reg_defs.h"
#include "xe_device.h"
#include "xe_device_types.h"
+#include "xe_kunit_helpers.h"
#include "xe_pci_test.h"
#include "xe_reg_sr.h"
#include "xe_rtp.h"
@@ -276,9 +277,7 @@ static int xe_rtp_test_init(struct kunit *test)
dev = drm_kunit_helper_alloc_device(test);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
- xe = drm_kunit_helper_alloc_drm_device(test, dev,
- struct xe_device,
- drm, DRIVER_GEM);
+ xe = xe_kunit_helper_alloc_xe_device(test, dev);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe);
/* Initialize an empty device */
@@ -312,8 +311,3 @@ static struct kunit_suite xe_rtp_test_suite = {
};
kunit_test_suite(xe_rtp_test_suite);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("xe_rtp kunit test");
-MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_test_mod.c b/drivers/gpu/drm/xe/tests/xe_test_mod.c
new file mode 100644
index 0000000000..875f3e6f96
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_test_mod.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+#include <linux/module.h>
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("xe kunit tests");
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c
index b4715b78ef..44570d8883 100644
--- a/drivers/gpu/drm/xe/tests/xe_wa_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c
@@ -9,6 +9,7 @@
#include <kunit/test.h>
#include "xe_device.h"
+#include "xe_kunit_helpers.h"
#include "xe_pci_test.h"
#include "xe_reg_sr.h"
#include "xe_tuning.h"
@@ -65,14 +66,8 @@ static const struct platform_test_case cases[] = {
PLATFORM_CASE(ALDERLAKE_P, C0),
SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0),
SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0),
- SUBPLATFORM_CASE(DG2, G10, A0),
- SUBPLATFORM_CASE(DG2, G10, A1),
- SUBPLATFORM_CASE(DG2, G10, B0),
SUBPLATFORM_CASE(DG2, G10, C0),
- SUBPLATFORM_CASE(DG2, G11, A0),
- SUBPLATFORM_CASE(DG2, G11, B0),
SUBPLATFORM_CASE(DG2, G11, B1),
- SUBPLATFORM_CASE(DG2, G12, A0),
SUBPLATFORM_CASE(DG2, G12, A1),
GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0),
GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0),
@@ -105,9 +100,7 @@ static int xe_wa_test_init(struct kunit *test)
dev = drm_kunit_helper_alloc_device(test);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
- xe = drm_kunit_helper_alloc_drm_device(test, dev,
- struct xe_device,
- drm, DRIVER_GEM);
+ xe = xe_kunit_helper_alloc_xe_device(test, dev);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe);
test->priv = &data;
@@ -160,8 +153,3 @@ static struct kunit_suite xe_rtp_test_suite = {
};
kunit_test_suite(xe_rtp_test_suite);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("xe_wa kunit test");
-MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index 7c124475c4..a35e0781b7 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -96,7 +96,8 @@ struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
{
u64 addr = xe_sa_bo_gpu_addr(bb->bo);
- xe_gt_assert(q->gt, !(q->vm && q->vm->flags & XE_VM_FLAG_MIGRATION));
+ xe_gt_assert(q->gt, !xe_sched_job_is_migration(q));
+ xe_gt_assert(q->gt, q->width == 1);
return __xe_bb_create_job(q, bb, &addr);
}
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index eb2c44a328..9c0837b6fd 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -46,22 +46,26 @@ static const struct ttm_place sys_placement_flags = {
static struct ttm_placement sys_placement = {
.num_placement = 1,
.placement = &sys_placement_flags,
- .num_busy_placement = 1,
- .busy_placement = &sys_placement_flags,
};
-static const struct ttm_place tt_placement_flags = {
- .fpfn = 0,
- .lpfn = 0,
- .mem_type = XE_PL_TT,
- .flags = 0,
+static const struct ttm_place tt_placement_flags[] = {
+ {
+ .fpfn = 0,
+ .lpfn = 0,
+ .mem_type = XE_PL_TT,
+ .flags = TTM_PL_FLAG_DESIRED,
+ },
+ {
+ .fpfn = 0,
+ .lpfn = 0,
+ .mem_type = XE_PL_SYSTEM,
+ .flags = TTM_PL_FLAG_FALLBACK,
+ }
};
static struct ttm_placement tt_placement = {
- .num_placement = 1,
- .placement = &tt_placement_flags,
- .num_busy_placement = 1,
- .busy_placement = &sys_placement_flags,
+ .num_placement = 2,
+ .placement = tt_placement_flags,
};
bool mem_type_is_vram(u32 mem_type)
@@ -216,8 +220,6 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
bo->placement = (struct ttm_placement) {
.num_placement = c,
.placement = bo->placements,
- .num_busy_placement = c,
- .busy_placement = bo->placements,
};
return 0;
@@ -237,7 +239,6 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo,
/* Don't handle scatter gather BOs */
if (tbo->type == ttm_bo_type_sg) {
placement->num_placement = 0;
- placement->num_busy_placement = 0;
return;
}
@@ -572,6 +573,8 @@ static int xe_bo_move_notify(struct xe_bo *bo,
{
struct ttm_buffer_object *ttm_bo = &bo->ttm;
struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+ struct ttm_resource *old_mem = ttm_bo->resource;
+ u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
int ret;
/*
@@ -591,6 +594,18 @@ static int xe_bo_move_notify(struct xe_bo *bo,
if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
dma_buf_move_notify(ttm_bo->base.dma_buf);
+ /*
+ * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
+ * so if we moved from VRAM make sure to unlink this from the userfault
+ * tracking.
+ */
+ if (mem_type_is_vram(old_mem_type)) {
+ mutex_lock(&xe->mem_access.vram_userfault.lock);
+ if (!list_empty(&bo->vram_userfault_link))
+ list_del_init(&bo->vram_userfault_link);
+ mutex_unlock(&xe->mem_access.vram_userfault.lock);
+ }
+
return 0;
}
@@ -1012,7 +1027,7 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
}
}
-struct ttm_device_funcs xe_ttm_funcs = {
+const struct ttm_device_funcs xe_ttm_funcs = {
.ttm_tt_create = xe_ttm_tt_create,
.ttm_tt_populate = xe_ttm_tt_populate,
.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
@@ -1048,6 +1063,11 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
if (bo->vm && xe_bo_is_user(bo))
xe_vm_put(bo->vm);
+ mutex_lock(&xe->mem_access.vram_userfault.lock);
+ if (!list_empty(&bo->vram_userfault_link))
+ list_del(&bo->vram_userfault_link);
+ mutex_unlock(&xe->mem_access.vram_userfault.lock);
+
kfree(bo);
}
@@ -1088,16 +1108,20 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
{
struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
struct drm_device *ddev = tbo->base.dev;
+ struct xe_device *xe = to_xe_device(ddev);
+ struct xe_bo *bo = ttm_to_xe_bo(tbo);
+ bool needs_rpm = bo->flags & XE_BO_CREATE_VRAM_MASK;
vm_fault_t ret;
int idx;
+ if (needs_rpm)
+ xe_device_mem_access_get(xe);
+
ret = ttm_bo_vm_reserve(tbo, vmf);
if (ret)
- return ret;
+ goto out;
if (drm_dev_enter(ddev, &idx)) {
- struct xe_bo *bo = ttm_to_xe_bo(tbo);
-
trace_xe_bo_cpu_fault(bo);
ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
@@ -1106,10 +1130,24 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
} else {
ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
}
+
if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
- return ret;
+ goto out;
+ /*
+ * ttm_bo_vm_reserve() already has dma_resv_lock.
+ */
+ if (ret == VM_FAULT_NOPAGE && mem_type_is_vram(tbo->resource->mem_type)) {
+ mutex_lock(&xe->mem_access.vram_userfault.lock);
+ if (list_empty(&bo->vram_userfault_link))
+ list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault.list);
+ mutex_unlock(&xe->mem_access.vram_userfault.lock);
+ }
dma_resv_unlock(tbo->base.resv);
+out:
+ if (needs_rpm)
+ xe_device_mem_access_put(xe);
+
return ret;
}
@@ -1220,6 +1258,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
#ifdef CONFIG_PROC_FS
INIT_LIST_HEAD(&bo->client_link);
#endif
+ INIT_LIST_HEAD(&bo->vram_userfault_link);
drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
@@ -1319,8 +1358,6 @@ static int __xe_bo_fixed_placement(struct xe_device *xe,
bo->placement = (struct ttm_placement) {
.num_placement = 1,
.placement = place,
- .num_busy_placement = 1,
- .busy_placement = place,
};
return 0;
@@ -1534,6 +1571,38 @@ struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_til
return bo;
}
+/**
+ * xe_managed_bo_reinit_in_vram - Replace a managed sysmem BO with a VRAM copy
+ * @xe: xe device
+ * @tile: Tile where the new buffer will be created
+ * @src: Managed buffer object allocated in system memory
+ *
+ * Replace a managed src buffer object allocated in system memory with a new
+ * one allocated in vram, copying the data between them.
+ * The buffer object in VRAM will not have the same GGTT address; the caller
+ * is responsible for making sure that any old references to it are updated.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src)
+{
+ struct xe_bo *bo;
+
+ xe_assert(xe, IS_DGFX(xe));
+ xe_assert(xe, !(*src)->vmap.is_iomem);
+
+ bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr, (*src)->size,
+ XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+ XE_BO_CREATE_GGTT_BIT);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ drmm_release_action(&xe->drm, __xe_bo_unpin_map_no_vm, *src);
+ *src = bo;
+
+ return 0;
+}
+
/*
* XXX: This is in the VM bind data path, likely should calculate this once and
* store, with a recalculation if the BO is moved.
@@ -2078,9 +2147,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
xe_place_from_ttm_type(mem_type, &requested);
placement.num_placement = 1;
- placement.num_busy_placement = 1;
placement.placement = &requested;
- placement.busy_placement = &requested;
/*
* Stolen needs to be handled like below VRAM handling if we ever need
@@ -2230,6 +2297,16 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
return err;
}
+void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
+{
+ struct ttm_buffer_object *tbo = &bo->ttm;
+ struct ttm_device *bdev = tbo->bdev;
+
+ drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
+
+ list_del_init(&bo->vram_userfault_link);
+}
+
#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
#include "tests/xe_bo.c"
#endif
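A hedged usage sketch for the new xe_managed_bo_reinit_in_vram() helper follows; the example_ function name and the surrounding flow are assumptions for illustration only:

/* Illustrative only: re-home a managed sysmem BO into VRAM on dGFX. */
#include "xe_bo.h"
#include "xe_device.h"

static int example_move_managed_bo_to_vram(struct xe_device *xe,
					   struct xe_tile *tile,
					   struct xe_bo **bo)
{
	int err;

	/* The helper asserts IS_DGFX(), so guard the call on integrated parts. */
	if (!IS_DGFX(xe))
		return 0;

	err = xe_managed_bo_reinit_in_vram(xe, tile, bo);
	if (err)
		return err;

	/*
	 * *bo now points at the VRAM copy. Its GGTT address differs from the
	 * old object, so any cached offsets must be refreshed by the caller.
	 */
	return 0;
}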
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 8be42ac6cd..c59ad15961 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -44,6 +44,7 @@
#define XE_BO_FIXED_PLACEMENT_BIT BIT(11)
#define XE_BO_PAGETABLE BIT(12)
#define XE_BO_NEEDS_CPU_ACCESS BIT(13)
+#define XE_BO_NEEDS_UC BIT(14)
/* this one is trigger internally only */
#define XE_BO_INTERNAL_TEST BIT(30)
#define XE_BO_INTERNAL_64K BIT(31)
@@ -128,6 +129,7 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile
size_t size, u32 flags);
struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
const void *data, size_t size, u32 flags);
+int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src);
int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
u32 bo_flags);
@@ -242,13 +244,15 @@ int xe_bo_evict(struct xe_bo *bo, bool force_alloc);
int xe_bo_evict_pinned(struct xe_bo *bo);
int xe_bo_restore_pinned(struct xe_bo *bo);
-extern struct ttm_device_funcs xe_ttm_funcs;
+extern const struct ttm_device_funcs xe_ttm_funcs;
extern const char *const xe_mem_type_to_name[];
int xe_gem_create_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
+void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo);
+
int xe_bo_dumb_create(struct drm_file *file_priv,
struct drm_device *dev,
struct drm_mode_create_dumb *args);
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 81dca15315..86422e113d 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -69,6 +69,9 @@ struct xe_bo {
* objects.
*/
u16 cpu_caching;
+
+ /** @vram_userfault_link: Link into @mem_access.vram_userfault.list */
+ struct list_head vram_userfault_link;
};
#define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base)
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index c56fd7d59f..01db5b27be 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -55,6 +55,7 @@ static int info(struct seq_file *m, void *data)
drm_printf(&p, "force_execlist %s\n", str_yes_no(xe->info.force_execlist));
drm_printf(&p, "has_flat_ccs %s\n", str_yes_no(xe->info.has_flat_ccs));
drm_printf(&p, "has_usm %s\n", str_yes_no(xe->info.has_usm));
+ drm_printf(&p, "skip_guc_pc %s\n", str_yes_no(xe->info.skip_guc_pc));
for_each_gt(gt, xe, id) {
drm_printf(&p, "gt%d force wake %d\n", id,
xe_force_wake_ref(gt_to_fw(gt), XE_FW_GT));
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 68abc0b195..68d3d623a0 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -16,6 +16,8 @@
#include "xe_guc_ct.h"
#include "xe_guc_submit.h"
#include "xe_hw_engine.h"
+#include "xe_sched_job.h"
+#include "xe_vm.h"
/**
* DOC: Xe device coredump
@@ -58,11 +60,22 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q)
return &q->gt->uc.guc;
}
+static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
+{
+ struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work);
+
+ xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
+ if (ss->vm)
+ xe_vm_snapshot_capture_delayed(ss->vm);
+ xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL);
+}
+
static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
size_t count, void *data, size_t datalen)
{
struct xe_devcoredump *coredump = data;
- struct xe_devcoredump_snapshot *ss;
+ struct xe_device *xe = coredump_to_xe(coredump);
+ struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
struct drm_printer p;
struct drm_print_iterator iter;
struct timespec64 ts;
@@ -72,12 +85,14 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
if (!data || !coredump_to_xe(coredump))
return -ENODEV;
+ /* Ensure delayed work is captured before continuing */
+ flush_work(&ss->work);
+
iter.data = buffer;
iter.offset = 0;
iter.start = offset;
iter.remain = count;
- ss = &coredump->snapshot;
p = drm_coredump_printer(&iter);
drm_printf(&p, "**** Xe Device Coredump ****\n");
@@ -88,16 +103,24 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
ts = ktime_to_timespec64(ss->boot_time);
drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
+ xe_device_snapshot_print(xe, &p);
drm_printf(&p, "\n**** GuC CT ****\n");
xe_guc_ct_snapshot_print(coredump->snapshot.ct, &p);
xe_guc_exec_queue_snapshot_print(coredump->snapshot.ge, &p);
+ drm_printf(&p, "\n**** Job ****\n");
+ xe_sched_job_snapshot_print(coredump->snapshot.job, &p);
+
drm_printf(&p, "\n**** HW Engines ****\n");
for (i = 0; i < XE_NUM_HW_ENGINES; i++)
if (coredump->snapshot.hwe[i])
xe_hw_engine_snapshot_print(coredump->snapshot.hwe[i],
&p);
+ if (coredump->snapshot.vm) {
+ drm_printf(&p, "\n**** VM state ****\n");
+ xe_vm_snapshot_print(coredump->snapshot.vm, &p);
+ }
return count - iter.remain;
}
@@ -111,21 +134,28 @@ static void xe_devcoredump_free(void *data)
if (!data || !coredump_to_xe(coredump))
return;
+ cancel_work_sync(&coredump->snapshot.work);
+
xe_guc_ct_snapshot_free(coredump->snapshot.ct);
xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge);
+ xe_sched_job_snapshot_free(coredump->snapshot.job);
for (i = 0; i < XE_NUM_HW_ENGINES; i++)
if (coredump->snapshot.hwe[i])
xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]);
+ xe_vm_snapshot_free(coredump->snapshot.vm);
+ /* To prevent stale data on next snapshot, clear everything */
+ memset(&coredump->snapshot, 0, sizeof(coredump->snapshot));
coredump->captured = false;
drm_info(&coredump_to_xe(coredump)->drm,
"Xe device coredump has been deleted.\n");
}
static void devcoredump_snapshot(struct xe_devcoredump *coredump,
- struct xe_exec_queue *q)
+ struct xe_sched_job *job)
{
struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
+ struct xe_exec_queue *q = job->q;
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_hw_engine *hwe;
enum xe_hw_engine_id id;
@@ -137,6 +167,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
ss->snapshot_time = ktime_get_real();
ss->boot_time = ktime_get_boottime();
+ ss->gt = q->gt;
+ INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work);
+
cookie = dma_fence_begin_signalling();
for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
if (adj_logical_mask & BIT(i)) {
@@ -150,7 +183,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
- coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(q);
+ coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(job);
+ coredump->snapshot.job = xe_sched_job_snapshot_capture(job);
+ coredump->snapshot.vm = xe_vm_snapshot_capture(q->vm);
for_each_hw_engine(hwe, q->gt, id) {
if (hwe->class != q->hwe->class ||
@@ -161,21 +196,24 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe);
}
+ if (ss->vm)
+ queue_work(system_unbound_wq, &ss->work);
+
xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
dma_fence_end_signalling(cookie);
}
/**
* xe_devcoredump - Take the required snapshots and initialize coredump device.
- * @q: The faulty xe_exec_queue, where the issue was detected.
+ * @job: The faulty xe_sched_job, where the issue was detected.
*
* This function should be called at the crash time within the serialized
* gt_reset. It is skipped if we still have the core dump device available
* with the information of the 'first' snapshot.
*/
-void xe_devcoredump(struct xe_exec_queue *q)
+void xe_devcoredump(struct xe_sched_job *job)
{
- struct xe_device *xe = gt_to_xe(q->gt);
+ struct xe_device *xe = gt_to_xe(job->q->gt);
struct xe_devcoredump *coredump = &xe->devcoredump;
if (coredump->captured) {
@@ -184,7 +222,7 @@ void xe_devcoredump(struct xe_exec_queue *q)
}
coredump->captured = true;
- devcoredump_snapshot(coredump, q);
+ devcoredump_snapshot(coredump, job);
drm_info(&xe->drm, "Xe device coredump has been created\n");
drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
@@ -194,3 +232,4 @@ void xe_devcoredump(struct xe_exec_queue *q)
xe_devcoredump_read, xe_devcoredump_free);
}
#endif
+
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h
index 6ac218a5c1..df8671f0b5 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump.h
@@ -7,12 +7,12 @@
#define _XE_DEVCOREDUMP_H_
struct xe_device;
-struct xe_exec_queue;
+struct xe_sched_job;
#ifdef CONFIG_DEV_COREDUMP
-void xe_devcoredump(struct xe_exec_queue *q);
+void xe_devcoredump(struct xe_sched_job *job);
#else
-static inline void xe_devcoredump(struct xe_exec_queue *q)
+static inline void xe_devcoredump(struct xe_sched_job *job)
{
}
#endif
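Given the signature change above, a crash handler now hands over the offending job rather than the queue; a minimal, illustrative caller (not part of this patch) could look like:

/*
 * Illustrative only: the static inline stub above makes this compile away
 * when CONFIG_DEV_COREDUMP is disabled.
 */
#include "xe_devcoredump.h"

static void example_capture_on_job_timeout(struct xe_sched_job *job)
{
	xe_devcoredump(job);
}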
diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
index 7fdad9c3d3..6f654b63c7 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
@@ -12,6 +12,7 @@
#include "xe_hw_engine_types.h"
struct xe_device;
+struct xe_gt;
/**
* struct xe_devcoredump_snapshot - Crash snapshot
@@ -26,13 +27,23 @@ struct xe_devcoredump_snapshot {
/** @boot_time: Relative boot time so the uptime can be calculated. */
ktime_t boot_time;
+ /** @gt: Affected GT, used by forcewake for delayed capture */
+ struct xe_gt *gt;
+ /** @work: Workqueue for deferred capture outside of signaling context */
+ struct work_struct work;
+
/* GuC snapshots */
/** @ct: GuC CT snapshot */
struct xe_guc_ct_snapshot *ct;
/** @ge: Guc Engine snapshot */
struct xe_guc_submit_exec_queue_snapshot *ge;
+
/** @hwe: HW Engine snapshot array */
struct xe_hw_engine_snapshot *hwe[XE_NUM_HW_ENGINES];
+ /** @job: Snapshot of job state */
+ struct xe_sched_job_snapshot *job;
+ /** @vm: Snapshot of VM state */
+ struct xe_vm_snapshot *vm;
};
/**
@@ -44,8 +55,6 @@ struct xe_devcoredump_snapshot {
* for reading the information.
*/
struct xe_devcoredump {
- /** @xe: Xe device. */
- struct xe_device *xe;
/** @captured: The snapshot of the first hang has already been taken. */
bool captured;
/** @snapshot: Snapshot is captured at time of the first crash */
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 5176c27e4b..b3b37ed832 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -15,32 +15,35 @@
#include <drm/drm_print.h>
#include <drm/xe_drm.h>
+#include "display/xe_display.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_debugfs.h"
-#include "xe_display.h"
#include "xe_dma_buf.h"
#include "xe_drm_client.h"
#include "xe_drv.h"
-#include "xe_exec_queue.h"
#include "xe_exec.h"
+#include "xe_exec_queue.h"
#include "xe_ggtt.h"
+#include "xe_gsc_proxy.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
+#include "xe_hwmon.h"
#include "xe_irq.h"
+#include "xe_memirq.h"
#include "xe_mmio.h"
#include "xe_module.h"
#include "xe_pat.h"
#include "xe_pcode.h"
#include "xe_pm.h"
#include "xe_query.h"
+#include "xe_sriov.h"
#include "xe_tile.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_sys_mgr.h"
#include "xe_vm.h"
#include "xe_wait_user_fence.h"
-#include "xe_hwmon.h"
#ifdef CONFIG_LOCKDEP
struct lockdep_map xe_device_mem_access_lockdep_map = {
@@ -190,6 +193,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy)
{
struct xe_device *xe = to_xe_device(dev);
+ if (xe->preempt_fence_wq)
+ destroy_workqueue(xe->preempt_fence_wq);
+
if (xe->ordered_wq)
destroy_workqueue(xe->ordered_wq);
@@ -255,9 +261,15 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
INIT_LIST_HEAD(&xe->pinned.external_vram);
INIT_LIST_HEAD(&xe->pinned.evicted);
+ xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0);
xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
- if (!xe->ordered_wq || !xe->unordered_wq) {
+ if (!xe->ordered_wq || !xe->unordered_wq ||
+ !xe->preempt_fence_wq) {
+ /*
+		 * Cleanup is done in xe_device_destroy() via the
+		 * drmm_add_action_or_reset() registered above.
+ */
drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
err = -ENOMEM;
goto err;
@@ -377,8 +389,14 @@ mask_err:
return err;
}
-/*
- * Initialize MMIO resources that don't require any knowledge about tile count.
+/**
+ * xe_device_probe_early - Device early probe
+ * @xe: xe device instance
+ *
+ * Initialize MMIO resources that don't require any
+ * knowledge about tile count. Also initialize pcode.
+ *
+ * Return: 0 on success, error code on failure
*/
int xe_device_probe_early(struct xe_device *xe)
{
@@ -392,6 +410,10 @@ int xe_device_probe_early(struct xe_device *xe)
if (err)
return err;
+ err = xe_pcode_probe_early(xe);
+ if (err)
+ return err;
+
return 0;
}
@@ -424,10 +446,15 @@ int xe_device_probe(struct xe_device *xe)
struct xe_tile *tile;
struct xe_gt *gt;
int err;
+ u8 last_gt;
u8 id;
xe_pat_init_early(xe);
+ err = xe_sriov_init(xe);
+ if (err)
+ return err;
+
xe->info.mem_region_mask = 1;
err = xe_display_init_nommio(xe);
if (err)
@@ -448,18 +475,26 @@ int xe_device_probe(struct xe_device *xe)
err = xe_ggtt_init_early(tile->mem.ggtt);
if (err)
return err;
+ if (IS_SRIOV_VF(xe)) {
+ err = xe_memirq_init(&tile->sriov.vf.memirq);
+ if (err)
+ return err;
+ }
}
- err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe);
- if (err)
- return err;
-
for_each_gt(gt, xe, id) {
- err = xe_pcode_probe(gt);
+ err = xe_gt_init_hwconfig(gt);
if (err)
return err;
}
+ err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe);
+ if (err)
+ return err;
+
+ for_each_gt(gt, xe, id)
+ xe_pcode_init(gt);
+
err = xe_display_init_noirq(xe);
if (err)
return err;
@@ -502,16 +537,18 @@ int xe_device_probe(struct xe_device *xe)
goto err_irq_shutdown;
for_each_gt(gt, xe, id) {
+ last_gt = id;
+
err = xe_gt_init(gt);
if (err)
- goto err_irq_shutdown;
+ goto err_fini_gt;
}
xe_heci_gsc_init(xe);
err = xe_display_init(xe);
if (err)
- goto err_irq_shutdown;
+ goto err_fini_gt;
err = drm_dev_register(&xe->drm, 0);
if (err)
@@ -532,6 +569,14 @@ int xe_device_probe(struct xe_device *xe)
err_fini_display:
xe_display_driver_remove(xe);
+err_fini_gt:
+ for_each_gt(gt, xe, id) {
+ if (id < last_gt)
+ xe_gt_remove(gt);
+ else
+ break;
+ }
+
err_irq_shutdown:
xe_irq_shutdown(xe);
err:
@@ -549,12 +594,18 @@ static void xe_device_remove_display(struct xe_device *xe)
void xe_device_remove(struct xe_device *xe)
{
+ struct xe_gt *gt;
+ u8 id;
+
xe_device_remove_display(xe);
xe_display_fini(xe);
xe_heci_gsc_fini(xe);
+ for_each_gt(gt, xe, id)
+ xe_gt_remove(gt);
+
xe_irq_shutdown(xe);
}
@@ -659,3 +710,33 @@ void xe_device_mem_access_put(struct xe_device *xe)
xe_assert(xe, ref >= 0);
}
+
+void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p)
+{
+ struct xe_gt *gt;
+ u8 id;
+
+ drm_printf(p, "PCI ID: 0x%04x\n", xe->info.devid);
+ drm_printf(p, "PCI revision: 0x%02x\n", xe->info.revid);
+
+ for_each_gt(gt, xe, id) {
+ drm_printf(p, "GT id: %u\n", id);
+ drm_printf(p, "\tType: %s\n",
+ gt->info.type == XE_GT_TYPE_MAIN ? "main" : "media");
+ drm_printf(p, "\tIP ver: %u.%u.%u\n",
+ REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid),
+ REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid),
+ REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid));
+ drm_printf(p, "\tCS reference clock: %u\n", gt->info.reference_clock);
+ }
+}
+
+u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address)
+{
+ return sign_extend64(address, xe->info.va_bits - 1);
+}
+
+u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address)
+{
+ return address & GENMASK_ULL(xe->info.va_bits - 1, 0);
+}
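Since the two new address helpers are a plain sign-extend/mask pair around xe->info.va_bits, a short worked example (assuming va_bits == 48 purely for illustration):

/*
 * Illustrative only, with xe->info.va_bits == 48:
 *
 *   xe_device_canonicalize_addr(xe, 0x0000800000000000ull)
 *       == 0xffff800000000000ull   (bit 47 replicated into bits 63:48)
 *   xe_device_uncanonicalize_addr(xe, 0xffff800000000000ull)
 *       == 0x0000800000000000ull   (bits 63:48 masked off again)
 *
 * Addresses with bit (va_bits - 1) clear are unchanged by both helpers.
 */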
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index bf8efb44ed..d413bc2c6b 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -164,6 +164,16 @@ static inline bool xe_device_has_sriov(struct xe_device *xe)
return xe->info.has_sriov;
}
+static inline bool xe_device_has_memirq(struct xe_device *xe)
+{
+ return GRAPHICS_VERx100(xe) >= 1250;
+}
+
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size);
+void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p);
+
+u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address);
+u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address);
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index e8491979a6..8e3a222b41 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -16,6 +16,7 @@
#include "xe_heci_gsc.h"
#include "xe_gt_types.h"
#include "xe_lmtt_types.h"
+#include "xe_memirq_types.h"
#include "xe_platform_types.h"
#include "xe_pt_types.h"
#include "xe_sriov_types.h"
@@ -142,10 +143,10 @@ struct xe_tile {
* * 8MB-16MB: global GTT
*/
struct {
- /** @size: size of tile's MMIO space */
+ /** @mmio.size: size of tile's MMIO space */
size_t size;
- /** @regs: pointer to tile's MMIO space (starting with registers) */
+ /** @mmio.regs: pointer to tile's MMIO space (starting with registers) */
void __iomem *regs;
} mmio;
@@ -155,31 +156,31 @@ struct xe_tile {
* Each tile has its own additional 256MB (28-bit) MMIO-extension space.
*/
struct {
- /** @size: size of tile's additional MMIO-extension space */
+ /** @mmio_ext.size: size of tile's additional MMIO-extension space */
size_t size;
- /** @regs: pointer to tile's additional MMIO-extension space */
+ /** @mmio_ext.regs: pointer to tile's additional MMIO-extension space */
void __iomem *regs;
} mmio_ext;
/** @mem: memory management info for tile */
struct {
/**
- * @vram: VRAM info for tile.
+ * @mem.vram: VRAM info for tile.
*
* Although VRAM is associated with a specific tile, it can
* still be accessed by all tiles' GTs.
*/
struct xe_mem_region vram;
- /** @vram_mgr: VRAM TTM manager */
+ /** @mem.vram_mgr: VRAM TTM manager */
struct xe_ttm_vram_mgr *vram_mgr;
- /** @ggtt: Global graphics translation table */
+ /** @mem.ggtt: Global graphics translation table */
struct xe_ggtt *ggtt;
/**
- * @kernel_bb_pool: Pool from which batchbuffers are allocated.
+ * @mem.kernel_bb_pool: Pool from which batchbuffers are allocated.
*
* Media GT shares a pool with its primary GT.
*/
@@ -192,6 +193,10 @@ struct xe_tile {
/** @sriov.pf.lmtt: Local Memory Translation Table. */
struct xe_lmtt lmtt;
} pf;
+ struct {
+ /** @sriov.vf.memirq: Memory Based Interrupts. */
+ struct xe_memirq memirq;
+ } vf;
} sriov;
/** @migrate: Migration helper for vram blits and clearing */
@@ -213,68 +218,68 @@ struct xe_device {
/** @info: device info */
struct intel_device_info {
- /** @graphics_name: graphics IP name */
+ /** @info.graphics_name: graphics IP name */
const char *graphics_name;
- /** @media_name: media IP name */
+ /** @info.media_name: media IP name */
const char *media_name;
- /** @tile_mmio_ext_size: size of MMIO extension space, per-tile */
+ /** @info.tile_mmio_ext_size: size of MMIO extension space, per-tile */
u32 tile_mmio_ext_size;
- /** @graphics_verx100: graphics IP version */
+ /** @info.graphics_verx100: graphics IP version */
u32 graphics_verx100;
- /** @media_verx100: media IP version */
+ /** @info.media_verx100: media IP version */
u32 media_verx100;
- /** @mem_region_mask: mask of valid memory regions */
+ /** @info.mem_region_mask: mask of valid memory regions */
u32 mem_region_mask;
- /** @platform: XE platform enum */
+ /** @info.platform: XE platform enum */
enum xe_platform platform;
- /** @subplatform: XE subplatform enum */
+ /** @info.subplatform: XE subplatform enum */
enum xe_subplatform subplatform;
- /** @devid: device ID */
+ /** @info.devid: device ID */
u16 devid;
- /** @revid: device revision */
+ /** @info.revid: device revision */
u8 revid;
- /** @step: stepping information for each IP */
+ /** @info.step: stepping information for each IP */
struct xe_step_info step;
- /** @dma_mask_size: DMA address bits */
+ /** @info.dma_mask_size: DMA address bits */
u8 dma_mask_size;
- /** @vram_flags: Vram flags */
+ /** @info.vram_flags: Vram flags */
u8 vram_flags;
- /** @tile_count: Number of tiles */
+ /** @info.tile_count: Number of tiles */
u8 tile_count;
- /** @gt_count: Total number of GTs for entire device */
+ /** @info.gt_count: Total number of GTs for entire device */
u8 gt_count;
- /** @vm_max_level: Max VM level */
+ /** @info.vm_max_level: Max VM level */
u8 vm_max_level;
- /** @va_bits: Maximum bits of a virtual address */
+ /** @info.va_bits: Maximum bits of a virtual address */
u8 va_bits;
- /** @is_dgfx: is discrete device */
+ /** @info.is_dgfx: is discrete device */
u8 is_dgfx:1;
- /** @has_asid: Has address space ID */
+ /** @info.has_asid: Has address space ID */
u8 has_asid:1;
- /** @force_execlist: Forced execlist submission */
+ /** @info.force_execlist: Forced execlist submission */
u8 force_execlist:1;
- /** @has_flat_ccs: Whether flat CCS metadata is used */
+ /** @info.has_flat_ccs: Whether flat CCS metadata is used */
u8 has_flat_ccs:1;
- /** @has_llc: Device has a shared CPU+GPU last level cache */
+ /** @info.has_llc: Device has a shared CPU+GPU last level cache */
u8 has_llc:1;
- /** @has_mmio_ext: Device has extra MMIO address range */
+ /** @info.has_mmio_ext: Device has extra MMIO address range */
u8 has_mmio_ext:1;
- /** @has_range_tlb_invalidation: Has range based TLB invalidations */
+ /** @info.has_range_tlb_invalidation: Has range based TLB invalidations */
u8 has_range_tlb_invalidation:1;
- /** @has_sriov: Supports SR-IOV */
+ /** @info.has_sriov: Supports SR-IOV */
u8 has_sriov:1;
- /** @has_usm: Device has unified shared memory support */
+ /** @info.has_usm: Device has unified shared memory support */
u8 has_usm:1;
- /** @enable_display: display enabled */
+ /** @info.enable_display: display enabled */
u8 enable_display:1;
- /** @skip_mtcfg: skip Multi-Tile configuration from MTCFG register */
+ /** @info.skip_mtcfg: skip Multi-Tile configuration from MTCFG register */
u8 skip_mtcfg:1;
- /** @skip_pcode: skip access to PCODE uC */
+ /** @info.skip_pcode: skip access to PCODE uC */
u8 skip_pcode:1;
- /** @has_heci_gscfi: device has heci gscfi */
+ /** @info.has_heci_gscfi: device has heci gscfi */
u8 has_heci_gscfi:1;
- /** @skip_guc_pc: Skip GuC based PM feature init */
+ /** @info.skip_guc_pc: Skip GuC based PM feature init */
u8 skip_guc_pc:1;
#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
@@ -286,10 +291,10 @@ struct xe_device {
/** @irq: device interrupt state */
struct {
- /** @lock: lock for processing irq's on this device */
+ /** @irq.lock: lock for processing irq's on this device */
spinlock_t lock;
- /** @enabled: interrupts enabled on this device */
+ /** @irq.enabled: interrupts enabled on this device */
bool enabled;
} irq;
@@ -298,17 +303,17 @@ struct xe_device {
/** @mmio: mmio info for device */
struct {
- /** @size: size of MMIO space for device */
+ /** @mmio.size: size of MMIO space for device */
size_t size;
- /** @regs: pointer to MMIO space for device */
+ /** @mmio.regs: pointer to MMIO space for device */
void __iomem *regs;
} mmio;
/** @mem: memory info for device */
struct {
- /** @vram: VRAM info for device */
+ /** @mem.vram: VRAM info for device */
struct xe_mem_region vram;
- /** @sys_mgr: system TTM manager */
+ /** @mem.sys_mgr: system TTM manager */
struct ttm_resource_manager sys_mgr;
} mem;
@@ -316,46 +321,51 @@ struct xe_device {
struct {
/** @sriov.__mode: SR-IOV mode (Don't access directly!) */
enum xe_sriov_mode __mode;
+ /** @sriov.wq: workqueue used by the virtualization workers */
+ struct workqueue_struct *wq;
} sriov;
/** @clients: drm clients info */
struct {
- /** @lock: Protects drm clients info */
+ /** @clients.lock: Protects drm clients info */
spinlock_t lock;
- /** @count: number of drm clients */
+ /** @clients.count: number of drm clients */
u64 count;
} clients;
/** @usm: unified memory state */
struct {
- /** @asid: convert a ASID to VM */
+		/** @usm.asid: convert an ASID to VM */
struct xarray asid_to_vm;
- /** @next_asid: next ASID, used to cyclical alloc asids */
+		/** @usm.next_asid: next ASID, used to cyclically allocate ASIDs */
u32 next_asid;
- /** @num_vm_in_fault_mode: number of VM in fault mode */
+ /** @usm.num_vm_in_fault_mode: number of VM in fault mode */
u32 num_vm_in_fault_mode;
- /** @num_vm_in_non_fault_mode: number of VM in non-fault mode */
+ /** @usm.num_vm_in_non_fault_mode: number of VM in non-fault mode */
u32 num_vm_in_non_fault_mode;
- /** @lock: protects UM state */
+ /** @usm.lock: protects UM state */
struct mutex lock;
} usm;
/** @pinned: pinned BO state */
struct {
- /** @lock: protected pinned BO list state */
+ /** @pinned.lock: protected pinned BO list state */
spinlock_t lock;
- /** @evicted: pinned kernel BO that are present */
+		/** @pinned.kernel_bo_present: pinned kernel BOs that are present */
struct list_head kernel_bo_present;
- /** @evicted: pinned BO that have been evicted */
+		/** @pinned.evicted: pinned BOs that have been evicted */
struct list_head evicted;
- /** @external_vram: pinned external BO in vram*/
+		/** @pinned.external_vram: pinned external BOs in VRAM */
struct list_head external_vram;
} pinned;
/** @ufence_wq: user fence wait queue */
wait_queue_head_t ufence_wq;
+ /** @preempt_fence_wq: used to serialize preempt fences */
+ struct workqueue_struct *preempt_fence_wq;
+
/** @ordered_wq: used to serialize compute mode resume */
struct workqueue_struct *ordered_wq;
@@ -370,36 +380,57 @@ struct xe_device {
* triggering additional actions when they occur.
*/
struct {
- /** @ref: ref count of memory accesses */
+ /** @mem_access.ref: ref count of memory accesses */
atomic_t ref;
+
+ /**
+ * @mem_access.vram_userfault: Encapsulate vram_userfault
+ * related stuff
+ */
+ struct {
+ /**
+			 * @mem_access.vram_userfault.lock: Protects access to
+			 * @mem_access.vram_userfault.list. A mutex is used
+			 * instead of a spinlock because the lock is held
+			 * across entire list operations, which may sleep.
+ */
+ struct mutex lock;
+
+ /**
+			 * @mem_access.vram_userfault.list: List of userfaulted
+			 * VRAM BOs whose mmap mappings must be released on the
+			 * runtime suspend path.
+ */
+ struct list_head list;
+ } vram_userfault;
} mem_access;
/**
* @pat: Encapsulate PAT related stuff
*/
struct {
- /** Internal operations to abstract platforms */
+ /** @pat.ops: Internal operations to abstract platforms */
const struct xe_pat_ops *ops;
- /** PAT table to program in the HW */
+ /** @pat.table: PAT table to program in the HW */
const struct xe_pat_table_entry *table;
- /** Number of PAT entries */
+ /** @pat.n_entries: Number of PAT entries */
int n_entries;
u32 idx[__XE_CACHE_LEVEL_COUNT];
} pat;
/** @d3cold: Encapsulate d3cold related stuff */
struct {
- /** capable: Indicates if root port is d3cold capable */
+ /** @d3cold.capable: Indicates if root port is d3cold capable */
bool capable;
- /** @allowed: Indicates if d3cold is a valid device state */
+ /** @d3cold.allowed: Indicates if d3cold is a valid device state */
bool allowed;
- /** @power_lost: Indicates if card has really lost power. */
+ /** @d3cold.power_lost: Indicates if card has really lost power. */
bool power_lost;
/**
- * @vram_threshold:
+ * @d3cold.vram_threshold:
*
* This represents the permissible threshold(in megabytes)
* for vram save/restore. d3cold will be disallowed,
@@ -408,7 +439,7 @@ struct xe_device {
* Default threshold value is 300mb.
*/
u32 vram_threshold;
- /** @lock: protect vram_threshold */
+ /** @d3cold.lock: protect vram_threshold */
struct mutex lock;
} d3cold;
@@ -516,17 +547,17 @@ struct xe_file {
/** @vm: VM state for file */
struct {
- /** @xe: xarray to store VMs */
+ /** @vm.xa: xarray to store VMs */
struct xarray xa;
- /** @lock: protects file VM state */
+ /** @vm.lock: protects file VM state */
struct mutex lock;
} vm;
/** @exec_queue: Submission exec queue state for file */
struct {
- /** @xe: xarray to store engines */
+ /** @exec_queue.xa: xarray to store engines */
struct xarray xa;
- /** @lock: protects file engine state */
+ /** @exec_queue.lock: protects file engine state */
struct mutex lock;
} exec_queue;
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index 6040e4d22b..87c10bd795 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -113,7 +113,7 @@ static void bo_meminfo(struct xe_bo *bo,
else
mem_type = XE_PL_TT;
- if (bo->ttm.base.handle_count > 1)
+ if (drm_gem_object_is_shared_for_memory_stats(&bo->ttm.base))
stats[mem_type].shared += sz;
else
stats[mem_type].private += sz;
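
The bo_meminfo() change above switches the shared-vs-private decision to the common drm_gem_object_is_shared_for_memory_stats() helper. As a rough, hedged sketch of what that helper checks (the exact definition lives in include/drm/drm_gem.h and may differ in detail), a BO counts as shared once someone other than its creator can reach it:

	/* hedged sketch, not the authoritative definition */
	static bool is_shared_for_memory_stats(struct drm_gem_object *obj)
	{
		/* more than one GEM handle, or exported as a dma-buf */
		return obj->handle_count > 1 || obj->dma_buf;
	}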
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 2cd4ad2fcf..cc5e0f75de 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -94,9 +94,16 @@
* Unlock all
*/
+/*
+ * Add validation and rebinding to the drm_exec locking loop, since both can
+ * trigger eviction which may require sleeping dma_resv locks.
+ */
static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec)
{
- return drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec);
+ struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm);
+
+ /* The fence slot added here is intended for the exec sched job. */
+ return xe_vm_validate_rebind(vm, &vm_exec->exec, 1);
}
int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
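
For reference, xe_exec_fn() above is invoked from inside the drm_exec retry loop through the drm_gpuvm_exec helper's extra callback. A minimal, hedged sketch of that wiring (it mirrors the hunks below, with error paths trimmed):

	struct drm_gpuvm_exec vm_exec = {
		.vm = &vm->gpuvm,
		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
		.extra.fn = xe_exec_fn,		/* validates/rebinds under the locks */
	};
	int err;

	/* locks the VM resv plus all external BOs, then calls .extra.fn */
	err = drm_gpuvm_exec_lock(&vm_exec);
	if (err)
		return err;

	/* ... create and arm the sched job here ... */

	drm_gpuvm_exec_unlock(&vm_exec);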
@@ -219,7 +226,6 @@ retry:
}
vm_exec.vm = &vm->gpuvm;
- vm_exec.num_fences = 1 + vm->xe->info.tile_count;
vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT;
if (xe_vm_in_lr_mode(vm)) {
drm_exec_init(exec, vm_exec.flags, 0);
@@ -251,14 +257,6 @@ retry:
goto err_exec;
}
- /*
- * Rebind any invalidated userptr or evicted BOs in the VM, non-compute
- * VM mode only.
- */
- err = xe_vm_rebind(vm, false);
- if (err)
- goto err_put_job;
-
/* Wait behind rebinds */
if (!xe_vm_in_lr_mode(vm)) {
err = drm_sched_job_add_resv_dependencies(&job->drm,
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 5093c56d60..ead25d5e72 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -30,21 +30,23 @@ enum xe_exec_queue_sched_prop {
XE_EXEC_QUEUE_SCHED_PROP_MAX = 3,
};
-static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
- struct xe_vm *vm,
- u32 logical_mask,
- u16 width, struct xe_hw_engine *hwe,
- u32 flags)
+static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
+ u64 extensions, int ext_number, bool create);
+
+static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
+ struct xe_vm *vm,
+ u32 logical_mask,
+ u16 width, struct xe_hw_engine *hwe,
+ u32 flags, u64 extensions)
{
struct xe_exec_queue *q;
struct xe_gt *gt = hwe->gt;
int err;
- int i;
/* only kernel queues can be permanent */
XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL));
- q = kzalloc(sizeof(*q) + sizeof(struct xe_lrc) * width, GFP_KERNEL);
+ q = kzalloc(struct_size(q, lrc, width), GFP_KERNEL);
if (!q)
return ERR_PTR(-ENOMEM);
@@ -52,8 +54,6 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
q->flags = flags;
q->hwe = hwe;
q->gt = gt;
- if (vm)
- q->vm = xe_vm_get(vm);
q->class = hwe->class;
q->width = width;
q->logical_mask = logical_mask;
@@ -74,17 +74,43 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
else
q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;
+ if (extensions) {
+ /*
+ * may set q->usm, must come before xe_lrc_init(),
+ * may overwrite q->sched_props, must come before q->ops->init()
+ */
+ err = exec_queue_user_extensions(xe, q, extensions, 0, true);
+ if (err) {
+ kfree(q);
+ return ERR_PTR(err);
+ }
+ }
+
+ if (vm)
+ q->vm = xe_vm_get(vm);
+
if (xe_exec_queue_is_parallel(q)) {
q->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
q->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO;
}
- if (q->flags & EXEC_QUEUE_FLAG_VM) {
- q->bind.fence_ctx = dma_fence_context_alloc(1);
- q->bind.fence_seqno = XE_FENCE_INITIAL_SEQNO;
- }
- for (i = 0; i < width; ++i) {
- err = xe_lrc_init(q->lrc + i, hwe, q, vm, SZ_16K);
+ return q;
+}
+
+static void __xe_exec_queue_free(struct xe_exec_queue *q)
+{
+ if (q->vm)
+ xe_vm_put(q->vm);
+ kfree(q);
+}
+
+static int __xe_exec_queue_init(struct xe_exec_queue *q)
+{
+ struct xe_device *xe = gt_to_xe(q->gt);
+ int i, err;
+
+ for (i = 0; i < q->width; ++i) {
+ err = xe_lrc_init(q->lrc + i, q->hwe, q, q->vm, SZ_16K);
if (err)
goto err_lrc;
}
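
The allocation in the hunk above moves from open-coded sizeof(*q) + sizeof(struct xe_lrc) * width to struct_size(). A minimal, generic illustration of that helper (the names here are made up, not taken from the driver):

	#include <linux/overflow.h>
	#include <linux/slab.h>

	struct item { u64 payload; };

	struct container {
		u16 count;
		struct item elems[];	/* flexible array member */
	};

	/* struct_size(c, elems, n) == sizeof(*c) + n * sizeof(c->elems[0]),
	 * saturating to SIZE_MAX if the multiplication would overflow */
	struct container *alloc_container(u16 n)
	{
		struct container *c = kzalloc(struct_size(c, elems, n), GFP_KERNEL);

		if (c)
			c->count = n;
		return c;
	}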
@@ -101,35 +127,47 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
* can perform GuC CT actions when needed. Caller is expected to have
* already grabbed the rpm ref outside any sensitive locks.
*/
- if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !vm))
+ if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm))
drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe));
- return q;
+ return 0;
err_lrc:
for (i = i - 1; i >= 0; --i)
xe_lrc_finish(q->lrc + i);
- kfree(q);
- return ERR_PTR(err);
+ return err;
}
struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
u32 logical_mask, u16 width,
- struct xe_hw_engine *hwe, u32 flags)
+ struct xe_hw_engine *hwe, u32 flags,
+ u64 extensions)
{
struct xe_exec_queue *q;
int err;
+ q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags,
+ extensions);
+ if (IS_ERR(q))
+ return q;
+
if (vm) {
err = xe_vm_lock(vm, true);
if (err)
- return ERR_PTR(err);
+ goto err_post_alloc;
}
- q = __xe_exec_queue_create(xe, vm, logical_mask, width, hwe, flags);
+
+ err = __xe_exec_queue_init(q);
if (vm)
xe_vm_unlock(vm);
+ if (err)
+ goto err_post_alloc;
return q;
+
+err_post_alloc:
+ __xe_exec_queue_free(q);
+ return ERR_PTR(err);
}
struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
@@ -154,7 +192,7 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe
if (!logical_mask)
return ERR_PTR(-ENODEV);
- return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags);
+ return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, 0);
}
void xe_exec_queue_destroy(struct kref *ref)
@@ -180,10 +218,7 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
xe_lrc_finish(q->lrc + i);
if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm))
xe_device_mem_access_put(gt_to_xe(q->gt));
- if (q->vm)
- xe_vm_put(q->vm);
-
- kfree(q);
+ __xe_exec_queue_free(q);
}
void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance)
@@ -241,7 +276,11 @@ static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q
if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe)))
return -EPERM;
- return q->ops->set_priority(q, value);
+ if (!create)
+ return q->ops->set_priority(q, value);
+
+ q->sched_props.priority = value;
+ return 0;
}
static bool xe_exec_queue_enforce_schedule_limit(void)
@@ -308,7 +347,11 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *
!xe_hw_engine_timeout_in_range(value, min, max))
return -EINVAL;
- return q->ops->set_timeslice(q, value);
+ if (!create)
+ return q->ops->set_timeslice(q, value);
+
+ q->sched_props.timeslice_us = value;
+ return 0;
}
typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
@@ -537,6 +580,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
for_each_gt(gt, xe, id) {
struct xe_exec_queue *new;
+ u32 flags;
if (xe_gt_is_media_type(gt))
continue;
@@ -555,14 +599,12 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
/* The migration vm doesn't hold rpm ref */
xe_device_mem_access_get(xe);
+ flags = EXEC_QUEUE_FLAG_VM | (id ? EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0);
+
migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate);
new = xe_exec_queue_create(xe, migrate_vm, logical_mask,
- args->width, hwe,
- EXEC_QUEUE_FLAG_PERSISTENT |
- EXEC_QUEUE_FLAG_VM |
- (id ?
- EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD :
- 0));
+ args->width, hwe, flags,
+ args->extensions);
xe_device_mem_access_put(xe); /* now held by engine */
@@ -608,7 +650,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
}
q = xe_exec_queue_create(xe, vm, logical_mask,
- args->width, hwe, 0);
+ args->width, hwe, 0,
+ args->extensions);
up_read(&vm->lock);
xe_vm_put(vm);
if (IS_ERR(q))
@@ -624,12 +667,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
}
}
- if (args->extensions) {
- err = exec_queue_user_extensions(xe, q, args->extensions, 0, true);
- if (XE_IOCTL_DBG(xe, err))
- goto kill_exec_queue;
- }
-
mutex_lock(&xef->exec_queue.lock);
err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
mutex_unlock(&xef->exec_queue.lock);
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
index d959cc4a1a..02ce8d2046 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue.h
@@ -16,7 +16,8 @@ struct xe_file;
struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
u32 logical_mask, u16 width,
- struct xe_hw_engine *hw_engine, u32 flags);
+ struct xe_hw_engine *hw_engine, u32 flags,
+ u64 extensions);
struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
struct xe_vm *vm,
enum xe_engine_class class, u32 flags);
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 22e1dffd06..462b331950 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -105,50 +105,39 @@ struct xe_exec_queue {
struct xe_guc_exec_queue *guc;
};
- union {
- /**
- * @parallel: parallel submission state
- */
- struct {
- /** @composite_fence_ctx: context composite fence */
- u64 composite_fence_ctx;
- /** @composite_fence_seqno: seqno for composite fence */
- u32 composite_fence_seqno;
- } parallel;
- /**
- * @bind: bind submission state
- */
- struct {
- /** @fence_ctx: context bind fence */
- u64 fence_ctx;
- /** @fence_seqno: seqno for bind fence */
- u32 fence_seqno;
- } bind;
- };
+ /**
+ * @parallel: parallel submission state
+ */
+ struct {
+ /** @parallel.composite_fence_ctx: context composite fence */
+ u64 composite_fence_ctx;
+ /** @parallel.composite_fence_seqno: seqno for composite fence */
+ u32 composite_fence_seqno;
+ } parallel;
/** @sched_props: scheduling properties */
struct {
- /** @timeslice_us: timeslice period in micro-seconds */
+ /** @sched_props.timeslice_us: timeslice period in micro-seconds */
u32 timeslice_us;
- /** @preempt_timeout_us: preemption timeout in micro-seconds */
+ /** @sched_props.preempt_timeout_us: preemption timeout in micro-seconds */
u32 preempt_timeout_us;
- /** @job_timeout_ms: job timeout in milliseconds */
+ /** @sched_props.job_timeout_ms: job timeout in milliseconds */
u32 job_timeout_ms;
- /** @priority: priority of this exec queue */
+ /** @sched_props.priority: priority of this exec queue */
enum xe_exec_queue_priority priority;
} sched_props;
/** @compute: compute exec queue state */
struct {
- /** @pfence: preemption fence */
+ /** @compute.pfence: preemption fence */
struct dma_fence *pfence;
- /** @context: preemption fence context */
+ /** @compute.context: preemption fence context */
u64 context;
- /** @seqno: preemption fence seqno */
+ /** @compute.seqno: preemption fence seqno */
u32 seqno;
- /** @link: link into VM's list of exec queues */
+ /** @compute.link: link into VM's list of exec queues */
struct list_head link;
- /** @lock: preemption fences lock */
+ /** @compute.lock: preemption fences lock */
spinlock_t lock;
} compute;
@@ -185,8 +174,6 @@ struct xe_exec_queue_ops {
int (*set_timeslice)(struct xe_exec_queue *q, u32 timeslice_us);
/** @set_preempt_timeout: Set preemption timeout for exec queue */
int (*set_preempt_timeout)(struct xe_exec_queue *q, u32 preempt_timeout_us);
- /** @set_job_timeout: Set job timeout for exec queue */
- int (*set_job_timeout)(struct xe_exec_queue *q, u32 job_timeout_ms);
/**
* @suspend: Suspend exec queue from executing, allowed to be called
* multiple times in a row before resume with the caveat that
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index acb4d9f38f..dece278593 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -416,13 +416,6 @@ static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
return 0;
}
-static int execlist_exec_queue_set_job_timeout(struct xe_exec_queue *q,
- u32 job_timeout_ms)
-{
- /* NIY */
- return 0;
-}
-
static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
{
/* NIY */
@@ -453,7 +446,6 @@ static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
.set_priority = execlist_exec_queue_set_priority,
.set_timeslice = execlist_exec_queue_set_timeslice,
.set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
- .set_job_timeout = execlist_exec_queue_set_job_timeout,
.suspend = execlist_exec_queue_suspend,
.suspend_wait = execlist_exec_queue_suspend_wait,
.resume = execlist_exec_queue_resume,
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 3efd2d066b..ab96edb058 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -11,12 +11,16 @@
#include <drm/i915_drm.h>
#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
+#include "xe_gt_printk.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_map.h"
#include "xe_mmio.h"
+#include "xe_sriov.h"
#include "xe_wopcm.h"
#define XELPG_GGTT_PTE_PAT0 BIT_ULL(52)
@@ -141,7 +145,11 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
unsigned int gsm_size;
- gsm_size = probe_gsm_size(pdev);
+ if (IS_SRIOV_VF(xe))
+ gsm_size = SZ_8M; /* GGTT is expected to be 4GiB */
+ else
+ gsm_size = probe_gsm_size(pdev);
+
if (gsm_size == 0) {
drm_err(&xe->drm, "Hardware reported no preallocated GSM\n");
return -ENOMEM;
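
For context on the fixed VF value above (assuming the usual 8-byte GGTT PTE and 4 KiB GGTT page): 8 MiB of GSM holds 8 MiB / 8 B = 1 Mi PTEs, and 1 Mi x 4 KiB = 4 GiB of mappable GGTT, which matches the "GGTT is expected to be 4GiB" comment.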
@@ -312,6 +320,74 @@ void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix)
}
}
+static void xe_ggtt_dump_node(struct xe_ggtt *ggtt,
+ const struct drm_mm_node *node, const char *description)
+{
+ char buf[10];
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ string_get_size(node->size, 1, STRING_UNITS_2, buf, sizeof(buf));
+ xe_gt_dbg(ggtt->tile->primary_gt, "GGTT %#llx-%#llx (%s) %s\n",
+ node->start, node->start + node->size, buf, description);
+ }
+}
+
+/**
+ * xe_ggtt_balloon - prevent allocation of specified GGTT addresses
+ * @ggtt: the &xe_ggtt where we want to make the reservation
+ * @start: the starting GGTT address of the reserved region
+ * @end: the ending GGTT address of the reserved region
+ * @node: the &drm_mm_node to hold reserved GGTT node
+ *
+ * Use xe_ggtt_deballoon() to release a reserved GGTT node.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 end, struct drm_mm_node *node)
+{
+ int err;
+
+ xe_tile_assert(ggtt->tile, start < end);
+ xe_tile_assert(ggtt->tile, IS_ALIGNED(start, XE_PAGE_SIZE));
+ xe_tile_assert(ggtt->tile, IS_ALIGNED(end, XE_PAGE_SIZE));
+ xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(node));
+
+ node->color = 0;
+ node->start = start;
+ node->size = end - start;
+
+ mutex_lock(&ggtt->lock);
+ err = drm_mm_reserve_node(&ggtt->mm, node);
+ mutex_unlock(&ggtt->lock);
+
+ if (xe_gt_WARN(ggtt->tile->primary_gt, err,
+ "Failed to balloon GGTT %#llx-%#llx (%pe)\n",
+ node->start, node->start + node->size, ERR_PTR(err)))
+ return err;
+
+ xe_ggtt_dump_node(ggtt, node, "balloon");
+ return 0;
+}
+
+/**
+ * xe_ggtt_deballoon - release a reserved GGTT region
+ * @ggtt: the &xe_ggtt where the reserved node belongs
+ * @node: the &drm_mm_node with reserved GGTT region
+ *
+ * See xe_ggtt_balloon() for details.
+ */
+void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node)
+{
+ if (!drm_mm_node_allocated(node))
+ return;
+
+ xe_ggtt_dump_node(ggtt, node, "deballoon");
+
+ mutex_lock(&ggtt->lock);
+ drm_mm_remove_node(node);
+ mutex_unlock(&ggtt->lock);
+}
+
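
A minimal usage sketch for the new helpers, assuming a caller (for example VF initialization code) that must keep a GGTT range out of the allocator; the drm_mm_node has to stay alive for as long as the reservation is held, and start/end must be XE_PAGE_SIZE aligned:

	struct drm_mm_node node = {};	/* must outlive the reservation */
	int err;

	err = xe_ggtt_balloon(ggtt, start, end, &node);	/* reserve [start, end) */
	if (err)
		return err;

	/* ... normal GGTT allocations now skip the ballooned range ... */

	xe_ggtt_deballoon(ggtt, &node);	/* release it again */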
int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node,
u32 size, u32 align, u32 mm_flags)
{
@@ -334,7 +410,8 @@ int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
{
- u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB];
+ u16 cache_mode = bo->flags & XE_BO_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB;
+ u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode];
u64 start = bo->ggtt_node.start;
u64 offset, pte;
diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h
index a09c166dff..42705e1338 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.h
+++ b/drivers/gpu/drm/xe/xe_ggtt.h
@@ -16,6 +16,9 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt);
int xe_ggtt_init(struct xe_ggtt *ggtt);
void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix);
+int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 end, struct drm_mm_node *node);
+void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node);
+
int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node,
u32 size, u32 align);
int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt,
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index a8a895cf4b..a61994292c 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -7,12 +7,14 @@
#include <drm/drm_managed.h>
+#include <generated/xe_wa_oob.h>
+
#include "abi/gsc_mkhi_commands_abi.h"
-#include "generated/xe_wa_oob.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
+#include "xe_gsc_proxy.h"
#include "xe_gsc_submit.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
@@ -242,8 +244,31 @@ static int gsc_upload(struct xe_gsc *gsc)
if (err)
return err;
+ return 0;
+}
+
+static int gsc_upload_and_init(struct xe_gsc *gsc)
+{
+ struct xe_gt *gt = gsc_to_gt(gsc);
+ int ret;
+
+ ret = gsc_upload(gsc);
+ if (ret)
+ return ret;
+
+ xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
xe_gt_dbg(gt, "GSC FW async load completed\n");
+ /* HuC auth failure is not fatal */
+ if (xe_huc_is_authenticated(&gt->uc.huc, XE_HUC_AUTH_VIA_GUC))
+ xe_huc_auth(&gt->uc.huc, XE_HUC_AUTH_VIA_GSC);
+
+ ret = xe_gsc_proxy_start(gsc);
+ if (ret)
+ return ret;
+
+ xe_gt_dbg(gt, "GSC proxy init completed\n");
+
return 0;
}
@@ -252,24 +277,28 @@ static void gsc_work(struct work_struct *work)
struct xe_gsc *gsc = container_of(work, typeof(*gsc), work);
struct xe_gt *gt = gsc_to_gt(gsc);
struct xe_device *xe = gt_to_xe(gt);
+ u32 actions;
int ret;
+ spin_lock_irq(&gsc->lock);
+ actions = gsc->work_actions;
+ gsc->work_actions = 0;
+ spin_unlock_irq(&gsc->lock);
+
xe_device_mem_access_get(xe);
xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
- ret = gsc_upload(gsc);
- if (ret && ret != -EEXIST) {
- xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
- goto out;
+ if (actions & GSC_ACTION_FW_LOAD) {
+ ret = gsc_upload_and_init(gsc);
+ if (ret && ret != -EEXIST)
+ xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
+ else
+ xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_RUNNING);
}
- xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED);
-
- /* HuC auth failure is not fatal */
- if (xe_huc_is_authenticated(&gt->uc.huc, XE_HUC_AUTH_VIA_GUC))
- xe_huc_auth(&gt->uc.huc, XE_HUC_AUTH_VIA_GSC);
+ if (actions & GSC_ACTION_SW_PROXY)
+ xe_gsc_proxy_request_handler(gsc);
-out:
xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
xe_device_mem_access_put(xe);
}
@@ -282,6 +311,7 @@ int xe_gsc_init(struct xe_gsc *gsc)
gsc->fw.type = XE_UC_FW_TYPE_GSC;
INIT_WORK(&gsc->work, gsc_work);
+ spin_lock_init(&gsc->lock);
/* The GSC uC is only available on the media GT */
if (tile->media_gt && (gt != tile->media_gt)) {
@@ -302,6 +332,10 @@ int xe_gsc_init(struct xe_gsc *gsc)
else if (ret)
goto out;
+ ret = xe_gsc_proxy_init(gsc);
+ if (ret && ret != -ENODEV)
+ goto out;
+
return 0;
out:
@@ -356,7 +390,7 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
q = xe_exec_queue_create(xe, NULL,
BIT(hwe->logical_instance), 1, hwe,
EXEC_QUEUE_FLAG_KERNEL |
- EXEC_QUEUE_FLAG_PERMANENT);
+ EXEC_QUEUE_FLAG_PERMANENT, 0);
if (IS_ERR(q)) {
xe_gt_err(gt, "Failed to create queue for GSC submission\n");
err = PTR_ERR(q);
@@ -401,6 +435,10 @@ void xe_gsc_load_start(struct xe_gsc *gsc)
return;
}
+ spin_lock_irq(&gsc->lock);
+ gsc->work_actions |= GSC_ACTION_FW_LOAD;
+ spin_unlock_irq(&gsc->lock);
+
queue_work(gsc->wq, &gsc->work);
}
@@ -410,6 +448,15 @@ void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc)
flush_work(&gsc->work);
}
+/**
+ * xe_gsc_remove() - Clean up the GSC structures before driver removal
+ * @gsc: the GSC uC
+ */
+void xe_gsc_remove(struct xe_gsc *gsc)
+{
+ xe_gsc_proxy_remove(gsc);
+}
+
/*
* wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a
* GSC engine reset by writing a notification bit in the GS1 register and then
diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h
index bc1ef7f31e..c6fb32e3fd 100644
--- a/drivers/gpu/drm/xe/xe_gsc.h
+++ b/drivers/gpu/drm/xe/xe_gsc.h
@@ -14,6 +14,7 @@ int xe_gsc_init(struct xe_gsc *gsc);
int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc);
void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc);
void xe_gsc_load_start(struct xe_gsc *gsc);
+void xe_gsc_remove(struct xe_gsc *gsc);
void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep);
diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c
new file mode 100644
index 0000000000..309ef80e3b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c
@@ -0,0 +1,537 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include "xe_gsc_proxy.h"
+
+#include <linux/component.h>
+#include <linux/delay.h>
+
+#include <drm/drm_managed.h>
+#include <drm/i915_component.h>
+#include <drm/i915_gsc_proxy_mei_interface.h>
+
+#include "abi/gsc_proxy_commands_abi.h"
+#include "regs/xe_gsc_regs.h"
+#include "xe_bo.h"
+#include "xe_gsc.h"
+#include "xe_gsc_submit.h"
+#include "xe_gt.h"
+#include "xe_gt_printk.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_pm.h"
+
+/*
+ * GSC proxy:
+ * The GSC uC needs to communicate with the CSME to perform certain operations.
+ * Since the GSC can't perform this communication directly on platforms where it
+ * is integrated in GT, the graphics driver needs to transfer the messages from
+ * GSC to CSME and back. The proxy flow must be manually started after the GSC
+ * is loaded to signal to GSC that we're ready to handle its messages and allow
+ * it to query its init data from CSME; GSC will then trigger an HECI2 interrupt
+ * if it needs to send messages to CSME again.
+ * The proxy flow is as follows:
+ * 1 - Xe submits a request to GSC asking for the message to CSME
+ * 2 - GSC replies with the proxy header + payload for CSME
+ * 3 - Xe sends the reply from GSC as-is to CSME via the mei proxy component
+ * 4 - CSME replies with the proxy header + payload for GSC
+ * 5 - Xe submits a request to GSC with the reply from CSME
+ * 6 - GSC replies either with a new header + payload (same as step 2, so we
+ * restart from there) or with an end message.
+ */
+
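
The flow above is driven from three entry points declared in xe_gsc_proxy.h; a hedged sketch of how the rest of the driver is expected to call them (this mirrors the xe_gsc.c hunks earlier in this patch, with error handling trimmed):

	/* driver init: allocate the proxy buffers and register the MEI component */
	err = xe_gsc_proxy_init(gsc);
	if (err && err != -ENODEV)
		return err;

	/* after the GSC FW is loaded: enable the HECI2 interrupt and run the
	 * first exchange so the GSC can fetch its init data from CSME */
	err = xe_gsc_proxy_start(gsc);

	/* HECI2 interrupt: set GSC_ACTION_SW_PROXY and queue gsc->work, which
	 * ends up calling xe_gsc_proxy_request_handler() */
	xe_gsc_proxy_irq_handler(gsc, iir);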
+/*
+ * The component should load quite quickly in most cases, but it could take
+ * a bit. Use a very big timeout just to cover the worst-case scenario.
+ */
+#define GSC_PROXY_INIT_TIMEOUT_MS 20000
+
+/* shorthand define for code compactness */
+#define PROXY_HDR_SIZE (sizeof(struct xe_gsc_proxy_header))
+
+/* the protocol supports up to 32K in each direction */
+#define GSC_PROXY_BUFFER_SIZE SZ_32K
+#define GSC_PROXY_CHANNEL_SIZE (GSC_PROXY_BUFFER_SIZE * 2)
+
+static struct xe_gt *
+gsc_to_gt(struct xe_gsc *gsc)
+{
+ return container_of(gsc, struct xe_gt, uc.gsc);
+}
+
+static inline struct xe_device *kdev_to_xe(struct device *kdev)
+{
+ return dev_get_drvdata(kdev);
+}
+
+static bool gsc_proxy_init_done(struct xe_gsc *gsc)
+{
+ struct xe_gt *gt = gsc_to_gt(gsc);
+ u32 fwsts1 = xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE));
+
+ return REG_FIELD_GET(HECI1_FWSTS1_CURRENT_STATE, fwsts1) ==
+ HECI1_FWSTS1_PROXY_STATE_NORMAL;
+}
+
+static void __gsc_proxy_irq_rmw(struct xe_gsc *gsc, u32 clr, u32 set)
+{
+ struct xe_gt *gt = gsc_to_gt(gsc);
+
+ /* make sure we never accidentally write the RST bit */
+ clr |= HECI_H_CSR_RST;
+
+ xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE), clr, set);
+}
+
+static void gsc_proxy_irq_clear(struct xe_gsc *gsc)
+{
+ /* The status bit is cleared by writing to it */
+ __gsc_proxy_irq_rmw(gsc, 0, HECI_H_CSR_IS);
+}
+
+static void gsc_proxy_irq_toggle(struct xe_gsc *gsc, bool enabled)
+{
+ u32 set = enabled ? HECI_H_CSR_IE : 0;
+ u32 clr = enabled ? 0 : HECI_H_CSR_IE;
+
+ __gsc_proxy_irq_rmw(gsc, clr, set);
+}
+
+static int proxy_send_to_csme(struct xe_gsc *gsc, u32 size)
+{
+ struct xe_gt *gt = gsc_to_gt(gsc);
+ struct i915_gsc_proxy_component *comp = gsc->proxy.component;
+ int ret;
+
+ ret = comp->ops->send(comp->mei_dev, gsc->proxy.to_csme, size);
+ if (ret < 0) {
+ xe_gt_err(gt, "Failed to send CSME proxy message\n");
+ return ret;
+ }
+
+ ret = comp->ops->recv(comp->mei_dev, gsc->proxy.from_csme, GSC_PROXY_BUFFER_SIZE);
+ if (ret < 0) {
+ xe_gt_err(gt, "Failed to receive CSME proxy message\n");
+ return ret;
+ }
+
+ return ret;
+}
+
+static int proxy_send_to_gsc(struct xe_gsc *gsc, u32 size)
+{
+ struct xe_gt *gt = gsc_to_gt(gsc);
+ u64 addr_in = xe_bo_ggtt_addr(gsc->proxy.bo);
+ u64 addr_out = addr_in + GSC_PROXY_BUFFER_SIZE;
+ int err;
+
+ /* the message must contain at least the gsc and proxy headers */
+ if (size > GSC_PROXY_BUFFER_SIZE) {
+ xe_gt_err(gt, "Invalid GSC proxy message size: %u\n", size);
+ return -EINVAL;
+ }
+
+ err = xe_gsc_pkt_submit_kernel(gsc, addr_in, size,
+ addr_out, GSC_PROXY_BUFFER_SIZE);
+ if (err) {
+ xe_gt_err(gt, "Failed to submit gsc proxy rq (%pe)\n", ERR_PTR(err));
+ return err;
+ }
+
+ return 0;
+}
+
+static int validate_proxy_header(struct xe_gsc_proxy_header *header,
+ u32 source, u32 dest, u32 max_size)
+{
+ u32 type = FIELD_GET(GSC_PROXY_TYPE, header->hdr);
+ u32 length = FIELD_GET(GSC_PROXY_PAYLOAD_LENGTH, header->hdr);
+
+ if (header->destination != dest || header->source != source)
+ return -ENOEXEC;
+
+ if (length + PROXY_HDR_SIZE > max_size)
+ return -E2BIG;
+
+ switch (type) {
+ case GSC_PROXY_MSG_TYPE_PROXY_PAYLOAD:
+ if (length > 0)
+ break;
+ fallthrough;
+ case GSC_PROXY_MSG_TYPE_PROXY_INVALID:
+ return -EIO;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+#define proxy_header_wr(xe_, map_, offset_, field_, val_) \
+ xe_map_wr_field(xe_, map_, offset_, struct xe_gsc_proxy_header, field_, val_)
+
+#define proxy_header_rd(xe_, map_, offset_, field_) \
+ xe_map_rd_field(xe_, map_, offset_, struct xe_gsc_proxy_header, field_)
+
+static u32 emit_proxy_header(struct xe_device *xe, struct iosys_map *map, u32 offset)
+{
+ xe_map_memset(xe, map, offset, 0, PROXY_HDR_SIZE);
+
+ proxy_header_wr(xe, map, offset, hdr,
+ FIELD_PREP(GSC_PROXY_TYPE, GSC_PROXY_MSG_TYPE_PROXY_QUERY) |
+ FIELD_PREP(GSC_PROXY_PAYLOAD_LENGTH, 0));
+
+ proxy_header_wr(xe, map, offset, source, GSC_PROXY_ADDRESSING_KMD);
+ proxy_header_wr(xe, map, offset, destination, GSC_PROXY_ADDRESSING_GSC);
+ proxy_header_wr(xe, map, offset, status, 0);
+
+ return offset + PROXY_HDR_SIZE;
+}
+
+static int proxy_query(struct xe_gsc *gsc)
+{
+ struct xe_gt *gt = gsc_to_gt(gsc);
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_gsc_proxy_header *to_csme_hdr = gsc->proxy.to_csme;
+ void *to_csme_payload = gsc->proxy.to_csme + PROXY_HDR_SIZE;
+ u32 wr_offset;
+ u32 reply_offset;
+ u32 size;
+ int ret;
+
+ wr_offset = xe_gsc_emit_header(xe, &gsc->proxy.to_gsc, 0,
+ HECI_MEADDRESS_PROXY, 0, PROXY_HDR_SIZE);
+ wr_offset = emit_proxy_header(xe, &gsc->proxy.to_gsc, wr_offset);
+
+ size = wr_offset;
+
+ while (1) {
+ /*
+ * Poison the GSC response header space to make sure we don't
+ * read a stale reply.
+ */
+ xe_gsc_poison_header(xe, &gsc->proxy.from_gsc, 0);
+
+ /* send proxy message to GSC */
+ ret = proxy_send_to_gsc(gsc, size);
+ if (ret)
+ goto proxy_error;
+
+ /* check the reply from GSC */
+ ret = xe_gsc_read_out_header(xe, &gsc->proxy.from_gsc, 0,
+ PROXY_HDR_SIZE, &reply_offset);
+ if (ret) {
+ xe_gt_err(gt, "Invalid gsc header in proxy reply (%pe)\n",
+ ERR_PTR(ret));
+ goto proxy_error;
+ }
+
+ /* copy the proxy header reply from GSC */
+ xe_map_memcpy_from(xe, to_csme_hdr, &gsc->proxy.from_gsc,
+ reply_offset, PROXY_HDR_SIZE);
+
+ /* stop if this was the last message */
+ if (FIELD_GET(GSC_PROXY_TYPE, to_csme_hdr->hdr) == GSC_PROXY_MSG_TYPE_PROXY_END)
+ break;
+
+ /* make sure the GSC-to-CSME proxy header is sane */
+ ret = validate_proxy_header(to_csme_hdr,
+ GSC_PROXY_ADDRESSING_GSC,
+ GSC_PROXY_ADDRESSING_CSME,
+ GSC_PROXY_BUFFER_SIZE - reply_offset);
+ if (ret) {
+ xe_gt_err(gt, "invalid GSC to CSME proxy header! (%pe)\n",
+ ERR_PTR(ret));
+ goto proxy_error;
+ }
+
+ /* copy the rest of the message */
+ size = FIELD_GET(GSC_PROXY_PAYLOAD_LENGTH, to_csme_hdr->hdr);
+ xe_map_memcpy_from(xe, to_csme_payload, &gsc->proxy.from_gsc,
+ reply_offset + PROXY_HDR_SIZE, size);
+
+ /* send the GSC message to the CSME */
+ ret = proxy_send_to_csme(gsc, size + PROXY_HDR_SIZE);
+ if (ret < 0)
+ goto proxy_error;
+
+ /* reply size from CSME, including the proxy header */
+ size = ret;
+ if (size < PROXY_HDR_SIZE) {
+ xe_gt_err(gt, "CSME to GSC proxy msg too small: 0x%x\n", size);
+ ret = -EPROTO;
+ goto proxy_error;
+ }
+
+ /* make sure the CSME-to-GSC proxy header is sane */
+ ret = validate_proxy_header(gsc->proxy.from_csme,
+ GSC_PROXY_ADDRESSING_CSME,
+ GSC_PROXY_ADDRESSING_GSC,
+ GSC_PROXY_BUFFER_SIZE - reply_offset);
+ if (ret) {
+ xe_gt_err(gt, "invalid CSME to GSC proxy header! %d\n", ret);
+ goto proxy_error;
+ }
+
+ /* Emit a new header for sending the reply to the GSC */
+ wr_offset = xe_gsc_emit_header(xe, &gsc->proxy.to_gsc, 0,
+ HECI_MEADDRESS_PROXY, 0, size);
+
+ /* copy the CSME reply and update the total msg size to include the GSC header */
+ xe_map_memcpy_to(xe, &gsc->proxy.to_gsc, wr_offset, gsc->proxy.from_csme, size);
+
+ size += wr_offset;
+ }
+
+proxy_error:
+ return ret < 0 ? ret : 0;
+}
+
+int xe_gsc_proxy_request_handler(struct xe_gsc *gsc)
+{
+ struct xe_gt *gt = gsc_to_gt(gsc);
+ int slept;
+ int err;
+
+ if (!gsc->proxy.component_added)
+ return -ENODEV;
+
+ /* when GSC is loaded, we can queue this before the component is bound */
+ for (slept = 0; slept < GSC_PROXY_INIT_TIMEOUT_MS; slept += 100) {
+ if (gsc->proxy.component)
+ break;
+
+ msleep(100);
+ }
+
+ mutex_lock(&gsc->proxy.mutex);
+ if (!gsc->proxy.component) {
+ xe_gt_err(gt, "GSC proxy component not bound!\n");
+ err = -EIO;
+ } else {
+ /*
+ * clear the pending interrupt and allow new proxy requests to
+ * be generated while we handle the current one
+ */
+ gsc_proxy_irq_clear(gsc);
+ err = proxy_query(gsc);
+ }
+ mutex_unlock(&gsc->proxy.mutex);
+ return err;
+}
+
+void xe_gsc_proxy_irq_handler(struct xe_gsc *gsc, u32 iir)
+{
+ struct xe_gt *gt = gsc_to_gt(gsc);
+
+ if (unlikely(!iir))
+ return;
+
+ if (!gsc->proxy.component) {
+ xe_gt_err(gt, "GSC proxy irq received without the component being bound!\n");
+ return;
+ }
+
+ spin_lock(&gsc->lock);
+ gsc->work_actions |= GSC_ACTION_SW_PROXY;
+ spin_unlock(&gsc->lock);
+
+ queue_work(gsc->wq, &gsc->work);
+}
+
+static int xe_gsc_proxy_component_bind(struct device *xe_kdev,
+ struct device *mei_kdev, void *data)
+{
+ struct xe_device *xe = kdev_to_xe(xe_kdev);
+ struct xe_gt *gt = xe->tiles[0].media_gt;
+ struct xe_gsc *gsc = &gt->uc.gsc;
+
+ mutex_lock(&gsc->proxy.mutex);
+ gsc->proxy.component = data;
+ gsc->proxy.component->mei_dev = mei_kdev;
+ mutex_unlock(&gsc->proxy.mutex);
+
+ return 0;
+}
+
+static void xe_gsc_proxy_component_unbind(struct device *xe_kdev,
+ struct device *mei_kdev, void *data)
+{
+ struct xe_device *xe = kdev_to_xe(xe_kdev);
+ struct xe_gt *gt = xe->tiles[0].media_gt;
+ struct xe_gsc *gsc = &gt->uc.gsc;
+
+ xe_gsc_wait_for_worker_completion(gsc);
+
+ mutex_lock(&gsc->proxy.mutex);
+ gsc->proxy.component = NULL;
+ mutex_unlock(&gsc->proxy.mutex);
+}
+
+static const struct component_ops xe_gsc_proxy_component_ops = {
+ .bind = xe_gsc_proxy_component_bind,
+ .unbind = xe_gsc_proxy_component_unbind,
+};
+
+static void proxy_channel_free(struct drm_device *drm, void *arg)
+{
+ struct xe_gsc *gsc = arg;
+
+ if (!gsc->proxy.bo)
+ return;
+
+ if (gsc->proxy.to_csme) {
+ kfree(gsc->proxy.to_csme);
+ gsc->proxy.to_csme = NULL;
+ gsc->proxy.from_csme = NULL;
+ }
+
+ if (gsc->proxy.bo) {
+ iosys_map_clear(&gsc->proxy.to_gsc);
+ iosys_map_clear(&gsc->proxy.from_gsc);
+ xe_bo_unpin_map_no_vm(gsc->proxy.bo);
+ gsc->proxy.bo = NULL;
+ }
+}
+
+static int proxy_channel_alloc(struct xe_gsc *gsc)
+{
+ struct xe_gt *gt = gsc_to_gt(gsc);
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_bo *bo;
+ void *csme;
+ int err;
+
+ csme = kzalloc(GSC_PROXY_CHANNEL_SIZE, GFP_KERNEL);
+ if (!csme)
+ return -ENOMEM;
+
+ bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_PROXY_CHANNEL_SIZE,
+ ttm_bo_type_kernel,
+ XE_BO_CREATE_SYSTEM_BIT |
+ XE_BO_CREATE_GGTT_BIT);
+ if (IS_ERR(bo)) {
+ kfree(csme);
+ return PTR_ERR(bo);
+ }
+
+ gsc->proxy.bo = bo;
+ gsc->proxy.to_gsc = IOSYS_MAP_INIT_OFFSET(&bo->vmap, 0);
+ gsc->proxy.from_gsc = IOSYS_MAP_INIT_OFFSET(&bo->vmap, GSC_PROXY_BUFFER_SIZE);
+ gsc->proxy.to_csme = csme;
+ gsc->proxy.from_csme = csme + GSC_PROXY_BUFFER_SIZE;
+
+ err = drmm_add_action_or_reset(&xe->drm, proxy_channel_free, gsc);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+/**
+ * xe_gsc_proxy_init() - init objects and MEI component required by GSC proxy
+ * @gsc: the GSC uC
+ *
+ * Return: 0 if the initialization was successful, a negative errno otherwise.
+ */
+int xe_gsc_proxy_init(struct xe_gsc *gsc)
+{
+ int err;
+ struct xe_gt *gt = gsc_to_gt(gsc);
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_device *xe = tile_to_xe(tile);
+
+ mutex_init(&gsc->proxy.mutex);
+
+ if (!IS_ENABLED(CONFIG_INTEL_MEI_GSC_PROXY)) {
+ xe_gt_info(gt, "can't init GSC proxy due to missing mei component\n");
+ return -ENODEV;
+ }
+
+ /* no multi-tile devices with this feature yet */
+ if (tile->id > 0) {
+ xe_gt_err(gt, "unexpected GSC proxy init on tile %u\n", tile->id);
+ return -EINVAL;
+ }
+
+ err = proxy_channel_alloc(gsc);
+ if (err)
+ return err;
+
+ err = component_add_typed(xe->drm.dev, &xe_gsc_proxy_component_ops,
+ I915_COMPONENT_GSC_PROXY);
+ if (err < 0) {
+ xe_gt_err(gt, "Failed to add GSC_PROXY component (%pe)\n", ERR_PTR(err));
+ return err;
+ }
+
+ gsc->proxy.component_added = true;
+
+ /* the component must be removed before unload, so can't use drmm for cleanup */
+
+ return 0;
+}
+
+/**
+ * xe_gsc_proxy_remove() - remove the GSC proxy MEI component
+ * @gsc: the GSC uC
+ */
+void xe_gsc_proxy_remove(struct xe_gsc *gsc)
+{
+ struct xe_gt *gt = gsc_to_gt(gsc);
+ struct xe_device *xe = gt_to_xe(gt);
+ int err = 0;
+
+ if (!gsc->proxy.component_added)
+ return;
+
+ /* disable HECI2 IRQs */
+ xe_pm_runtime_get(xe);
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
+ if (err)
+ xe_gt_err(gt, "failed to get forcewake to disable GSC interrupts\n");
+
+ /* try to disable the irq even if forcewake failed */
+ gsc_proxy_irq_toggle(gsc, false);
+
+ if (!err)
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
+ xe_pm_runtime_put(xe);
+
+ xe_gsc_wait_for_worker_completion(gsc);
+
+ component_del(xe->drm.dev, &xe_gsc_proxy_component_ops);
+ gsc->proxy.component_added = false;
+}
+
+/**
+ * xe_gsc_proxy_start() - start the proxy by submitting the first request
+ * @gsc: the GSC uC
+ *
+ * Return: 0 if the proxy is now enabled, a negative errno otherwise.
+ */
+int xe_gsc_proxy_start(struct xe_gsc *gsc)
+{
+ int err;
+
+ /* enable the proxy interrupt in the GSC shim layer */
+ gsc_proxy_irq_toggle(gsc, true);
+
+ /*
+ * The handling of the first proxy request must be manually triggered to
+ * notify the GSC that we're ready to support the proxy flow.
+ */
+ err = xe_gsc_proxy_request_handler(gsc);
+ if (err)
+ return err;
+
+ if (!gsc_proxy_init_done(gsc)) {
+ xe_gt_err(gsc_to_gt(gsc), "GSC FW reports proxy init not completed\n");
+ return -EIO;
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.h b/drivers/gpu/drm/xe/xe_gsc_proxy.h
new file mode 100644
index 0000000000..908f9441f0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GSC_PROXY_H_
+#define _XE_GSC_PROXY_H_
+
+#include <linux/types.h>
+
+struct xe_gsc;
+
+int xe_gsc_proxy_init(struct xe_gsc *gsc);
+void xe_gsc_proxy_remove(struct xe_gsc *gsc);
+int xe_gsc_proxy_start(struct xe_gsc *gsc);
+
+int xe_gsc_proxy_request_handler(struct xe_gsc *gsc);
+void xe_gsc_proxy_irq_handler(struct xe_gsc *gsc, u32 iir);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.c b/drivers/gpu/drm/xe/xe_gsc_submit.c
index 8c5381e591..348994b271 100644
--- a/drivers/gpu/drm/xe/xe_gsc_submit.c
+++ b/drivers/gpu/drm/xe/xe_gsc_submit.c
@@ -5,6 +5,8 @@
#include "xe_gsc_submit.h"
+#include <linux/poison.h>
+
#include "abi/gsc_command_header_abi.h"
#include "xe_bb.h"
#include "xe_exec_queue.h"
@@ -69,6 +71,17 @@ u32 xe_gsc_emit_header(struct xe_device *xe, struct iosys_map *map, u32 offset,
};
/**
+ * xe_gsc_poison_header - poison the MTL GSC header in memory
+ * @xe: the Xe device
+ * @map: the iosys map to write to
+ * @offset: offset from the start of the map at which the header resides
+ */
+void xe_gsc_poison_header(struct xe_device *xe, struct iosys_map *map, u32 offset)
+{
+ xe_map_memset(xe, map, offset, POISON_FREE, GSC_HDR_SIZE);
+};
+
+/**
* xe_gsc_check_and_update_pending - check the pending bit and update the input
* header with the retry handle from the output header
* @xe: the Xe device
@@ -112,11 +125,18 @@ int xe_gsc_read_out_header(struct xe_device *xe,
{
u32 marker = mtl_gsc_header_rd(xe, map, offset, validity_marker);
u32 size = mtl_gsc_header_rd(xe, map, offset, message_size);
+ u32 status = mtl_gsc_header_rd(xe, map, offset, status);
u32 payload_size = size - GSC_HDR_SIZE;
if (marker != GSC_HECI_VALIDITY_MARKER)
return -EPROTO;
+ if (status != 0) {
+ drm_err(&xe->drm, "GSC header readout indicates error: %d\n",
+ status);
+ return -EINVAL;
+ }
+
if (size < GSC_HDR_SIZE || payload_size < min_payload_size)
return -ENODATA;
diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.h b/drivers/gpu/drm/xe/xe_gsc_submit.h
index 0801da5d44..1939855031 100644
--- a/drivers/gpu/drm/xe/xe_gsc_submit.h
+++ b/drivers/gpu/drm/xe/xe_gsc_submit.h
@@ -14,6 +14,7 @@ struct xe_gsc;
u32 xe_gsc_emit_header(struct xe_device *xe, struct iosys_map *map, u32 offset,
u8 heci_client_id, u64 host_session_id, u32 payload_size);
+void xe_gsc_poison_header(struct xe_device *xe, struct iosys_map *map, u32 offset);
bool xe_gsc_check_and_update_pending(struct xe_device *xe,
struct iosys_map *in, u32 offset_in,
diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h
index 57fefd66a7..138d8cc0f1 100644
--- a/drivers/gpu/drm/xe/xe_gsc_types.h
+++ b/drivers/gpu/drm/xe/xe_gsc_types.h
@@ -6,12 +6,17 @@
#ifndef _XE_GSC_TYPES_H_
#define _XE_GSC_TYPES_H_
+#include <linux/iosys-map.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
#include <linux/workqueue.h>
#include "xe_uc_fw_types.h"
struct xe_bo;
struct xe_exec_queue;
+struct i915_gsc_proxy_component;
/**
* struct xe_gsc - GSC
@@ -34,6 +39,34 @@ struct xe_gsc {
/** @work: delayed load and proxy handling work */
struct work_struct work;
+
+ /** @lock: protects access to the work_actions mask */
+ spinlock_t lock;
+
+ /** @work_actions: mask of actions to be performed in the work */
+ u32 work_actions;
+#define GSC_ACTION_FW_LOAD BIT(0)
+#define GSC_ACTION_SW_PROXY BIT(1)
+
+ /** @proxy: sub-structure containing the SW proxy-related variables */
+ struct {
+ /** @proxy.component: struct for communication with mei component */
+ struct i915_gsc_proxy_component *component;
+ /** @proxy.mutex: protects the component binding and usage */
+ struct mutex mutex;
+ /** @proxy.component_added: whether the component has been added */
+ bool component_added;
+ /** @proxy.bo: object to store message to and from the GSC */
+ struct xe_bo *bo;
+ /** @proxy.to_gsc: map of the memory used to send messages to the GSC */
+ struct iosys_map to_gsc;
+ /** @proxy.from_gsc: map of the memory used to recv messages from the GSC */
+ struct iosys_map from_gsc;
+ /** @proxy.to_csme: pointer to the memory used to send messages to CSME */
+ void *to_csme;
+ /** @proxy.from_csme: pointer to the memory used to recv messages from CSME */
+ void *from_csme;
+ } proxy;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index d23ee1397c..f9705430ad 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -78,6 +78,19 @@ void xe_gt_sanitize(struct xe_gt *gt)
gt->uc.guc.submission_state.enabled = false;
}
+/**
+ * xe_gt_remove() - Clean up the GT structures before driver removal
+ * @gt: the GT object
+ *
+ * This function should only act on objects/structures that must be cleaned
+ * before the driver removal callback is complete and therefore can't be
+ * deferred to a drmm action.
+ */
+void xe_gt_remove(struct xe_gt *gt)
+{
+ xe_uc_remove(&gt->uc);
+}
+
static void gt_fini(struct drm_device *drm, void *arg)
{
struct xe_gt *gt = arg;
@@ -235,7 +248,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
return -ENOMEM;
q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1,
- hwe, EXEC_QUEUE_FLAG_KERNEL);
+ hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
if (IS_ERR(q)) {
err = PTR_ERR(q);
xe_gt_err(gt, "hwe %s: xe_exec_queue_create failed (%pe)\n",
@@ -252,7 +265,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
}
nop_q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance),
- 1, hwe, EXEC_QUEUE_FLAG_KERNEL);
+ 1, hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
if (IS_ERR(nop_q)) {
err = PTR_ERR(nop_q);
xe_gt_err(gt, "hwe %s: nop xe_exec_queue_create failed (%pe)\n",
@@ -301,9 +314,6 @@ int xe_gt_init_early(struct xe_gt *gt)
if (err)
return err;
- xe_gt_topology_init(gt);
- xe_gt_mcr_init(gt);
-
err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
if (err)
return err;
@@ -327,7 +337,7 @@ static void dump_pat_on_error(struct xe_gt *gt)
char prefix[32];
snprintf(prefix, sizeof(prefix), "[GT%u Error]", gt->info.id);
- p = drm_debug_printer(prefix);
+ p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, prefix);
xe_pat_dump(gt, &p);
}
@@ -341,8 +351,6 @@ static int gt_fw_domain_init(struct xe_gt *gt)
if (err)
goto err_hw_fence_irq;
- xe_pat_init(gt);
-
if (!xe_gt_is_media_type(gt)) {
err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt);
if (err)
@@ -351,22 +359,8 @@ static int gt_fw_domain_init(struct xe_gt *gt)
xe_lmtt_init(&gt_to_tile(gt)->sriov.pf.lmtt);
}
- err = xe_uc_init(&gt->uc);
- if (err)
- goto err_force_wake;
-
- /* Raise GT freq to speed up HuC/GuC load */
- xe_guc_pc_init_early(&gt->uc.guc.pc);
-
- err = xe_uc_init_hwconfig(&gt->uc);
- if (err)
- goto err_force_wake;
-
xe_gt_idle_sysfs_init(&gt->gtidle);
- /* XXX: Fake that we pull the engine mask from hwconfig blob */
- gt->info.engine_mask = gt->info.__engine_mask;
-
/* Enable per hw engine IRQs */
xe_irq_enable_hwe(gt);
@@ -388,6 +382,12 @@ static int gt_fw_domain_init(struct xe_gt *gt)
if (err)
goto err_force_wake;
+ /*
+ * Stash hardware-reported version. Since this register does not exist
+ * on pre-MTL platforms, reading it there will (correctly) return 0.
+ */
+ gt->info.gmdid = xe_mmio_read32(gt, GMD_ID);
+
err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
XE_WARN_ON(err);
xe_device_mem_access_put(gt_to_xe(gt));
@@ -430,10 +430,6 @@ static int all_fw_domain_init(struct xe_gt *gt)
if (err)
goto err_force_wake;
- err = xe_uc_init_post_hwconfig(&gt->uc);
- if (err)
- goto err_force_wake;
-
if (!xe_gt_is_media_type(gt)) {
/*
* USM has its only SA pool to non-block behind user operations
@@ -460,6 +456,10 @@ static int all_fw_domain_init(struct xe_gt *gt)
}
}
+ err = xe_uc_init_post_hwconfig(&gt->uc);
+ if (err)
+ goto err_force_wake;
+
err = xe_uc_init_hw(&gt->uc);
if (err)
goto err_force_wake;
@@ -489,6 +489,42 @@ err_hw_fence_irq:
return err;
}
+/*
+ * Initialize enough GT to be able to load GuC in order to obtain hwconfig and
+ * enable CTB communication.
+ */
+int xe_gt_init_hwconfig(struct xe_gt *gt)
+{
+ int err;
+
+ xe_device_mem_access_get(gt_to_xe(gt));
+ err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (err)
+ goto out;
+
+ xe_gt_topology_init(gt);
+ xe_gt_mcr_init(gt);
+ xe_pat_init(gt);
+
+ err = xe_uc_init(&gt->uc);
+ if (err)
+ goto out_fw;
+
+ err = xe_uc_init_hwconfig(&gt->uc);
+ if (err)
+ goto out_fw;
+
+ /* XXX: Fake that we pull the engine mask from hwconfig blob */
+ gt->info.engine_mask = gt->info.__engine_mask;
+
+out_fw:
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+out:
+ xe_device_mem_access_put(gt_to_xe(gt));
+
+ return err;
+}
+
int xe_gt_init(struct xe_gt *gt)
{
int err;
@@ -624,12 +660,12 @@ static int gt_reset(struct xe_gt *gt)
if (err)
goto err_out;
+ xe_gt_tlb_invalidation_reset(gt);
+
err = do_gt_reset(gt);
if (err)
goto err_out;
- xe_gt_tlb_invalidation_reset(gt);
-
err = do_gt_restart(gt);
if (err)
goto err_out;
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 4486e083f5..ed6ea8057e 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -33,6 +33,7 @@ static inline bool xe_fault_inject_gt_reset(void)
#endif
struct xe_gt *xe_gt_alloc(struct xe_tile *tile);
+int xe_gt_init_hwconfig(struct xe_gt *gt);
int xe_gt_init_early(struct xe_gt *gt);
int xe_gt_init(struct xe_gt *gt);
int xe_gt_record_default_lrcs(struct xe_gt *gt);
@@ -41,6 +42,7 @@ int xe_gt_suspend(struct xe_gt *gt);
int xe_gt_resume(struct xe_gt *gt);
void xe_gt_reset_async(struct xe_gt *gt);
void xe_gt_sanitize(struct xe_gt *gt);
+void xe_gt_remove(struct xe_gt *gt);
/**
* xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
index 9fcae65b64..e7a39ad7ad 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -126,6 +126,13 @@ static const struct attribute *gt_idle_attrs[] = {
static void gt_idle_sysfs_fini(struct drm_device *drm, void *arg)
{
struct kobject *kobj = arg;
+ struct xe_gt *gt = kobj_to_gt(kobj->parent);
+
+ if (gt_to_xe(gt)->info.skip_guc_pc) {
+ XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
+ xe_gt_idle_disable_c6(gt);
+ xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+ }
sysfs_remove_files(kobj, gt_idle_attrs);
kobject_put(kobj);
@@ -184,7 +191,7 @@ void xe_gt_idle_enable_c6(struct xe_gt *gt)
void xe_gt_idle_disable_c6(struct xe_gt *gt)
{
xe_device_assert_mem_access(gt_to_xe(gt));
- xe_force_wake_assert_held(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
xe_mmio_write32(gt, PG_ENABLE, 0);
xe_mmio_write32(gt, RC_CONTROL, 0);
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index 8546cd3cc5..a7ab9ba645 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -10,6 +10,7 @@
#include "xe_gt_topology.h"
#include "xe_gt_types.h"
#include "xe_mmio.h"
+#include "xe_sriov.h"
/**
* DOC: GT Multicast/Replicated (MCR) Register Support
@@ -38,6 +39,8 @@
* ``init_steering_*()`` functions is to apply the platform-specific rules for
* each MCR register type to identify a steering target that will select a
* non-terminated instance.
+ *
+ * MCR registers are not available on Virtual Function (VF).
*/
#define STEER_SEMAPHORE XE_REG(0xFD0)
@@ -352,6 +355,9 @@ void xe_gt_mcr_init(struct xe_gt *gt)
BUILD_BUG_ON(IMPLICIT_STEERING + 1 != NUM_STEERING_TYPES);
BUILD_BUG_ON(ARRAY_SIZE(xe_steering_types) != NUM_STEERING_TYPES);
+ if (IS_SRIOV_VF(xe))
+ return;
+
spin_lock_init(&gt->mcr_lock);
if (gt->info.type == XE_GT_TYPE_MEDIA) {
@@ -405,6 +411,9 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
+ if (IS_SRIOV_VF(xe))
+ return;
+
if (xe->info.platform == XE_DG2) {
u32 steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, 0) |
REG_FIELD_PREP(MCR_SUBSLICE_MASK, 2);
@@ -588,6 +597,8 @@ u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, struct xe_reg_mcr reg_mcr)
u32 val;
bool steer;
+ xe_gt_assert(gt, !IS_SRIOV_VF(gt_to_xe(gt)));
+
steer = xe_gt_mcr_get_nonterminated_steering(gt, reg_mcr,
&group, &instance);
@@ -619,6 +630,8 @@ u32 xe_gt_mcr_unicast_read(struct xe_gt *gt,
{
u32 val;
+ xe_gt_assert(gt, !IS_SRIOV_VF(gt_to_xe(gt)));
+
mcr_lock(gt);
val = rw_with_mcr_steering(gt, reg_mcr, MCR_OP_READ, group, instance, 0);
mcr_unlock(gt);
@@ -640,6 +653,8 @@ u32 xe_gt_mcr_unicast_read(struct xe_gt *gt,
void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
u32 value, int group, int instance)
{
+ xe_gt_assert(gt, !IS_SRIOV_VF(gt_to_xe(gt)));
+
mcr_lock(gt);
rw_with_mcr_steering(gt, reg_mcr, MCR_OP_WRITE, group, instance, value);
mcr_unlock(gt);
@@ -658,6 +673,8 @@ void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr reg_mcr,
{
struct xe_reg reg = to_xe_reg(reg_mcr);
+ xe_gt_assert(gt, !IS_SRIOV_VF(gt_to_xe(gt)));
+
/*
* Synchronize with any unicast operations. Once we have exclusive
* access, the MULTICAST bit should already be set, so there's no need
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index ab8536c4fd..fa9e9853c5 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -100,10 +100,9 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma,
{
struct xe_bo *bo = xe_vma_bo(vma);
struct xe_vm *vm = xe_vma_vm(vma);
- unsigned int num_shared = 2; /* slots for bind + move */
int err;
- err = xe_vm_prepare_vma(exec, vma, num_shared);
+ err = xe_vm_lock_vma(exec, vma);
if (err)
return err;
@@ -287,9 +286,9 @@ static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
bool ret = false;
spin_lock_irq(&pf_queue->lock);
- if (pf_queue->head != pf_queue->tail) {
+ if (pf_queue->tail != pf_queue->head) {
desc = (const struct xe_guc_pagefault_desc *)
- (pf_queue->data + pf_queue->head);
+ (pf_queue->data + pf_queue->tail);
pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0);
pf->trva_fault = FIELD_GET(XE2_PFD_TRVA_FAULT, desc->dw0);
@@ -307,7 +306,7 @@ static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) <<
PFD_VIRTUAL_ADDR_LO_SHIFT;
- pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) %
+ pf_queue->tail = (pf_queue->tail + PF_MSG_LEN_DW) %
PF_QUEUE_NUM_DW;
ret = true;
}
@@ -320,7 +319,7 @@ static bool pf_queue_full(struct pf_queue *pf_queue)
{
lockdep_assert_held(&pf_queue->lock);
- return CIRC_SPACE(pf_queue->tail, pf_queue->head, PF_QUEUE_NUM_DW) <=
+ return CIRC_SPACE(pf_queue->head, pf_queue->tail, PF_QUEUE_NUM_DW) <=
PF_MSG_LEN_DW;
}
@@ -333,6 +332,11 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
u32 asid;
bool full;
+ /*
+ * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0
+ */
+ BUILD_BUG_ON(PF_QUEUE_NUM_DW % PF_MSG_LEN_DW);
+
if (unlikely(len != PF_MSG_LEN_DW))
return -EPROTO;
@@ -342,8 +346,8 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
spin_lock_irqsave(&pf_queue->lock, flags);
full = pf_queue_full(pf_queue);
if (!full) {
- memcpy(pf_queue->data + pf_queue->tail, msg, len * sizeof(u32));
- pf_queue->tail = (pf_queue->tail + len) % PF_QUEUE_NUM_DW;
+ memcpy(pf_queue->data + pf_queue->head, msg, len * sizeof(u32));
+ pf_queue->head = (pf_queue->head + len) % PF_QUEUE_NUM_DW;
queue_work(gt->usm.pf_wq, &pf_queue->worker);
} else {
drm_warn(&xe->drm, "PF Queue full, shouldn't be possible");
@@ -389,7 +393,7 @@ static void pf_queue_work_func(struct work_struct *w)
send_pagefault_reply(&gt->uc.guc, &reply);
if (time_after(jiffies, threshold) &&
- pf_queue->head != pf_queue->tail) {
+ pf_queue->tail != pf_queue->head) {
queue_work(gt->usm.pf_wq, w);
break;
}
@@ -564,9 +568,9 @@ static bool get_acc(struct acc_queue *acc_queue, struct acc *acc)
bool ret = false;
spin_lock(&acc_queue->lock);
- if (acc_queue->head != acc_queue->tail) {
+ if (acc_queue->tail != acc_queue->head) {
desc = (const struct xe_guc_acc_desc *)
- (acc_queue->data + acc_queue->head);
+ (acc_queue->data + acc_queue->tail);
acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2);
acc->sub_granularity = FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31 |
@@ -579,7 +583,7 @@ static bool get_acc(struct acc_queue *acc_queue, struct acc *acc)
acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI,
desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO);
- acc_queue->head = (acc_queue->head + ACC_MSG_LEN_DW) %
+ acc_queue->tail = (acc_queue->tail + ACC_MSG_LEN_DW) %
ACC_QUEUE_NUM_DW;
ret = true;
}
@@ -607,7 +611,7 @@ static void acc_queue_work_func(struct work_struct *w)
}
if (time_after(jiffies, threshold) &&
- acc_queue->head != acc_queue->tail) {
+ acc_queue->tail != acc_queue->head) {
queue_work(gt->usm.acc_wq, w);
break;
}
@@ -618,7 +622,7 @@ static bool acc_queue_full(struct acc_queue *acc_queue)
{
lockdep_assert_held(&acc_queue->lock);
- return CIRC_SPACE(acc_queue->tail, acc_queue->head, ACC_QUEUE_NUM_DW) <=
+ return CIRC_SPACE(acc_queue->head, acc_queue->tail, ACC_QUEUE_NUM_DW) <=
ACC_MSG_LEN_DW;
}
@@ -629,6 +633,11 @@ int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len)
u32 asid;
bool full;
+ /*
+ * The below logic doesn't work unless ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW == 0
+ */
+ BUILD_BUG_ON(ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW);
+
if (unlikely(len != ACC_MSG_LEN_DW))
return -EPROTO;
@@ -638,9 +647,9 @@ int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len)
spin_lock(&acc_queue->lock);
full = acc_queue_full(acc_queue);
if (!full) {
- memcpy(acc_queue->data + acc_queue->tail, msg,
+ memcpy(acc_queue->data + acc_queue->head, msg,
len * sizeof(u32));
- acc_queue->tail = (acc_queue->tail + len) % ACC_QUEUE_NUM_DW;
+ acc_queue->head = (acc_queue->head + len) % ACC_QUEUE_NUM_DW;
queue_work(gt->usm.acc_wq, &acc_queue->worker);
} else {
drm_warn(&gt_to_xe(gt)->drm, "ACC Queue full, dropping ACC");
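
The queue hunks above switch the page-fault and access-counter rings to the conventional <linux/circ_buf.h> roles: the G2H handler (producer) advances head, the worker (consumer) advances tail. A minimal, generic sketch of that convention (the names are illustrative only):

	#include <linux/circ_buf.h>
	#include <linux/types.h>

	#define RING_NUM_DW 1024	/* power of two, as the circ_buf macros require */

	struct ring {
		u32 data[RING_NUM_DW];
		u32 head;	/* only the producer writes this */
		u32 tail;	/* only the consumer writes this */
	};

	/* producer side: is there room for one more message? */
	static bool ring_has_space(const struct ring *r, u32 msg_len_dw)
	{
		return CIRC_SPACE(r->head, r->tail, RING_NUM_DW) >= msg_len_dw;
	}

	/* consumer side: is there anything to read? */
	static bool ring_has_data(const struct ring *r)
	{
		return CIRC_CNT(r->head, r->tail, RING_NUM_DW) != 0;
	}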
diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h
index 5991bcadd4..c2b004d3f4 100644
--- a/drivers/gpu/drm/xe/xe_gt_printk.h
+++ b/drivers/gpu/drm/xe/xe_gt_printk.h
@@ -43,4 +43,48 @@
#define xe_gt_WARN_ON_ONCE(_gt, _condition) \
xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "gt_WARN_ON_ONCE", __stringify(_condition))
+static inline void __xe_gt_printfn_err(struct drm_printer *p, struct va_format *vaf)
+{
+ struct xe_gt *gt = p->arg;
+
+ xe_gt_err(gt, "%pV", vaf);
+}
+
+static inline void __xe_gt_printfn_info(struct drm_printer *p, struct va_format *vaf)
+{
+ struct xe_gt *gt = p->arg;
+
+ xe_gt_info(gt, "%pV", vaf);
+}
+
+/**
+ * xe_gt_err_printer - Construct a &drm_printer that outputs to xe_gt_err()
+ * @gt: the &xe_gt pointer to use in xe_gt_err()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_gt_err_printer(struct xe_gt *gt)
+{
+ struct drm_printer p = {
+ .printfn = __xe_gt_printfn_err,
+ .arg = gt,
+ };
+ return p;
+}
+
+/**
+ * xe_gt_info_printer - Construct a &drm_printer that outputs to xe_gt_info()
+ * @gt: the &xe_gt pointer to use in xe_gt_info()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_gt_info_printer(struct xe_gt *gt)
+{
+ struct drm_printer p = {
+ .printfn = __xe_gt_printfn_info,
+ .arg = gt,
+ };
+ return p;
+}
+
#endif
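
A hypothetical caller of the new printer helpers, mirroring how the TLB invalidation change below uses them: construct the printer once and every drm_printf() line is routed through xe_gt_err() with the usual GT prefix. The function name and call site are illustrative only; it assumes the usual xe/drm headers.

static void dump_ct_on_error(struct xe_gt *gt)
{
	struct drm_printer p = xe_gt_err_printer(gt);

	drm_printf(&p, "dumping GuC CT state after failure\n");
	xe_guc_ct_print(&gt->uc.guc.ct, &p, true);
}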
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_printk.h b/drivers/gpu/drm/xe/xe_gt_sriov_printk.h
new file mode 100644
index 0000000000..17624b1630
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_printk.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GT_SRIOV_PRINTK_H_
+#define _XE_GT_SRIOV_PRINTK_H_
+
+#include "xe_gt_printk.h"
+#include "xe_sriov_printk.h"
+
+#define __xe_gt_sriov_printk(gt, _level, fmt, ...) \
+ xe_gt_printk((gt), _level, "%s" fmt, xe_sriov_printk_prefix(gt_to_xe(gt)), ##__VA_ARGS__)
+
+#define xe_gt_sriov_err(_gt, _fmt, ...) \
+ __xe_gt_sriov_printk(_gt, err, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_sriov_notice(_gt, _fmt, ...) \
+ __xe_gt_sriov_printk(_gt, notice, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_sriov_info(_gt, _fmt, ...) \
+ __xe_gt_sriov_printk(_gt, info, _fmt, ##__VA_ARGS__)
+
+#define xe_gt_sriov_dbg(_gt, _fmt, ...) \
+ __xe_gt_sriov_printk(_gt, dbg, _fmt, ##__VA_ARGS__)
+
+/* for low level noisy debug messages */
+#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
+#define xe_gt_sriov_dbg_verbose(_gt, _fmt, ...) xe_gt_sriov_dbg(_gt, _fmt, ##__VA_ARGS__)
+#else
+#define xe_gt_sriov_dbg_verbose(_gt, _fmt, ...) typecheck(struct xe_gt *, (_gt))
+#endif
+
+#endif
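
A hypothetical user of these macros (the message text and function are illustrative): the SR-IOV mode prefix is prepended automatically, and the compiled-out xe_gt_sriov_dbg_verbose() still type-checks its gt argument via typecheck().

static void report_vf_doorbells(struct xe_gt *gt, unsigned int vfid, unsigned int num_dbs)
{
	/* Always emitted, with the SR-IOV mode prefix prepended. */
	xe_gt_sriov_info(gt, "VF%u: provisioned %u doorbell(s)\n", vfid, num_dbs);

	/* Compiled out unless CONFIG_DRM_XE_DEBUG_SRIOV is enabled. */
	xe_gt_sriov_dbg_verbose(gt, "VF%u: doorbell provisioning complete\n", vfid);
}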
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index f4c485289d..e598a4363d 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -8,6 +8,7 @@
#include "abi/guc_actions_abi.h"
#include "xe_device.h"
#include "xe_gt.h"
+#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_trace.h"
@@ -30,8 +31,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work)
break;
trace_xe_gt_tlb_invalidation_fence_timeout(fence);
- drm_err(&gt_to_xe(gt)->drm, "gt%d: TLB invalidation fence timeout, seqno=%d recv=%d",
- gt->info.id, fence->seqno, gt->tlb_invalidation.seqno_recv);
+ xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d",
+ fence->seqno, gt->tlb_invalidation.seqno_recv);
list_del(&fence->link);
fence->base.error = -ETIME;
@@ -60,7 +61,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences);
spin_lock_init(&gt->tlb_invalidation.pending_lock);
spin_lock_init(&gt->tlb_invalidation.lock);
- gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1);
INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr,
xe_gt_tlb_fence_timeout);
@@ -320,9 +320,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
*/
int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
{
- struct xe_device *xe = gt_to_xe(gt);
struct xe_guc *guc = &gt->uc.guc;
- struct drm_printer p = drm_err_printer(__func__);
int ret;
/* Execlists not supported */
@@ -337,8 +335,10 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
tlb_invalidation_seqno_past(gt, seqno),
TLB_TIMEOUT);
if (!ret) {
- drm_err(&xe->drm, "gt%d: TLB invalidation time'd out, seqno=%d, recv=%d\n",
- gt->info.id, seqno, gt->tlb_invalidation.seqno_recv);
+ struct drm_printer p = xe_gt_err_printer(gt);
+
+ xe_gt_err(gt, "TLB invalidation time'd out, seqno=%d, recv=%d\n",
+ seqno, gt->tlb_invalidation.seqno_recv);
xe_guc_ct_print(&guc->ct, &p, true);
return -ETIME;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index a8d7f272c3..5dc62fe1be 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -84,7 +84,7 @@ void
xe_gt_topology_init(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
- struct drm_printer p = drm_debug_printer("GT topology");
+ struct drm_printer p;
int num_geometry_regs, num_compute_regs;
get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs);
@@ -107,6 +107,8 @@ xe_gt_topology_init(struct xe_gt *gt)
XE2_GT_COMPUTE_DSS_2);
load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
+ p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");
+
xe_gt_topology_dump(gt, &p);
}
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index f746846604..07b2f724ec 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -103,20 +103,22 @@ struct xe_gt {
/** @info: GT info */
struct {
- /** @type: type of GT */
+ /** @info.type: type of GT */
enum xe_gt_type type;
- /** @id: Unique ID of this GT within the PCI Device */
+ /** @info.id: Unique ID of this GT within the PCI Device */
u8 id;
- /** @reference_clock: clock frequency */
+ /** @info.reference_clock: clock frequency */
u32 reference_clock;
- /** @engine_mask: mask of engines present on GT */
+ /** @info.engine_mask: mask of engines present on GT */
u64 engine_mask;
/**
- * @__engine_mask: mask of engines present on GT read from
+ * @info.__engine_mask: mask of engines present on GT read from
* xe_pci.c, used to fake reading the engine_mask from the
* hwconfig blob.
*/
u64 __engine_mask;
+ /** @info.gmdid: raw GMD_ID value from hardware */
+ u32 gmdid;
} info;
/**
@@ -125,14 +127,14 @@ struct xe_gt {
* specific offset, as well as their own forcewake handling.
*/
struct {
- /** @fw: force wake for GT */
+ /** @mmio.fw: force wake for GT */
struct xe_force_wake fw;
/**
- * @adj_limit: adjust MMIO address if address is below this
+ * @mmio.adj_limit: adjust MMIO address if address is below this
* value
*/
u32 adj_limit;
- /** @adj_offset: offect to add to MMIO address when adjusting */
+ /** @mmio.adj_offset: offset to add to MMIO address when adjusting */
u32 adj_offset;
} mmio;
@@ -144,7 +146,7 @@ struct xe_gt {
/** @reset: state for GT resets */
struct {
/**
- * @worker: work so GT resets can done async allowing to reset
+ * @reset.worker: work so GT resets can be done async allowing the reset
* code to safely flush all code paths
*/
struct work_struct worker;
@@ -152,36 +154,30 @@ struct xe_gt {
/** @tlb_invalidation: TLB invalidation state */
struct {
- /** @seqno: TLB invalidation seqno, protected by CT lock */
+ /** @tlb_invalidation.seqno: TLB invalidation seqno, protected by CT lock */
#define TLB_INVALIDATION_SEQNO_MAX 0x100000
int seqno;
/**
- * @seqno_recv: last received TLB invalidation seqno, protected by CT lock
+ * @tlb_invalidation.seqno_recv: last received TLB invalidation seqno,
+ * protected by CT lock
*/
int seqno_recv;
/**
- * @pending_fences: list of pending fences waiting TLB
+ * @tlb_invalidation.pending_fences: list of pending fences waiting TLB
* invaliations, protected by CT lock
*/
struct list_head pending_fences;
/**
- * @pending_lock: protects @pending_fences and updating
- * @seqno_recv.
+ * @tlb_invalidation.pending_lock: protects @tlb_invalidation.pending_fences
+ * and updating @tlb_invalidation.seqno_recv.
*/
spinlock_t pending_lock;
/**
- * @fence_tdr: schedules a delayed call to
+ * @tlb_invalidation.fence_tdr: schedules a delayed call to
* xe_gt_tlb_fence_timeout after the timeut interval is over.
*/
struct delayed_work fence_tdr;
- /** @fence_context: context for TLB invalidation fences */
- u64 fence_context;
- /**
- * @fence_seqno: seqno to TLB invalidation fences, protected by
- * tlb_invalidation.lock
- */
- u32 fence_seqno;
- /** @lock: protects TLB invalidation fences */
+ /** @tlb_invalidation.lock: protects TLB invalidation fences */
spinlock_t lock;
} tlb_invalidation;
@@ -196,7 +192,7 @@ struct xe_gt {
/** @usm: unified shared memory state */
struct {
/**
- * @bb_pool: Pool from which batchbuffers, for USM operations
+ * @usm.bb_pool: Pool from which batchbuffers, for USM operations
* (e.g. migrations, fixing page tables), are allocated.
* Dedicated pool needed so USM operations to not get blocked
* behind any user operations which may have resulted in a
@@ -204,66 +200,67 @@ struct xe_gt {
*/
struct xe_sa_manager *bb_pool;
/**
- * @reserved_bcs_instance: reserved BCS instance used for USM
+ * @usm.reserved_bcs_instance: reserved BCS instance used for USM
* operations (e.g. mmigrations, fixing page tables)
*/
u16 reserved_bcs_instance;
- /** @pf_wq: page fault work queue, unbound, high priority */
+ /** @usm.pf_wq: page fault work queue, unbound, high priority */
struct workqueue_struct *pf_wq;
- /** @acc_wq: access counter work queue, unbound, high priority */
+ /** @usm.acc_wq: access counter work queue, unbound, high priority */
struct workqueue_struct *acc_wq;
/**
- * @pf_queue: Page fault queue used to sync faults so faults can
+ * @usm.pf_queue: Page fault queue used to sync faults so faults can
* be processed not under the GuC CT lock. The queue is sized so
* it can sync all possible faults (1 per physical engine).
* Multiple queues exists for page faults from different VMs are
* be processed in parallel.
*/
struct pf_queue {
- /** @gt: back pointer to GT */
+ /** @usm.pf_queue.gt: back pointer to GT */
struct xe_gt *gt;
#define PF_QUEUE_NUM_DW 128
- /** @data: data in the page fault queue */
+ /** @usm.pf_queue.data: data in the page fault queue */
u32 data[PF_QUEUE_NUM_DW];
/**
- * @head: head pointer in DWs for page fault queue,
- * moved by worker which processes faults.
+ * @usm.pf_queue.tail: tail pointer in DWs for page fault queue,
+ * moved by worker which processes faults (consumer).
*/
- u16 head;
+ u16 tail;
/**
- * @tail: tail pointer in DWs for page fault queue,
- * moved by G2H handler.
+ * @usm.pf_queue.head: head pointer in DWs for page fault queue,
+ * moved by G2H handler (producer).
*/
- u16 tail;
- /** @lock: protects page fault queue */
+ u16 head;
+ /** @usm.pf_queue.lock: protects page fault queue */
spinlock_t lock;
- /** @worker: to process page faults */
+ /** @usm.pf_queue.worker: to process page faults */
struct work_struct worker;
#define NUM_PF_QUEUE 4
} pf_queue[NUM_PF_QUEUE];
/**
- * @acc_queue: Same as page fault queue, cannot process access
+ * @usm.acc_queue: Same as page fault queue, cannot process access
* counters under CT lock.
*/
struct acc_queue {
- /** @gt: back pointer to GT */
+ /** @usm.acc_queue.gt: back pointer to GT */
struct xe_gt *gt;
#define ACC_QUEUE_NUM_DW 128
- /** @data: data in the page fault queue */
+ /** @usm.acc_queue.data: data in the access counter queue */
u32 data[ACC_QUEUE_NUM_DW];
/**
- * @head: head pointer in DWs for page fault queue,
- * moved by worker which processes faults.
+ * @usm.acc_queue.tail: tail pointer in DWs for access counter queue,
+ * moved by worker which processes counters
+ * (consumer).
*/
- u16 head;
+ u16 tail;
/**
- * @tail: tail pointer in DWs for page fault queue,
- * moved by G2H handler.
+ * @usm.acc_queue.head: head pointer in DWs for access counter queue,
+ * moved by G2H handler (producer).
*/
- u16 tail;
- /** @lock: protects page fault queue */
+ u16 head;
+ /** @usm.acc_queue.lock: protects access counter queue */
spinlock_t lock;
- /** @worker: to process access counters */
+ /** @usm.acc_queue.worker: to process access counters */
struct work_struct worker;
#define NUM_ACC_QUEUE 4
} acc_queue[NUM_ACC_QUEUE];
@@ -300,7 +297,7 @@ struct xe_gt {
/** @pcode: GT's PCODE */
struct {
- /** @lock: protecting GT's PCODE mailbox data */
+ /** @pcode.lock: protecting GT's PCODE mailbox data */
struct mutex lock;
} pcode;
@@ -312,32 +309,32 @@ struct xe_gt {
/** @mocs: info */
struct {
- /** @uc_index: UC index */
+ /** @mocs.uc_index: UC index */
u8 uc_index;
- /** @wb_index: WB index, only used on L3_CCS platforms */
+ /** @mocs.wb_index: WB index, only used on L3_CCS platforms */
u8 wb_index;
} mocs;
/** @fuse_topo: GT topology reported by fuse registers */
struct {
- /** @g_dss_mask: dual-subslices usable by geometry */
+ /** @fuse_topo.g_dss_mask: dual-subslices usable by geometry */
xe_dss_mask_t g_dss_mask;
- /** @c_dss_mask: dual-subslices usable by compute */
+ /** @fuse_topo.c_dss_mask: dual-subslices usable by compute */
xe_dss_mask_t c_dss_mask;
- /** @eu_mask_per_dss: EU mask per DSS*/
+ /** @fuse_topo.eu_mask_per_dss: EU mask per DSS */
xe_eu_mask_t eu_mask_per_dss;
} fuse_topo;
/** @steering: register steering for individual HW units */
struct {
- /* @ranges: register ranges used for this steering type */
+ /** @steering.ranges: register ranges used for this steering type */
const struct xe_mmio_range *ranges;
- /** @group_target: target to steer accesses to */
+ /** @steering.group_target: target to steer accesses to */
u16 group_target;
- /** @instance_target: instance to steer accesses to */
+ /** @steering.instance_target: instance to steer accesses to */
u16 instance_target;
} steering[NUM_STEERING_TYPES];
@@ -349,13 +346,13 @@ struct xe_gt {
/** @wa_active: keep track of active workarounds */
struct {
- /** @gt: bitmap with active GT workarounds */
+ /** @wa_active.gt: bitmap with active GT workarounds */
unsigned long *gt;
- /** @engine: bitmap with active engine workarounds */
+ /** @wa_active.engine: bitmap with active engine workarounds */
unsigned long *engine;
- /** @lrc: bitmap with active LRC workarounds */
+ /** @wa_active.lrc: bitmap with active LRC workarounds */
unsigned long *lrc;
- /** @oob: bitmap with active OOB workaroudns */
+ /** @wa_active.oob: bitmap with active OOB workarounds */
unsigned long *oob;
} wa_active;
};
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index 0a61390c64..a38f59b435 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -7,9 +7,10 @@
#include <drm/drm_managed.h>
+#include <generated/xe_wa_oob.h>
+
#include "abi/guc_actions_abi.h"
#include "abi/guc_errors_abi.h"
-#include "generated/xe_wa_oob.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_guc_regs.h"
#include "xe_bo.h"
@@ -21,9 +22,12 @@
#include "xe_guc_hwconfig.h"
#include "xe_guc_log.h"
#include "xe_guc_pc.h"
+#include "xe_guc_relay.h"
#include "xe_guc_submit.h"
+#include "xe_memirq.h"
#include "xe_mmio.h"
#include "xe_platform_types.h"
+#include "xe_sriov.h"
#include "xe_uc.h"
#include "xe_uc_fw.h"
#include "xe_wa.h"
@@ -129,22 +133,24 @@ static u32 guc_ctl_ads_flags(struct xe_guc *guc)
return flags;
}
+#define GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat))
+
static u32 guc_ctl_wa_flags(struct xe_guc *guc)
{
struct xe_device *xe = guc_to_xe(guc);
struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_uc_fw *uc_fw = &guc->fw;
+ struct xe_uc_fw_version *version = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
+
u32 flags = 0;
if (XE_WA(gt, 22012773006))
flags |= GUC_WA_POLLCS;
- if (XE_WA(gt, 16011759253))
- flags |= GUC_WA_GAM_CREDITS;
-
if (XE_WA(gt, 14014475959))
flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
- if (XE_WA(gt, 22011391025) || XE_WA(gt, 14012197797))
+ if (XE_WA(gt, 22011391025))
flags |= GUC_WA_DUAL_QUEUE;
/*
@@ -155,9 +161,6 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
if (GRAPHICS_VERx100(xe) < 1270)
flags |= GUC_WA_PRE_PARSER;
- if (XE_WA(gt, 16011777198))
- flags |= GUC_WA_RCS_RESET_BEFORE_RC6;
-
if (XE_WA(gt, 22012727170) || XE_WA(gt, 22012727685))
flags |= GUC_WA_CONTEXT_ISOLATION;
@@ -168,6 +171,14 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
if (XE_WA(gt, 1509372804))
flags |= GUC_WA_RENDER_RST_RC6_EXIT;
+ if (XE_WA(gt, 14018913170)) {
+ if (GUC_VER(version->major, version->minor, version->patch) >= GUC_VER(70, 7, 0))
+ flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6;
+ else
+ drm_dbg(&xe->drm, "Skip WA 14018913170: GUC version expected >= 70.7.0, found %u.%u.%u\n",
+ version->major, version->minor, version->patch);
+ }
+
return flags;
}
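
The GUC_VER() macro added above packs major/minor/patch into separate byte fields, so a plain integer compare orders firmware versions correctly (e.g. GUC_VER(70, 7, 0) == 0x00460700). A small illustrative helper built on the same idea, assuming only the GUC_VER() macro and the xe_uc_fw_version fields already used in the hunk above:

/* Illustrative only: true when the released GuC firmware is at least maj.min.pat. */
static bool guc_fw_at_least(const struct xe_uc_fw_version *v,
			    u32 maj, u32 min, u32 pat)
{
	return GUC_VER(v->major, v->minor, v->patch) >= GUC_VER(maj, min, pat);
}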
@@ -241,11 +252,54 @@ static void guc_fini(struct drm_device *drm, void *arg)
struct xe_guc *guc = arg;
xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
- xe_guc_pc_fini(&guc->pc);
xe_uc_fini_hw(&guc_to_gt(guc)->uc);
xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL);
}
+/**
+ * xe_guc_comm_init_early - early initialization of GuC communication
+ * @guc: the &xe_guc to initialize
+ *
+ * Must be called prior to first MMIO communication with GuC firmware.
+ */
+void xe_guc_comm_init_early(struct xe_guc *guc)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+
+ if (xe_gt_is_media_type(gt))
+ guc->notify_reg = MED_GUC_HOST_INTERRUPT;
+ else
+ guc->notify_reg = GUC_HOST_INTERRUPT;
+}
+
+static int xe_guc_realloc_post_hwconfig(struct xe_guc *guc)
+{
+ struct xe_tile *tile = gt_to_tile(guc_to_gt(guc));
+ struct xe_device *xe = guc_to_xe(guc);
+ int ret;
+
+ if (!IS_DGFX(guc_to_xe(guc)))
+ return 0;
+
+ ret = xe_managed_bo_reinit_in_vram(xe, tile, &guc->fw.bo);
+ if (ret)
+ return ret;
+
+ ret = xe_managed_bo_reinit_in_vram(xe, tile, &guc->log.bo);
+ if (ret)
+ return ret;
+
+ ret = xe_managed_bo_reinit_in_vram(xe, tile, &guc->ads.bo);
+ if (ret)
+ return ret;
+
+ ret = xe_managed_bo_reinit_in_vram(xe, tile, &guc->ct.bo);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
int xe_guc_init(struct xe_guc *guc)
{
struct xe_device *xe = guc_to_xe(guc);
@@ -272,7 +326,7 @@ int xe_guc_init(struct xe_guc *guc)
if (ret)
goto out;
- ret = xe_guc_pc_init(&guc->pc);
+ ret = xe_guc_relay_init(&guc->relay);
if (ret)
goto out;
@@ -282,10 +336,7 @@ int xe_guc_init(struct xe_guc *guc)
guc_init_params(guc);
- if (xe_gt_is_media_type(gt))
- guc->notify_reg = MED_GUC_HOST_INTERRUPT;
- else
- guc->notify_reg = GUC_HOST_INTERRUPT;
+ xe_guc_comm_init_early(guc);
xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE);
@@ -304,8 +355,18 @@ out:
*/
int xe_guc_init_post_hwconfig(struct xe_guc *guc)
{
+ int ret;
+
+ ret = xe_guc_realloc_post_hwconfig(guc);
+ if (ret)
+ return ret;
+
guc_init_params_post_hwconfig(guc);
+ ret = xe_guc_pc_init(&guc->pc);
+ if (ret)
+ return ret;
+
return xe_guc_ads_init_post_hwconfig(&guc->ads);
}
@@ -429,7 +490,6 @@ static int guc_wait_ucode(struct xe_guc *guc)
if (ret) {
struct drm_device *drm = &xe->drm;
- struct drm_printer p = drm_info_printer(drm->dev);
drm_info(drm, "GuC load failed: status = 0x%08X\n", status);
drm_info(drm, "GuC load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n",
@@ -451,8 +511,6 @@ static int guc_wait_ucode(struct xe_guc *guc)
SOFT_SCRATCH(13)));
ret = -ENXIO;
}
-
- xe_guc_log_print(&guc->log, &p);
} else {
drm_dbg(&xe->drm, "GuC successfully loaded");
}
@@ -516,6 +574,9 @@ int xe_guc_min_load_for_hwconfig(struct xe_guc *guc)
xe_guc_ads_populate_minimal(&guc->ads);
+ /* Raise GT freq to speed up HuC/GuC load */
+ xe_guc_pc_init_early(&guc->pc);
+
ret = __xe_guc_upload(guc);
if (ret)
return ret;
@@ -579,9 +640,19 @@ static void guc_enable_irq(struct xe_guc *guc)
int xe_guc_enable_communication(struct xe_guc *guc)
{
+ struct xe_device *xe = guc_to_xe(guc);
int err;
- guc_enable_irq(guc);
+ if (IS_SRIOV_VF(xe) && xe_device_has_memirq(xe)) {
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_tile *tile = gt_to_tile(gt);
+
+ err = xe_memirq_init_guc(&tile->sriov.vf.memirq, guc);
+ if (err)
+ return err;
+ } else {
+ guc_enable_irq(guc);
+ }
xe_mmio_rmw32(guc_to_gt(guc), PMINTRMSK,
ARAT_EXPIRED_INTRMSK, 0);
@@ -650,7 +721,7 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request,
BUILD_BUG_ON(VF_SW_FLAG_COUNT != MED_VF_SW_FLAG_COUNT);
- xe_assert(xe, !guc->ct.enabled);
+ xe_assert(xe, !xe_guc_ct_enabled(&guc->ct));
xe_assert(xe, len);
xe_assert(xe, len <= VF_SW_FLAG_COUNT);
xe_assert(xe, len <= MED_VF_SW_FLAG_COUNT);
@@ -707,8 +778,12 @@ timeout:
if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) !=
GUC_HXG_ORIGIN_GUC))
goto proto;
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) !=
+ GUC_HXG_TYPE_NO_RESPONSE_BUSY)
+ goto proto;
goto timeout;
+ }
}
if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) ==
@@ -832,7 +907,7 @@ int xe_guc_stop(struct xe_guc *guc)
{
int ret;
- xe_guc_ct_disable(&guc->ct);
+ xe_guc_ct_stop(&guc->ct);
ret = xe_guc_submit_stop(guc);
if (ret)
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
index d3e49e7fd7..94f2dc5f6f 100644
--- a/drivers/gpu/drm/xe/xe_guc.h
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -13,6 +13,7 @@
struct drm_printer;
+void xe_guc_comm_init_early(struct xe_guc *guc);
int xe_guc_init(struct xe_guc *guc);
int xe_guc_init_post_hwconfig(struct xe_guc *guc);
int xe_guc_post_load_init(struct xe_guc *guc);
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 390e6f1bf4..5339f8a490 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -100,7 +100,7 @@ struct __guc_ads_blob {
struct guc_engine_usage engine_usage;
struct guc_um_init_params um_init_params;
/* From here on, location is dynamic! Refer to above diagram. */
- struct guc_mmio_reg regset[0];
+ struct guc_mmio_reg regset[];
} __packed;
#define ads_blob_read(ads_, field_) \
@@ -273,7 +273,7 @@ int xe_guc_ads_init(struct xe_guc_ads *ads)
ads->regset_size = calculate_regset_size(gt);
bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE,
- XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+ XE_BO_CREATE_SYSTEM_BIT |
XE_BO_CREATE_GGTT_BIT);
if (IS_ERR(bo))
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 24a33fa364..8bbfa45798 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -9,16 +9,21 @@
#include <linux/circ_buf.h>
#include <linux/delay.h>
+#include <kunit/static_stub.h>
+
#include <drm/drm_managed.h>
#include "abi/guc_actions_abi.h"
+#include "abi/guc_actions_sriov_abi.h"
#include "abi/guc_klvs_abi.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_pagefault.h"
+#include "xe_gt_printk.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_guc.h"
+#include "xe_guc_relay.h"
#include "xe_guc_submit.h"
#include "xe_map.h"
#include "xe_pm.h"
@@ -28,6 +33,7 @@
struct g2h_fence {
u32 *response_buffer;
u32 seqno;
+ u32 response_data;
u16 response_len;
u16 error;
u16 hint;
@@ -40,6 +46,7 @@ struct g2h_fence {
static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer)
{
g2h_fence->response_buffer = response_buffer;
+ g2h_fence->response_data = 0;
g2h_fence->response_len = 0;
g2h_fence->fail = false;
g2h_fence->retry = false;
@@ -113,6 +120,7 @@ static void guc_ct_fini(struct drm_device *drm, void *arg)
{
struct xe_guc_ct *ct = arg;
+ destroy_workqueue(ct->g2h_wq);
xa_destroy(&ct->fence_lookup);
}
@@ -138,17 +146,24 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
xe_assert(xe, !(guc_ct_size() % PAGE_SIZE));
- drmm_mutex_init(&xe->drm, &ct->lock);
+ ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", 0);
+ if (!ct->g2h_wq)
+ return -ENOMEM;
+
spin_lock_init(&ct->fast_lock);
xa_init(&ct->fence_lookup);
INIT_WORK(&ct->g2h_worker, g2h_worker_func);
init_waitqueue_head(&ct->wq);
init_waitqueue_head(&ct->g2h_fence_wq);
+ err = drmm_mutex_init(&xe->drm, &ct->lock);
+ if (err)
+ return err;
+
primelockdep(ct);
bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(),
- XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+ XE_BO_CREATE_SYSTEM_BIT |
XE_BO_CREATE_GGTT_BIT);
if (IS_ERR(bo))
return PTR_ERR(bo);
@@ -159,6 +174,8 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
if (err)
return err;
+ xe_assert(xe, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED);
+ ct->state = XE_GUC_CT_STATE_DISABLED;
return 0;
}
@@ -278,12 +295,35 @@ static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable)
return ret > 0 ? -EPROTO : ret;
}
+static void xe_guc_ct_set_state(struct xe_guc_ct *ct,
+ enum xe_guc_ct_state state)
+{
+ mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */
+ spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */
+
+ xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding == 0 ||
+ state == XE_GUC_CT_STATE_STOPPED);
+
+ ct->g2h_outstanding = 0;
+ ct->state = state;
+
+ spin_unlock_irq(&ct->fast_lock);
+
+ /*
+ * Lockdep doesn't like this under the fast lock and the destroy only
+ * needs to be serialized with the send path which the ct lock provides.
+ */
+ xa_destroy(&ct->fence_lookup);
+
+ mutex_unlock(&ct->lock);
+}
+
int xe_guc_ct_enable(struct xe_guc_ct *ct)
{
struct xe_device *xe = ct_to_xe(ct);
int err;
- xe_assert(xe, !ct->enabled);
+ xe_assert(xe, !xe_guc_ct_enabled(ct));
guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
@@ -300,12 +340,7 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct)
if (err)
goto err_out;
- mutex_lock(&ct->lock);
- spin_lock_irq(&ct->fast_lock);
- ct->g2h_outstanding = 0;
- ct->enabled = true;
- spin_unlock_irq(&ct->fast_lock);
- mutex_unlock(&ct->lock);
+ xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_ENABLED);
smp_mb();
wake_up_all(&ct->wq);
@@ -319,15 +354,34 @@ err_out:
return err;
}
+static void stop_g2h_handler(struct xe_guc_ct *ct)
+{
+ cancel_work_sync(&ct->g2h_worker);
+}
+
+/**
+ * xe_guc_ct_disable - Set GuC to disabled state
+ * @ct: the &xe_guc_ct
+ *
+ * Set GuC CT to disabled state and stop g2h handler. No outstanding g2h expected
+ * in this transition.
+ */
void xe_guc_ct_disable(struct xe_guc_ct *ct)
{
- mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */
- spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */
- ct->enabled = false; /* Finally disable CT communication */
- spin_unlock_irq(&ct->fast_lock);
- mutex_unlock(&ct->lock);
+ xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_DISABLED);
+ stop_g2h_handler(ct);
+}
- xa_destroy(&ct->fence_lookup);
+/**
+ * xe_guc_ct_stop - Set GuC to stopped state
+ * @ct: the &xe_guc_ct
+ *
+ * Set GuC CT to stopped state, stop g2h handler, and clear any outstanding g2h
+ */
+void xe_guc_ct_stop(struct xe_guc_ct *ct)
+{
+ xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED);
+ stop_g2h_handler(ct);
}
static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len)
@@ -448,7 +502,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
} else {
cmd[1] =
- FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_FAST_REQUEST) |
FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
}
@@ -475,13 +529,34 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
return 0;
}
+/*
+ * The CT protocol accepts a 16-bit fence. This field is fully owned by the
+ * driver, the GuC will just copy it to the reply message. Since we need to
+ * be able to distinguish between replies to REQUEST and FAST_REQUEST messages,
+ * we use one bit of the seqno as an indicator for that and a rolling counter
+ * for the remaining 15 bits.
+ */
+#define CT_SEQNO_MASK GENMASK(14, 0)
+#define CT_SEQNO_UNTRACKED BIT(15)
+static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence)
+{
+ u32 seqno = ct->fence_seqno++ & CT_SEQNO_MASK;
+
+ if (!is_g2h_fence)
+ seqno |= CT_SEQNO_UNTRACKED;
+
+ return seqno;
+}
+
static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
u32 len, u32 g2h_len, u32 num_g2h,
struct g2h_fence *g2h_fence)
{
struct xe_device *xe = ct_to_xe(ct);
+ u16 seqno;
int ret;
+ xe_assert(xe, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
xe_assert(xe, !g2h_len || !g2h_fence);
xe_assert(xe, !num_g2h || !g2h_fence);
xe_assert(xe, !g2h_len || num_g2h);
@@ -493,11 +568,18 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
goto out;
}
- if (unlikely(!ct->enabled)) {
+ if (ct->state == XE_GUC_CT_STATE_DISABLED) {
ret = -ENODEV;
goto out;
}
+ if (ct->state == XE_GUC_CT_STATE_STOPPED) {
+ ret = -ECANCELED;
+ goto out;
+ }
+
+ xe_assert(xe, xe_guc_ct_enabled(ct));
+
if (g2h_fence) {
g2h_len = GUC_CTB_HXG_MSG_MAX_LEN;
num_g2h = 1;
@@ -505,7 +587,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
if (g2h_fence_needs_alloc(g2h_fence)) {
void *ptr;
- g2h_fence->seqno = (ct->fence_seqno++ & 0xffff);
+ g2h_fence->seqno = next_ct_seqno(ct, true);
ptr = xa_store(&ct->fence_lookup,
g2h_fence->seqno,
g2h_fence, GFP_ATOMIC);
@@ -514,6 +596,10 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
goto out;
}
}
+
+ seqno = g2h_fence->seqno;
+ } else {
+ seqno = next_ct_seqno(ct, false);
}
if (g2h_len)
@@ -523,8 +609,7 @@ retry:
if (unlikely(ret))
goto out_unlock;
- ret = h2g_write(ct, action, len, g2h_fence ? g2h_fence->seqno : 0,
- !!g2h_fence);
+ ret = h2g_write(ct, action, len, seqno, !!g2h_fence);
if (unlikely(ret)) {
if (ret == -EAGAIN)
goto retry;
@@ -682,7 +767,8 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret)
return false;
#define ct_alive(ct) \
- (ct->enabled && !ct->ctbs.h2g.info.broken && !ct->ctbs.g2h.info.broken)
+ (xe_guc_ct_enabled(ct) && !ct->ctbs.h2g.info.broken && \
+ !ct->ctbs.g2h.info.broken)
if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5))
return false;
#undef ct_alive
@@ -752,12 +838,31 @@ retry_same_fence:
ret = -EIO;
}
- return ret > 0 ? 0 : ret;
+ return ret > 0 ? response_buffer ? g2h_fence.response_len : g2h_fence.response_data : ret;
}
+/**
+ * xe_guc_ct_send_recv - Send and receive HXG to the GuC
+ * @ct: the &xe_guc_ct
+ * @action: the dword array with `HXG Request`_ message (can't be NULL)
+ * @len: length of the `HXG Request`_ message (in dwords, can't be 0)
+ * @response_buffer: placeholder for the `HXG Response`_ message (can be NULL)
+ *
+ * Send a `HXG Request`_ message to the GuC over CT communication channel and
+ * blocks until GuC replies with a `HXG Response`_ message.
+ *
+ * For non-blocking communication with GuC use xe_guc_ct_send().
+ *
+ * Note: The size of &response_buffer must be at least GUC_CTB_MAX_DWORDS_.
+ *
+ * Return: response length (in dwords) if &response_buffer was not NULL, or
+ * DATA0 from `HXG Response`_ if &response_buffer was NULL, or
+ * a negative error code on failure.
+ */
int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
u32 *response_buffer)
{
+ KUNIT_STATIC_STUB_REDIRECT(xe_guc_ct_send_recv, ct, action, len, response_buffer);
return guc_ct_send_recv(ct, action, len, response_buffer, false);
}
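
With the change above, a successful xe_guc_ct_send_recv() call that passes a NULL response buffer now returns DATA0 of the HXG response rather than 0. A hypothetical caller that only needs that single value (the action array contents and the function itself are placeholders, not a real GuC action):

static int query_guc_value(struct xe_guc *guc, u32 action_code, u32 *value)
{
	u32 request[] = { action_code };	/* placeholder single-dword request */
	int ret;

	ret = xe_guc_ct_send_recv(&guc->ct, request, ARRAY_SIZE(request), NULL);
	if (ret < 0)
		return ret;

	*value = ret;	/* DATA0 from the HXG response */
	return 0;
}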
@@ -767,9 +872,20 @@ int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action,
return guc_ct_send_recv(ct, action, len, response_buffer, true);
}
+static u32 *msg_to_hxg(u32 *msg)
+{
+ return msg + GUC_CTB_MSG_MIN_LEN;
+}
+
+static u32 msg_len_to_hxg_len(u32 len)
+{
+ return len - GUC_CTB_MSG_MIN_LEN;
+}
+
static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len)
{
- u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
+ u32 *hxg = msg_to_hxg(msg);
+ u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
lockdep_assert_held(&ct->lock);
@@ -786,18 +902,41 @@ static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len)
static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
{
- struct xe_device *xe = ct_to_xe(ct);
- u32 response_len = len - GUC_CTB_MSG_MIN_LEN;
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct xe_device *xe = gt_to_xe(gt);
+ u32 *hxg = msg_to_hxg(msg);
+ u32 hxg_len = msg_len_to_hxg_len(len);
u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, msg[0]);
- u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]);
+ u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]);
struct g2h_fence *g2h_fence;
lockdep_assert_held(&ct->lock);
+ /*
+ * Fences for FAST_REQUEST messages are not tracked in ct->fence_lookup.
+ * Those messages should never fail, so if we do get an error back it
+ * means we're likely doing an illegal operation and the GuC is
+ * rejecting it. We have no way to inform the code that submitted the
+ * H2G that the message was rejected, so we need to escalate the
+ * failure to trigger a reset.
+ */
+ if (fence & CT_SEQNO_UNTRACKED) {
+ if (type == GUC_HXG_TYPE_RESPONSE_FAILURE)
+ xe_gt_err(gt, "FAST_REQ H2G fence 0x%x failed! e=0x%x, h=%u\n",
+ fence,
+ FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, hxg[0]),
+ FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, hxg[0]));
+ else
+ xe_gt_err(gt, "unexpected response %u for FAST_REQ H2G fence 0x%x!\n",
+ type, fence);
+
+ return -EPROTO;
+ }
+
g2h_fence = xa_erase(&ct->fence_lookup, fence);
if (unlikely(!g2h_fence)) {
/* Don't tear down channel, as send could've timed out */
- drm_warn(&xe->drm, "G2H fence (%u) not found!\n", fence);
+ xe_gt_warn(gt, "G2H fence (%u) not found!\n", fence);
g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
return 0;
}
@@ -806,18 +945,16 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) {
g2h_fence->fail = true;
- g2h_fence->error =
- FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[1]);
- g2h_fence->hint =
- FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[1]);
+ g2h_fence->error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, hxg[0]);
+ g2h_fence->hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, hxg[0]);
} else if (type == GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
g2h_fence->retry = true;
- g2h_fence->reason =
- FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, msg[1]);
+ g2h_fence->reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, hxg[0]);
} else if (g2h_fence->response_buffer) {
- g2h_fence->response_len = response_len;
- memcpy(g2h_fence->response_buffer, msg + GUC_CTB_MSG_MIN_LEN,
- response_len * sizeof(u32));
+ g2h_fence->response_len = hxg_len;
+ memcpy(g2h_fence->response_buffer, hxg, hxg_len * sizeof(u32));
+ } else {
+ g2h_fence->response_data = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, hxg[0]);
}
g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
@@ -833,14 +970,13 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
{
struct xe_device *xe = ct_to_xe(ct);
- u32 hxg, origin, type;
+ u32 *hxg = msg_to_hxg(msg);
+ u32 origin, type;
int ret;
lockdep_assert_held(&ct->lock);
- hxg = msg[1];
-
- origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg);
+ origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg[0]);
if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) {
drm_err(&xe->drm,
"G2H channel broken on read, origin=%d, reset required\n",
@@ -850,7 +986,7 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
return -EPROTO;
}
- type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg);
+ type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]);
switch (type) {
case GUC_HXG_TYPE_EVENT:
ret = parse_g2h_event(ct, msg, len);
@@ -876,14 +1012,19 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
{
struct xe_device *xe = ct_to_xe(ct);
struct xe_guc *guc = ct_to_guc(ct);
- u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
- u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN;
- u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN;
+ u32 hxg_len = msg_len_to_hxg_len(len);
+ u32 *hxg = msg_to_hxg(msg);
+ u32 action, adj_len;
+ u32 *payload;
int ret = 0;
- if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT)
+ if (FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT)
return 0;
+ action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
+ payload = hxg + GUC_HXG_EVENT_MSG_MIN_LEN;
+ adj_len = hxg_len - GUC_HXG_EVENT_MSG_MIN_LEN;
+
switch (action) {
case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
ret = xe_guc_sched_done_handler(guc, payload, adj_len);
@@ -920,6 +1061,12 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
ret = xe_guc_access_counter_notify_handler(guc, payload,
adj_len);
break;
+ case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF:
+ ret = xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len);
+ break;
+ case XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF:
+ ret = xe_guc_relay_process_guc2vf(&guc->relay, hxg, hxg_len);
+ break;
default:
drm_err(&xe->drm, "unexpected action 0x%04x\n", action);
}
@@ -938,15 +1085,22 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
u32 tail, head, len;
s32 avail;
u32 action;
+ u32 *hxg;
+ xe_assert(xe, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
lockdep_assert_held(&ct->fast_lock);
- if (!ct->enabled)
+ if (ct->state == XE_GUC_CT_STATE_DISABLED)
return -ENODEV;
+ if (ct->state == XE_GUC_CT_STATE_STOPPED)
+ return -ECANCELED;
+
if (g2h->info.broken)
return -EPIPE;
+ xe_assert(xe, xe_guc_ct_enabled(ct));
+
/* Calculate DW available to read */
tail = desc_read(xe, g2h, tail);
avail = tail - g2h->info.head;
@@ -988,10 +1142,11 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
avail * sizeof(u32));
}
- action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
+ hxg = msg_to_hxg(msg);
+ action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
if (fast_path) {
- if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT)
+ if (FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT)
return 0;
switch (action) {
@@ -1017,9 +1172,11 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
{
struct xe_device *xe = ct_to_xe(ct);
struct xe_guc *guc = ct_to_guc(ct);
- u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]);
- u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN;
- u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN;
+ u32 hxg_len = msg_len_to_hxg_len(len);
+ u32 *hxg = msg_to_hxg(msg);
+ u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]);
+ u32 *payload = hxg + GUC_HXG_MSG_MIN_LEN;
+ u32 adj_len = hxg_len - GUC_HXG_MSG_MIN_LEN;
int ret = 0;
switch (action) {
@@ -1245,7 +1402,7 @@ struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct,
return NULL;
}
- if (ct->enabled) {
+ if (xe_guc_ct_enabled(ct)) {
snapshot->ct_enabled = true;
snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding);
guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g,
@@ -1271,7 +1428,7 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
return;
if (snapshot->ct_enabled) {
- drm_puts(p, "\nH2G CTB (all sizes in DW):\n");
+ drm_puts(p, "H2G CTB (all sizes in DW):\n");
guc_ctb_snapshot_print(&snapshot->h2g, p);
drm_puts(p, "\nG2H CTB (all sizes in DW):\n");
@@ -1280,7 +1437,7 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
drm_printf(p, "\tg2h outstanding: %d\n",
snapshot->g2h_outstanding);
} else {
- drm_puts(p, "\nCT disabled\n");
+ drm_puts(p, "CT disabled\n");
}
}
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
index f15f8a4857..105bb8e99a 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -13,6 +13,7 @@ struct drm_printer;
int xe_guc_ct_init(struct xe_guc_ct *ct);
int xe_guc_ct_enable(struct xe_guc_ct *ct);
void xe_guc_ct_disable(struct xe_guc_ct *ct);
+void xe_guc_ct_stop(struct xe_guc_ct *ct);
void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
struct xe_guc_ct_snapshot *
@@ -22,11 +23,18 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot,
void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic);
+static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct)
+{
+ return ct->state == XE_GUC_CT_STATE_ENABLED;
+}
+
static inline void xe_guc_ct_irq_handler(struct xe_guc_ct *ct)
{
+ if (!xe_guc_ct_enabled(ct))
+ return;
+
wake_up_all(&ct->wq);
- if (ct->enabled)
- queue_work(system_unbound_wq, &ct->g2h_worker);
+ queue_work(ct->g2h_wq, &ct->g2h_worker);
xe_guc_ct_fast_path(ct);
}
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
index d814d4ee3f..fede4c6e93 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
@@ -73,6 +73,20 @@ struct xe_guc_ct_snapshot {
};
/**
+ * enum xe_guc_ct_state - CT state
+ * @XE_GUC_CT_STATE_NOT_INITIALIZED: CT not initialized, messages not expected in this state
+ * @XE_GUC_CT_STATE_DISABLED: CT disabled, messages not expected in this state
+ * @XE_GUC_CT_STATE_STOPPED: CT stopped, drop messages without errors
+ * @XE_GUC_CT_STATE_ENABLED: CT enabled, messages sent / received in this state
+ */
+enum xe_guc_ct_state {
+ XE_GUC_CT_STATE_NOT_INITIALIZED = 0,
+ XE_GUC_CT_STATE_DISABLED,
+ XE_GUC_CT_STATE_STOPPED,
+ XE_GUC_CT_STATE_ENABLED,
+};
+
+/**
* struct xe_guc_ct - GuC command transport (CT) layer
*
* Includes a pair of CT buffers for bi-directional communication and tracking
@@ -87,17 +101,17 @@ struct xe_guc_ct {
spinlock_t fast_lock;
/** @ctbs: buffers for sending and receiving commands */
struct {
- /** @send: Host to GuC (H2G, send) channel */
+ /** @ctbs.send: Host to GuC (H2G, send) channel */
struct guc_ctb h2g;
- /** @recv: GuC to Host (G2H, receive) channel */
+ /** @ctbs.recv: GuC to Host (G2H, receive) channel */
struct guc_ctb g2h;
} ctbs;
/** @g2h_outstanding: number of outstanding G2H */
u32 g2h_outstanding;
/** @g2h_worker: worker to process G2H messages */
struct work_struct g2h_worker;
- /** @enabled: CT enabled */
- bool enabled;
+ /** @state: CT state */
+ enum xe_guc_ct_state state;
/** @fence_seqno: G2H fence seqno - 16 bits used by CT */
u32 fence_seqno;
/** @fence_lookup: G2H fence lookup */
@@ -106,6 +120,8 @@ struct xe_guc_ct {
wait_queue_head_t wq;
/** @g2h_fence_wq: wait queue used for G2H fencing */
wait_queue_head_t g2h_fence_wq;
+ /** @g2h_wq: used to process G2H */
+ struct workqueue_struct *g2h_wq;
/** @msg: Message buffer */
u32 msg[GUC_CTB_MSG_MAX_LEN];
/** @fast_msg: Message buffer */
diff --git a/drivers/gpu/drm/xe/xe_guc_db_mgr.c b/drivers/gpu/drm/xe/xe_guc_db_mgr.c
new file mode 100644
index 0000000000..8d9a0287df
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_db_mgr.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/bitmap.h>
+#include <linux/mutex.h>
+
+#include <drm/drm_managed.h>
+
+#include "regs/xe_guc_regs.h"
+
+#include "xe_assert.h"
+#include "xe_gt_printk.h"
+#include "xe_guc.h"
+#include "xe_guc_db_mgr.h"
+#include "xe_guc_types.h"
+
+/**
+ * DOC: GuC Doorbells
+ *
+ * The GFX doorbell solution provides a mechanism for submission of workload
+ * to the graphics hardware by a ring3 application without the penalty of
+ * ring transition for each workload submission.
+ *
+ * In SR-IOV mode, the doorbells are treated as shared resource and PF must
+ * be able to provision exclusive range of IDs across VFs, which may want to
+ * use this feature.
+ */
+
+static struct xe_guc *dbm_to_guc(struct xe_guc_db_mgr *dbm)
+{
+ return container_of(dbm, struct xe_guc, dbm);
+}
+
+static struct xe_gt *dbm_to_gt(struct xe_guc_db_mgr *dbm)
+{
+ return guc_to_gt(dbm_to_guc(dbm));
+}
+
+static struct xe_device *dbm_to_xe(struct xe_guc_db_mgr *dbm)
+{
+ return gt_to_xe(dbm_to_gt(dbm));
+}
+
+#define dbm_assert(_dbm, _cond) xe_gt_assert(dbm_to_gt(_dbm), _cond)
+#define dbm_mutex(_dbm) (&dbm_to_guc(_dbm)->submission_state.lock)
+
+static void dbm_print_locked(struct xe_guc_db_mgr *dbm, struct drm_printer *p, int indent);
+
+static void __fini_dbm(struct drm_device *drm, void *arg)
+{
+ struct xe_guc_db_mgr *dbm = arg;
+ unsigned int weight;
+
+ mutex_lock(dbm_mutex(dbm));
+
+ weight = bitmap_weight(dbm->bitmap, dbm->count);
+ if (weight) {
+ struct drm_printer p = xe_gt_info_printer(dbm_to_gt(dbm));
+
+ xe_gt_err(dbm_to_gt(dbm), "GuC doorbells manager unclean (%u/%u)\n",
+ weight, dbm->count);
+ dbm_print_locked(dbm, &p, 1);
+ }
+
+ bitmap_free(dbm->bitmap);
+ dbm->bitmap = NULL;
+ dbm->count = 0;
+
+ mutex_unlock(dbm_mutex(dbm));
+}
+
+/**
+ * xe_guc_db_mgr_init() - Initialize GuC Doorbells Manager.
+ * @dbm: the &xe_guc_db_mgr to initialize
+ * @count: number of doorbells to manage
+ *
+ * The bare-metal or PF driver can pass ~0 as &count to indicate that all
+ * doorbells supported by the hardware are available for use.
+ *
+ * Only VF drivers have to provide an explicit number of doorbell IDs
+ * that they can use.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_guc_db_mgr_init(struct xe_guc_db_mgr *dbm, unsigned int count)
+{
+ int ret;
+
+ if (count == ~0)
+ count = GUC_NUM_DOORBELLS;
+
+ dbm_assert(dbm, !dbm->bitmap);
+ dbm_assert(dbm, count <= GUC_NUM_DOORBELLS);
+
+ if (!count)
+ goto done;
+
+ dbm->bitmap = bitmap_zalloc(count, GFP_KERNEL);
+ if (!dbm->bitmap)
+ return -ENOMEM;
+ dbm->count = count;
+
+ ret = drmm_add_action_or_reset(&dbm_to_xe(dbm)->drm, __fini_dbm, dbm);
+ if (ret)
+ return ret;
+done:
+ xe_gt_dbg(dbm_to_gt(dbm), "using %u doorbell(s)\n", dbm->count);
+ return 0;
+}
+
+static int dbm_reserve_chunk_locked(struct xe_guc_db_mgr *dbm,
+ unsigned int count, unsigned int spare)
+{
+ unsigned int used;
+ int index;
+
+ dbm_assert(dbm, count);
+ dbm_assert(dbm, count <= GUC_NUM_DOORBELLS);
+ dbm_assert(dbm, dbm->count <= GUC_NUM_DOORBELLS);
+ lockdep_assert_held(dbm_mutex(dbm));
+
+ if (!dbm->count)
+ return -ENODATA;
+
+ if (spare) {
+ used = bitmap_weight(dbm->bitmap, dbm->count);
+ if (used + count + spare > dbm->count)
+ return -EDQUOT;
+ }
+
+ index = bitmap_find_next_zero_area(dbm->bitmap, dbm->count, 0, count, 0);
+ if (index >= dbm->count)
+ return -ENOSPC;
+
+ bitmap_set(dbm->bitmap, index, count);
+
+ return index;
+}
+
+static void dbm_release_chunk_locked(struct xe_guc_db_mgr *dbm,
+ unsigned int start, unsigned int count)
+{
+ dbm_assert(dbm, count);
+ dbm_assert(dbm, count <= GUC_NUM_DOORBELLS);
+ dbm_assert(dbm, dbm->count);
+ dbm_assert(dbm, dbm->count <= GUC_NUM_DOORBELLS);
+ lockdep_assert_held(dbm_mutex(dbm));
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
+ unsigned int n;
+
+ for (n = 0; n < count; n++)
+ dbm_assert(dbm, test_bit(start + n, dbm->bitmap));
+ }
+ bitmap_clear(dbm->bitmap, start, count);
+}
+
+/**
+ * xe_guc_db_mgr_reserve_id_locked() - Reserve a single GuC Doorbell ID.
+ * @dbm: the &xe_guc_db_mgr
+ *
+ * This function expects that submission lock is already taken.
+ *
+ * Return: ID of the allocated GuC doorbell or a negative error code on failure.
+ */
+int xe_guc_db_mgr_reserve_id_locked(struct xe_guc_db_mgr *dbm)
+{
+ return dbm_reserve_chunk_locked(dbm, 1, 0);
+}
+
+/**
+ * xe_guc_db_mgr_release_id_locked() - Release a single GuC Doorbell ID.
+ * @dbm: the &xe_guc_db_mgr
+ * @id: the GuC Doorbell ID to release
+ *
+ * This function expects that submission lock is already taken.
+ */
+void xe_guc_db_mgr_release_id_locked(struct xe_guc_db_mgr *dbm, unsigned int id)
+{
+ return dbm_release_chunk_locked(dbm, id, 1);
+}
+
+/**
+ * xe_guc_db_mgr_reserve_range() - Reserve a range of GuC Doorbell IDs.
+ * @dbm: the &xe_guc_db_mgr
+ * @count: number of GuC doorbell IDs to reserve
+ * @spare: number of GuC doorbell IDs to keep available
+ *
+ * This function is dedicated for use by the PF, which expects that the
+ * allocated range for the VF will be contiguous and that there will be at
+ * least &spare IDs still available for the PF use after this reservation.
+ *
+ * Return: starting ID of the allocated GuC doorbell ID range or
+ * a negative error code on failure.
+ */
+int xe_guc_db_mgr_reserve_range(struct xe_guc_db_mgr *dbm,
+ unsigned int count, unsigned int spare)
+{
+ int ret;
+
+ mutex_lock(dbm_mutex(dbm));
+ ret = dbm_reserve_chunk_locked(dbm, count, spare);
+ mutex_unlock(dbm_mutex(dbm));
+
+ return ret;
+}
+
+/**
+ * xe_guc_db_mgr_release_range() - Release a range of Doorbell IDs.
+ * @dbm: the &xe_guc_db_mgr
+ * @start: the starting ID of GuC doorbell ID range to release
+ * @count: number of GuC doorbell IDs to release
+ */
+void xe_guc_db_mgr_release_range(struct xe_guc_db_mgr *dbm,
+ unsigned int start, unsigned int count)
+{
+ mutex_lock(dbm_mutex(dbm));
+ dbm_release_chunk_locked(dbm, start, count);
+ mutex_unlock(dbm_mutex(dbm));
+}
+
+static void dbm_print_locked(struct xe_guc_db_mgr *dbm, struct drm_printer *p, int indent)
+{
+ unsigned int rs, re;
+ unsigned int total;
+
+ drm_printf_indent(p, indent, "count: %u\n", dbm->count);
+ if (!dbm->bitmap)
+ return;
+
+ total = 0;
+ for_each_clear_bitrange(rs, re, dbm->bitmap, dbm->count) {
+ drm_printf_indent(p, indent, "available range: %u..%u (%u)\n",
+ rs, re - 1, re - rs);
+ total += re - rs;
+ }
+ drm_printf_indent(p, indent, "available total: %u\n", total);
+
+ total = 0;
+ for_each_set_bitrange(rs, re, dbm->bitmap, dbm->count) {
+ drm_printf_indent(p, indent, "reserved range: %u..%u (%u)\n",
+ rs, re - 1, re - rs);
+ total += re - rs;
+ }
+ drm_printf_indent(p, indent, "reserved total: %u\n", total);
+}
+
+/**
+ * xe_guc_db_mgr_print() - Print status of GuC Doorbells Manager.
+ * @dbm: the &xe_guc_db_mgr to print
+ * @p: the &drm_printer to print to
+ * @indent: tab indentation level
+ */
+void xe_guc_db_mgr_print(struct xe_guc_db_mgr *dbm,
+ struct drm_printer *p, int indent)
+{
+ mutex_lock(dbm_mutex(dbm));
+ dbm_print_locked(dbm, p, indent);
+ mutex_unlock(dbm_mutex(dbm));
+}
+
+#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_guc_db_mgr_test.c"
+#endif
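
A hypothetical PF-side use of the range API above: reserve a contiguous block of doorbell IDs for a VF while keeping one ID spare for the PF, and release the block again if provisioning fails. The provisioning helper is made up for illustration only.

static int provision_vf_doorbells(struct xe_guc *guc, unsigned int num_dbs)
{
	int ret, start;

	start = xe_guc_db_mgr_reserve_range(&guc->dbm, num_dbs, 1);
	if (start < 0)
		return start;

	ret = push_range_to_vf_config(guc, start, num_dbs);	/* hypothetical helper */
	if (ret)
		xe_guc_db_mgr_release_range(&guc->dbm, start, num_dbs);

	return ret;
}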
diff --git a/drivers/gpu/drm/xe/xe_guc_db_mgr.h b/drivers/gpu/drm/xe/xe_guc_db_mgr.h
new file mode 100644
index 0000000000..c250fa0ca9
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_db_mgr.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GUC_DB_MGR_H_
+#define _XE_GUC_DB_MGR_H_
+
+struct drm_printer;
+struct xe_guc_db_mgr;
+
+int xe_guc_db_mgr_init(struct xe_guc_db_mgr *dbm, unsigned int count);
+
+int xe_guc_db_mgr_reserve_id_locked(struct xe_guc_db_mgr *dbm);
+void xe_guc_db_mgr_release_id_locked(struct xe_guc_db_mgr *dbm, unsigned int id);
+
+int xe_guc_db_mgr_reserve_range(struct xe_guc_db_mgr *dbm, unsigned int count, unsigned int spare);
+void xe_guc_db_mgr_release_range(struct xe_guc_db_mgr *dbm, unsigned int start, unsigned int count);
+
+void xe_guc_db_mgr_print(struct xe_guc_db_mgr *dbm, struct drm_printer *p, int indent);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index 4dd5a88a78..c281fdbfd2 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -97,6 +97,7 @@ struct guc_update_exec_queue_policy {
#define GUC_WA_POLLCS BIT(18)
#define GUC_WA_RENDER_RST_RC6_EXIT BIT(19)
#define GUC_WA_RCS_REGS_IN_CCS_REGS_LIST BIT(21)
+#define GUC_WA_ENABLE_TSC_CHECK_ON_RC6 BIT(22)
#define GUC_CTL_FEATURE 2
#define GUC_CTL_ENABLE_SLPC BIT(2)
diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
index 2a13a00917..ea49f3885c 100644
--- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c
+++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c
@@ -78,7 +78,7 @@ int xe_guc_hwconfig_init(struct xe_guc *guc)
return -EINVAL;
bo = xe_managed_bo_create_pin_map(xe, tile, PAGE_ALIGN(size),
- XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+ XE_BO_CREATE_SYSTEM_BIT |
XE_BO_CREATE_GGTT_BIT);
if (IS_ERR(bo))
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_guc_hxg_helpers.h b/drivers/gpu/drm/xe/xe_guc_hxg_helpers.h
new file mode 100644
index 0000000000..aeeb573c68
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_hxg_helpers.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GUC_HXG_HELPERS_H_
+#define _XE_GUC_HXG_HELPERS_H_
+
+#include <linux/bitfield.h>
+#include <linux/types.h>
+
+#include "abi/guc_messages_abi.h"
+
+/**
+ * hxg_sizeof - Queries size of the object or type (in HXG units).
+ * @T: the object or type
+ *
+ * Force a compilation error if actual size is not aligned to HXG unit (u32).
+ *
+ * Return: size in dwords (u32).
+ */
+#define hxg_sizeof(T) (sizeof(T) / sizeof(u32) + BUILD_BUG_ON_ZERO(sizeof(T) % sizeof(u32)))
+
+static inline const char *guc_hxg_type_to_string(unsigned int type)
+{
+ switch (type) {
+ case GUC_HXG_TYPE_REQUEST:
+ return "request";
+ case GUC_HXG_TYPE_FAST_REQUEST:
+ return "fast-request";
+ case GUC_HXG_TYPE_EVENT:
+ return "event";
+ case GUC_HXG_TYPE_NO_RESPONSE_BUSY:
+ return "busy";
+ case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
+ return "retry";
+ case GUC_HXG_TYPE_RESPONSE_FAILURE:
+ return "failure";
+ case GUC_HXG_TYPE_RESPONSE_SUCCESS:
+ return "response";
+ default:
+ return "<invalid>";
+ }
+}
+
+static inline bool guc_hxg_type_is_action(unsigned int type)
+{
+ switch (type) {
+ case GUC_HXG_TYPE_REQUEST:
+ case GUC_HXG_TYPE_FAST_REQUEST:
+ case GUC_HXG_TYPE_EVENT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool guc_hxg_type_is_reply(unsigned int type)
+{
+ switch (type) {
+ case GUC_HXG_TYPE_NO_RESPONSE_BUSY:
+ case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
+ case GUC_HXG_TYPE_RESPONSE_FAILURE:
+ case GUC_HXG_TYPE_RESPONSE_SUCCESS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline u32 guc_hxg_msg_encode_success(u32 *msg, u32 data0)
+{
+ msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS) |
+ FIELD_PREP(GUC_HXG_RESPONSE_MSG_0_DATA0, data0);
+
+ return GUC_HXG_RESPONSE_MSG_MIN_LEN;
+}
+
+static inline u32 guc_hxg_msg_encode_failure(u32 *msg, u32 error, u32 hint)
+{
+ msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_FAILURE) |
+ FIELD_PREP(GUC_HXG_FAILURE_MSG_0_HINT, hint) |
+ FIELD_PREP(GUC_HXG_FAILURE_MSG_0_ERROR, error);
+
+ return GUC_HXG_FAILURE_MSG_LEN;
+}
+
+static inline u32 guc_hxg_msg_encode_busy(u32 *msg, u32 counter)
+{
+ msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_NO_RESPONSE_BUSY) |
+ FIELD_PREP(GUC_HXG_BUSY_MSG_0_COUNTER, counter);
+
+ return GUC_HXG_BUSY_MSG_LEN;
+}
+
+static inline u32 guc_hxg_msg_encode_retry(u32 *msg, u32 reason)
+{
+ msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_NO_RESPONSE_RETRY) |
+ FIELD_PREP(GUC_HXG_RETRY_MSG_0_REASON, reason);
+
+ return GUC_HXG_RETRY_MSG_LEN;
+}
+
+#endif
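
Illustrative use of the helpers above: hxg_sizeof() converts a fixed-layout struct to its length in HXG dwords at compile time, and guc_hxg_msg_encode_success() builds a minimal success reply. The struct and function here are examples, not part of the driver.

#include <linux/build_bug.h>

struct example_payload {
	u32 low;
	u32 high;
};

static u32 build_example_reply(u32 *msg, u32 data0)
{
	/* 8 bytes -> 2 HXG dwords; a misaligned struct would fail to build. */
	BUILD_BUG_ON(hxg_sizeof(struct example_payload) != 2);

	return guc_hxg_msg_encode_success(msg, data0);
}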
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
index bcd2f4d340..45135c3520 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.c
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -84,7 +84,7 @@ int xe_guc_log_init(struct xe_guc_log *log)
struct xe_bo *bo;
bo = xe_managed_bo_create_pin_map(xe, tile, guc_log_size(),
- XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+ XE_BO_CREATE_SYSTEM_BIT |
XE_BO_CREATE_GGTT_BIT);
if (IS_ERR(bo))
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index d917025925..dd9d65f923 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -381,8 +381,6 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc)
struct xe_device *xe = gt_to_xe(gt);
u32 freq;
- xe_device_mem_access_get(gt_to_xe(gt));
-
/* When in RC6, actual frequency reported will be 0. */
if (GRAPHICS_VERx100(xe) >= 1270) {
freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1);
@@ -394,8 +392,6 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc)
freq = decode_freq(freq);
- xe_device_mem_access_put(gt_to_xe(gt));
-
return freq;
}
@@ -412,14 +408,13 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq)
struct xe_gt *gt = pc_to_gt(pc);
int ret;
- xe_device_mem_access_get(gt_to_xe(gt));
/*
* GuC SLPC plays with cur freq request when GuCRC is enabled
* Block RC6 for a more reliable read.
*/
ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
if (ret)
- goto out;
+ return ret;
*freq = xe_mmio_read32(gt, RPNSWREQ);
@@ -427,9 +422,7 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq)
*freq = decode_freq(*freq);
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
-out:
- xe_device_mem_access_put(gt_to_xe(gt));
- return ret;
+ return 0;
}
/**
@@ -451,12 +444,7 @@ u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc)
*/
u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc)
{
- struct xe_gt *gt = pc_to_gt(pc);
- struct xe_device *xe = gt_to_xe(gt);
-
- xe_device_mem_access_get(xe);
pc_update_rp_values(pc);
- xe_device_mem_access_put(xe);
return pc->rpe_freq;
}
@@ -485,7 +473,6 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
struct xe_gt *gt = pc_to_gt(pc);
int ret;
- xe_device_mem_access_get(pc_to_xe(pc));
mutex_lock(&pc->freq_lock);
if (!pc->freq_ready) {
/* Might be in the middle of a gt reset */
@@ -511,7 +498,6 @@ fw:
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
out:
mutex_unlock(&pc->freq_lock);
- xe_device_mem_access_put(pc_to_xe(pc));
return ret;
}
@@ -528,7 +514,6 @@ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
{
int ret;
- xe_device_mem_access_get(pc_to_xe(pc));
mutex_lock(&pc->freq_lock);
if (!pc->freq_ready) {
/* Might be in the middle of a gt reset */
@@ -544,8 +529,6 @@ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
out:
mutex_unlock(&pc->freq_lock);
- xe_device_mem_access_put(pc_to_xe(pc));
-
return ret;
}
@@ -561,7 +544,6 @@ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq)
{
int ret;
- xe_device_mem_access_get(pc_to_xe(pc));
mutex_lock(&pc->freq_lock);
if (!pc->freq_ready) {
/* Might be in the middle of a gt reset */
@@ -577,7 +559,6 @@ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq)
out:
mutex_unlock(&pc->freq_lock);
- xe_device_mem_access_put(pc_to_xe(pc));
return ret;
}
@@ -594,7 +575,6 @@ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
{
int ret;
- xe_device_mem_access_get(pc_to_xe(pc));
mutex_lock(&pc->freq_lock);
if (!pc->freq_ready) {
/* Might be in the middle of a gt reset */
@@ -610,7 +590,6 @@ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
out:
mutex_unlock(&pc->freq_lock);
- xe_device_mem_access_put(pc_to_xe(pc));
return ret;
}
@@ -623,8 +602,6 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc)
struct xe_gt *gt = pc_to_gt(pc);
u32 reg, gt_c_state;
- xe_device_mem_access_get(gt_to_xe(gt));
-
if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
reg = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1);
gt_c_state = REG_FIELD_GET(MTL_CC_MASK, reg);
@@ -633,8 +610,6 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc)
gt_c_state = REG_FIELD_GET(RCN_MASK, reg);
}
- xe_device_mem_access_put(gt_to_xe(gt));
-
switch (gt_c_state) {
case GT_C6:
return GT_IDLE_C6;
@@ -654,9 +629,7 @@ u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc)
struct xe_gt *gt = pc_to_gt(pc);
u32 reg;
- xe_device_mem_access_get(gt_to_xe(gt));
reg = xe_mmio_read32(gt, GT_GFX_RC6);
- xe_device_mem_access_put(gt_to_xe(gt));
return reg;
}
@@ -670,9 +643,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc)
struct xe_gt *gt = pc_to_gt(pc);
u64 reg;
- xe_device_mem_access_get(gt_to_xe(gt));
reg = xe_mmio_read32(gt, MTL_MEDIA_MC6);
- xe_device_mem_access_put(gt_to_xe(gt));
return reg;
}
@@ -801,23 +772,19 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc)
if (xe->info.skip_guc_pc)
return 0;
- xe_device_mem_access_get(pc_to_xe(pc));
-
ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL);
if (ret)
- goto out;
+ return ret;
ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
if (ret)
- goto out;
+ return ret;
xe_gt_idle_disable_c6(gt);
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
-out:
- xe_device_mem_access_put(pc_to_xe(pc));
- return ret;
+ return 0;
}
static void pc_init_pcode_freq(struct xe_guc_pc *pc)
@@ -870,11 +837,9 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
xe_gt_assert(gt, xe_device_uc_enabled(xe));
- xe_device_mem_access_get(pc_to_xe(pc));
-
ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
if (ret)
- goto out_fail_force_wake;
+ return ret;
if (xe->info.skip_guc_pc) {
if (xe->info.platform != XE_PVC)
@@ -914,8 +879,6 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
out:
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
-out_fail_force_wake:
- xe_device_mem_access_put(pc_to_xe(pc));
return ret;
}
@@ -928,12 +891,9 @@ int xe_guc_pc_stop(struct xe_guc_pc *pc)
struct xe_device *xe = pc_to_xe(pc);
int ret;
- xe_device_mem_access_get(pc_to_xe(pc));
-
if (xe->info.skip_guc_pc) {
xe_gt_idle_disable_c6(pc_to_gt(pc));
- ret = 0;
- goto out;
+ return 0;
}
mutex_lock(&pc->freq_lock);
@@ -942,36 +902,29 @@ int xe_guc_pc_stop(struct xe_guc_pc *pc)
ret = pc_action_shutdown(pc);
if (ret)
- goto out;
+ return ret;
if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_NOT_RUNNING)) {
drm_err(&pc_to_xe(pc)->drm, "GuC PC Shutdown failed\n");
- ret = -EIO;
+ return -EIO;
}
-out:
- xe_device_mem_access_put(pc_to_xe(pc));
- return ret;
+ return 0;
}
/**
* xe_guc_pc_fini - Finalize GuC's Power Conservation component
- * @pc: Xe_GuC_PC instance
+ * @drm: DRM device
+ * @arg: opaque pointer that should point to Xe_GuC_PC instance
*/
-void xe_guc_pc_fini(struct xe_guc_pc *pc)
+static void xe_guc_pc_fini(struct drm_device *drm, void *arg)
{
- struct xe_device *xe = pc_to_xe(pc);
-
- if (xe->info.skip_guc_pc) {
- xe_device_mem_access_get(xe);
- xe_gt_idle_disable_c6(pc_to_gt(pc));
- xe_device_mem_access_put(xe);
- return;
- }
+ struct xe_guc_pc *pc = arg;
+ xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
XE_WARN_ON(xe_guc_pc_stop(pc));
- mutex_destroy(&pc->freq_lock);
+ xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL);
}
/**
@@ -985,11 +938,14 @@ int xe_guc_pc_init(struct xe_guc_pc *pc)
struct xe_device *xe = gt_to_xe(gt);
struct xe_bo *bo;
u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
+ int err;
if (xe->info.skip_guc_pc)
return 0;
- mutex_init(&pc->freq_lock);
+ err = drmm_mutex_init(&xe->drm, &pc->freq_lock);
+ if (err)
+ return err;
bo = xe_managed_bo_create_pin_map(xe, tile, size,
XE_BO_CREATE_VRAM_IF_DGFX(tile) |
@@ -998,5 +954,10 @@ int xe_guc_pc_init(struct xe_guc_pc *pc)
return PTR_ERR(bo);
pc->bo = bo;
+
+ err = drmm_add_action_or_reset(&xe->drm, xe_guc_pc_fini, pc);
+ if (err)
+ return err;
+
return 0;
}
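
For reference, the conversion above follows the DRM managed-resource pattern: teardown is registered with drmm_add_action_or_reset() at init time instead of being driven by an explicit xe_guc_pc_fini() call, and the mutex is handed to drmm_mutex_init() so it is destroyed with the DRM device. A minimal sketch of the same pattern under assumed names (xe_foo, xe_foo_fini, xe_foo_disable are illustrative):

static void xe_foo_fini(struct drm_device *drm, void *arg)
{
	struct xe_foo *foo = arg;

	/* undo whatever xe_foo_init() set up */
	xe_foo_disable(foo);
}

static int xe_foo_init(struct xe_device *xe, struct xe_foo *foo)
{
	int err;

	/* lock is destroyed automatically when the DRM device is released */
	err = drmm_mutex_init(&xe->drm, &foo->lock);
	if (err)
		return err;

	/* if registration fails, the action runs immediately ("or_reset") */
	return drmm_add_action_or_reset(&xe->drm, xe_foo_fini, foo);
}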
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
index cecad8e930..d3680d8949 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -9,7 +9,6 @@
#include "xe_guc_pc_types.h"
int xe_guc_pc_init(struct xe_guc_pc *pc);
-void xe_guc_pc_fini(struct xe_guc_pc *pc);
int xe_guc_pc_start(struct xe_guc_pc *pc);
int xe_guc_pc_stop(struct xe_guc_pc *pc);
int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc);
diff --git a/drivers/gpu/drm/xe/xe_guc_relay.c b/drivers/gpu/drm/xe/xe_guc_relay.c
new file mode 100644
index 0000000000..c0a2d8d5d3
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_relay.c
@@ -0,0 +1,941 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+
+#include <drm/drm_managed.h>
+
+#include <kunit/static_stub.h>
+#include <kunit/test-bug.h>
+
+#include "abi/guc_actions_sriov_abi.h"
+#include "abi/guc_relay_actions_abi.h"
+#include "abi/guc_relay_communication_abi.h"
+
+#include "xe_assert.h"
+#include "xe_device.h"
+#include "xe_gt.h"
+#include "xe_gt_sriov_printk.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_hxg_helpers.h"
+#include "xe_guc_relay.h"
+#include "xe_guc_relay_types.h"
+#include "xe_sriov.h"
+
+/*
+ * How long should we wait for the response?
+ * XXX this value is subject to profiling.
+ */
+#define RELAY_TIMEOUT_MSEC (2500)
+
+static void relays_worker_fn(struct work_struct *w);
+
+static struct xe_guc *relay_to_guc(struct xe_guc_relay *relay)
+{
+ return container_of(relay, struct xe_guc, relay);
+}
+
+static struct xe_guc_ct *relay_to_ct(struct xe_guc_relay *relay)
+{
+ return &relay_to_guc(relay)->ct;
+}
+
+static struct xe_gt *relay_to_gt(struct xe_guc_relay *relay)
+{
+ return guc_to_gt(relay_to_guc(relay));
+}
+
+static struct xe_device *relay_to_xe(struct xe_guc_relay *relay)
+{
+ return gt_to_xe(relay_to_gt(relay));
+}
+
+#define relay_assert(relay, condition) xe_gt_assert(relay_to_gt(relay), condition)
+#define relay_notice(relay, msg...) xe_gt_sriov_notice(relay_to_gt(relay), "relay: " msg)
+#define relay_debug(relay, msg...) xe_gt_sriov_dbg_verbose(relay_to_gt(relay), "relay: " msg)
+
+static int relay_get_totalvfs(struct xe_guc_relay *relay)
+{
+ struct xe_device *xe = relay_to_xe(relay);
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+
+ KUNIT_STATIC_STUB_REDIRECT(relay_get_totalvfs, relay);
+ return IS_SRIOV_VF(xe) ? 0 : pci_sriov_get_totalvfs(pdev);
+}
+
+static bool relay_is_ready(struct xe_guc_relay *relay)
+{
+ return mempool_initialized(&relay->pool);
+}
+
+static u32 relay_get_next_rid(struct xe_guc_relay *relay)
+{
+ u32 rid;
+
+ spin_lock(&relay->lock);
+ rid = ++relay->last_rid;
+ spin_unlock(&relay->lock);
+
+ return rid;
+}
+
+/**
+ * struct relay_transaction - internal data used to handle transactions
+ *
+ * Relation between struct relay_transaction members::
+ *
+ * <-------------------- GUC_CTB_MAX_DWORDS -------------->
+ * <-------- GUC_RELAY_MSG_MAX_LEN --->
+ * <--- offset ---> <--- request_len ------->
+ * +----------------+-------------------------+----------+--+
+ * | | | | |
+ * +----------------+-------------------------+----------+--+
+ * ^ ^
+ * / /
+ * request_buf request
+ *
+ * <-------------------- GUC_CTB_MAX_DWORDS -------------->
+ * <-------- GUC_RELAY_MSG_MAX_LEN --->
+ * <--- offset ---> <--- response_len --->
+ * +----------------+----------------------+-------------+--+
+ * | | | | |
+ * +----------------+----------------------+-------------+--+
+ * ^ ^
+ * / /
+ * response_buf response
+ */
+struct relay_transaction {
+ /**
+ * @incoming: indicates whether this transaction represents an incoming
+ * request from the remote VF/PF or an outgoing request to
+ * the remote VF/PF.
+ */
+ bool incoming;
+
+ /**
+ * @remote: PF/VF identifier of the origin (or target) of the relay
+ * request message.
+ */
+ u32 remote;
+
+ /** @rid: identifier of the VF/PF relay message. */
+ u32 rid;
+
+ /**
+ * @request: points to the inner VF/PF request message, copied to the
+ * #request_buf starting at #offset.
+ */
+ u32 *request;
+
+ /** @request_len: length of the inner VF/PF request message. */
+ u32 request_len;
+
+ /**
+ * @response: points to the placeholder buffer where inner VF/PF
+ * response will be located; for an outgoing transaction
+ * this could be the caller's buffer (if provided), otherwise
+ * it points to the #response_buf starting at #offset.
+ */
+ u32 *response;
+
+ /**
+ * @response_len: length of the inner VF/PF response message (only
+ * if #reply is 0), initially set to the size of the
+ * placeholder buffer where the response message will be
+ * copied.
+ */
+ u32 response_len;
+
+ /**
+ * @offset: offset to the start of the inner VF/PF relay message inside
+ * buffers; this offset is equal to the length of the outer GuC
+ * relay header message.
+ */
+ u32 offset;
+
+ /**
+ * @request_buf: buffer with VF/PF request message including outer
+ * transport message.
+ */
+ u32 request_buf[GUC_CTB_MAX_DWORDS];
+
+ /**
+ * @response_buf: buffer with VF/PF response message including outer
+ * transport message.
+ */
+ u32 response_buf[GUC_CTB_MAX_DWORDS];
+
+ /**
+ * @reply: status of the reply, 0 means that the data pointed to by the
+ * #response is valid.
+ */
+ int reply;
+
+ /** @done: completion of the outgoing transaction. */
+ struct completion done;
+
+ /** @link: transaction list link */
+ struct list_head link;
+};
+
+static u32 prepare_pf2guc(u32 *msg, u32 target, u32 rid)
+{
+ msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, XE_GUC_ACTION_PF2GUC_RELAY_TO_VF);
+ msg[1] = FIELD_PREP(PF2GUC_RELAY_TO_VF_REQUEST_MSG_1_VFID, target);
+ msg[2] = FIELD_PREP(PF2GUC_RELAY_TO_VF_REQUEST_MSG_2_RELAY_ID, rid);
+
+ return PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN;
+}
+
+static u32 prepare_vf2guc(u32 *msg, u32 rid)
+{
+ msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+ FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, XE_GUC_ACTION_VF2GUC_RELAY_TO_PF);
+ msg[1] = FIELD_PREP(VF2GUC_RELAY_TO_PF_REQUEST_MSG_1_RELAY_ID, rid);
+
+ return VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN;
+}
+
+static struct relay_transaction *
+__relay_get_transaction(struct xe_guc_relay *relay, bool incoming, u32 remote, u32 rid,
+ const u32 *action, u32 action_len, u32 *resp, u32 resp_size)
+{
+ struct relay_transaction *txn;
+
+ relay_assert(relay, action_len >= GUC_RELAY_MSG_MIN_LEN);
+ relay_assert(relay, action_len <= GUC_RELAY_MSG_MAX_LEN);
+ relay_assert(relay, !(!!resp ^ !!resp_size));
+ relay_assert(relay, resp_size <= GUC_RELAY_MSG_MAX_LEN);
+ relay_assert(relay, resp_size == 0 || resp_size >= GUC_RELAY_MSG_MIN_LEN);
+
+ if (unlikely(!relay_is_ready(relay)))
+ return ERR_PTR(-ENODEV);
+
+ /*
+ * For incoming requests we can't use GFP_KERNEL as those are delivered
+ * with the CTB lock held, which is marked as used in the reclaim path.
+ * Btw, that's one of the reasons why we use a mempool here!
+ */
+ txn = mempool_alloc(&relay->pool, incoming ? GFP_ATOMIC : GFP_KERNEL);
+ if (!txn)
+ return ERR_PTR(-ENOMEM);
+
+ txn->incoming = incoming;
+ txn->remote = remote;
+ txn->rid = rid;
+ txn->offset = remote ?
+ prepare_pf2guc(incoming ? txn->response_buf : txn->request_buf, remote, rid) :
+ prepare_vf2guc(incoming ? txn->response_buf : txn->request_buf, rid);
+
+ relay_assert(relay, txn->offset);
+ relay_assert(relay, txn->offset + GUC_RELAY_MSG_MAX_LEN <= ARRAY_SIZE(txn->request_buf));
+ relay_assert(relay, txn->offset + GUC_RELAY_MSG_MAX_LEN <= ARRAY_SIZE(txn->response_buf));
+
+ txn->request = txn->request_buf + txn->offset;
+ memcpy(&txn->request_buf[txn->offset], action, sizeof(u32) * action_len);
+ txn->request_len = action_len;
+
+ txn->response = resp ?: txn->response_buf + txn->offset;
+ txn->response_len = resp_size ?: GUC_RELAY_MSG_MAX_LEN;
+ txn->reply = -ENOMSG;
+ INIT_LIST_HEAD(&txn->link);
+ init_completion(&txn->done);
+
+ return txn;
+}
+
+static struct relay_transaction *
+relay_new_transaction(struct xe_guc_relay *relay, u32 target, const u32 *action, u32 len,
+ u32 *resp, u32 resp_size)
+{
+ u32 rid = relay_get_next_rid(relay);
+
+ return __relay_get_transaction(relay, false, target, rid, action, len, resp, resp_size);
+}
+
+static struct relay_transaction *
+relay_new_incoming_transaction(struct xe_guc_relay *relay, u32 origin, u32 rid,
+ const u32 *action, u32 len)
+{
+ return __relay_get_transaction(relay, true, origin, rid, action, len, NULL, 0);
+}
+
+static void relay_release_transaction(struct xe_guc_relay *relay, struct relay_transaction *txn)
+{
+ relay_assert(relay, list_empty(&txn->link));
+
+ txn->offset = 0;
+ txn->response = NULL;
+ txn->reply = -ESTALE;
+ mempool_free(txn, &relay->pool);
+}
+
+static int relay_send_transaction(struct xe_guc_relay *relay, struct relay_transaction *txn)
+{
+ u32 len = txn->incoming ? txn->response_len : txn->request_len;
+ u32 *buf = txn->incoming ? txn->response_buf : txn->request_buf;
+ u32 *msg = buf + txn->offset;
+ int ret;
+
+ relay_assert(relay, txn->offset);
+ relay_assert(relay, txn->offset + len <= GUC_CTB_MAX_DWORDS);
+ relay_assert(relay, len >= GUC_RELAY_MSG_MIN_LEN);
+ relay_assert(relay, len <= GUC_RELAY_MSG_MAX_LEN);
+
+ relay_debug(relay, "sending %s.%u to %u = %*ph\n",
+ guc_hxg_type_to_string(FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])),
+ txn->rid, txn->remote, (int)sizeof(u32) * len, msg);
+
+ ret = xe_guc_ct_send_block(relay_to_ct(relay), buf, len + txn->offset);
+
+ if (unlikely(ret > 0)) {
+ relay_notice(relay, "Unexpected data=%d from GuC, wrong ABI?\n", ret);
+ ret = -EPROTO;
+ }
+ if (unlikely(ret < 0)) {
+ relay_notice(relay, "Failed to send %s.%x to GuC (%pe) %*ph ...\n",
+ guc_hxg_type_to_string(FIELD_GET(GUC_HXG_MSG_0_TYPE, buf[0])),
+ FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, buf[0]),
+ ERR_PTR(ret), (int)sizeof(u32) * txn->offset, buf);
+ relay_notice(relay, "Failed to send %s.%u to %u (%pe) %*ph\n",
+ guc_hxg_type_to_string(FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])),
+ txn->rid, txn->remote, ERR_PTR(ret), (int)sizeof(u32) * len, msg);
+ }
+
+ return ret;
+}
+
+static void __fini_relay(struct drm_device *drm, void *arg)
+{
+ struct xe_guc_relay *relay = arg;
+
+ mempool_exit(&relay->pool);
+}
+
+/**
+ * xe_guc_relay_init - Initialize a &xe_guc_relay
+ * @relay: the &xe_guc_relay to initialize
+ *
+ * Initialize remaining members of &xe_guc_relay that may depend
+ * on the SR-IOV mode.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_guc_relay_init(struct xe_guc_relay *relay)
+{
+ const int XE_RELAY_MEMPOOL_MIN_NUM = 1;
+ struct xe_device *xe = relay_to_xe(relay);
+ int err;
+
+ relay_assert(relay, !relay_is_ready(relay));
+
+ if (!IS_SRIOV(xe))
+ return 0;
+
+ spin_lock_init(&relay->lock);
+ INIT_WORK(&relay->worker, relays_worker_fn);
+ INIT_LIST_HEAD(&relay->pending_relays);
+ INIT_LIST_HEAD(&relay->incoming_actions);
+
+ err = mempool_init_kmalloc_pool(&relay->pool, XE_RELAY_MEMPOOL_MIN_NUM +
+ relay_get_totalvfs(relay),
+ sizeof(struct relay_transaction));
+ if (err)
+ return err;
+
+ relay_debug(relay, "using mempool with %d elements\n", relay->pool.min_nr);
+
+ return drmm_add_action_or_reset(&xe->drm, __fini_relay, relay);
+}
+
+static u32 to_relay_error(int err)
+{
+ /* XXX: assume that relay errors match errno codes */
+ return err < 0 ? -err : GUC_RELAY_ERROR_UNDISCLOSED;
+}
+
+static int from_relay_error(u32 error)
+{
+ /* XXX: assume that relay errors match errno codes */
+ return error ? -error : -ENODATA;
+}
+
+static u32 sanitize_relay_error(u32 error)
+{
+ /* XXX TBD if generic error codes will be allowed */
+ if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG))
+ error = GUC_RELAY_ERROR_UNDISCLOSED;
+ return error;
+}
+
+static u32 sanitize_relay_error_hint(u32 hint)
+{
+ /* XXX TBD if generic error codes will be allowed */
+ if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG))
+ hint = 0;
+ return hint;
+}
+
+static u32 prepare_error_reply(u32 *msg, u32 error, u32 hint)
+{
+ msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_FAILURE) |
+ FIELD_PREP(GUC_HXG_FAILURE_MSG_0_HINT, hint) |
+ FIELD_PREP(GUC_HXG_FAILURE_MSG_0_ERROR, error);
+
+ XE_WARN_ON(!FIELD_FIT(GUC_HXG_FAILURE_MSG_0_ERROR, error));
+ XE_WARN_ON(!FIELD_FIT(GUC_HXG_FAILURE_MSG_0_HINT, hint));
+
+ return GUC_HXG_FAILURE_MSG_LEN;
+}
+
+static void relay_testonly_nop(struct xe_guc_relay *relay)
+{
+ KUNIT_STATIC_STUB_REDIRECT(relay_testonly_nop, relay);
+}
+
+static int relay_send_message_and_wait(struct xe_guc_relay *relay,
+ struct relay_transaction *txn,
+ u32 *buf, u32 buf_size)
+{
+ unsigned long timeout = msecs_to_jiffies(RELAY_TIMEOUT_MSEC);
+ u32 *msg = &txn->request_buf[txn->offset];
+ u32 len = txn->request_len;
+ u32 type, action, data0;
+ int ret;
+ long n;
+
+ type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);
+ action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]);
+ data0 = FIELD_GET(GUC_HXG_REQUEST_MSG_0_DATA0, msg[0]);
+
+ relay_debug(relay, "%s.%u to %u action %#x:%u\n",
+ guc_hxg_type_to_string(type),
+ txn->rid, txn->remote, action, data0);
+
+ /* list ordering does not need to match RID ordering */
+ spin_lock(&relay->lock);
+ list_add_tail(&txn->link, &relay->pending_relays);
+ spin_unlock(&relay->lock);
+
+resend:
+ ret = relay_send_transaction(relay, txn);
+ if (unlikely(ret < 0))
+ goto unlink;
+
+wait:
+ n = wait_for_completion_timeout(&txn->done, timeout);
+ if (unlikely(n == 0 && txn->reply)) {
+ ret = -ETIME;
+ goto unlink;
+ }
+
+ relay_debug(relay, "%u.%u reply %d after %u msec\n",
+ txn->remote, txn->rid, txn->reply, jiffies_to_msecs(timeout - n));
+ if (unlikely(txn->reply)) {
+ reinit_completion(&txn->done);
+ if (txn->reply == -EAGAIN)
+ goto resend;
+ if (txn->reply == -EBUSY) {
+ relay_testonly_nop(relay);
+ goto wait;
+ }
+ if (txn->reply > 0)
+ ret = from_relay_error(txn->reply);
+ else
+ ret = txn->reply;
+ goto unlink;
+ }
+
+ relay_debug(relay, "%u.%u response %*ph\n", txn->remote, txn->rid,
+ (int)sizeof(u32) * txn->response_len, txn->response);
+ relay_assert(relay, txn->response_len >= GUC_RELAY_MSG_MIN_LEN);
+ ret = txn->response_len;
+
+unlink:
+ spin_lock(&relay->lock);
+ list_del_init(&txn->link);
+ spin_unlock(&relay->lock);
+
+ if (unlikely(ret < 0)) {
+ relay_notice(relay, "Unsuccessful %s.%u %#x:%u to %u (%pe) %*ph\n",
+ guc_hxg_type_to_string(type), txn->rid,
+ action, data0, txn->remote, ERR_PTR(ret),
+ (int)sizeof(u32) * len, msg);
+ }
+
+ return ret;
+}
+
+static int relay_send_to(struct xe_guc_relay *relay, u32 target,
+ const u32 *msg, u32 len, u32 *buf, u32 buf_size)
+{
+ struct relay_transaction *txn;
+ int ret;
+
+ relay_assert(relay, len >= GUC_RELAY_MSG_MIN_LEN);
+ relay_assert(relay, len <= GUC_RELAY_MSG_MAX_LEN);
+ relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_HOST);
+ relay_assert(relay, guc_hxg_type_is_action(FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])));
+
+ if (unlikely(!relay_is_ready(relay)))
+ return -ENODEV;
+
+ txn = relay_new_transaction(relay, target, msg, len, buf, buf_size);
+ if (IS_ERR(txn))
+ return PTR_ERR(txn);
+
+ switch (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])) {
+ case GUC_HXG_TYPE_REQUEST:
+ ret = relay_send_message_and_wait(relay, txn, buf, buf_size);
+ break;
+ case GUC_HXG_TYPE_FAST_REQUEST:
+ relay_assert(relay, !GUC_HXG_TYPE_FAST_REQUEST);
+ fallthrough;
+ case GUC_HXG_TYPE_EVENT:
+ ret = relay_send_transaction(relay, txn);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ relay_release_transaction(relay, txn);
+ return ret;
+}
+
+#ifdef CONFIG_PCI_IOV
+/**
+ * xe_guc_relay_send_to_vf - Send a message to the VF.
+ * @relay: the &xe_guc_relay which will send the message
+ * @target: target VF number
+ * @msg: request message to be sent
+ * @len: length of the request message (in dwords, can't be 0)
+ * @buf: placeholder for the response message
+ * @buf_size: size of the response message placeholder (in dwords)
+ *
+ * This function can only be used by the driver running in the SR-IOV PF mode.
+ *
+ * Return: Non-negative response length (in dwords) or
+ * a negative error code on failure.
+ */
+int xe_guc_relay_send_to_vf(struct xe_guc_relay *relay, u32 target,
+ const u32 *msg, u32 len, u32 *buf, u32 buf_size)
+{
+ relay_assert(relay, IS_SRIOV_PF(relay_to_xe(relay)));
+
+ return relay_send_to(relay, target, msg, len, buf, buf_size);
+}
+#endif
+
+/**
+ * xe_guc_relay_send_to_pf - Send a message to the PF.
+ * @relay: the &xe_guc_relay which will send the message
+ * @msg: request message to be sent
+ * @len: length of the message (in dwords, can't be 0)
+ * @buf: placeholder for the response message
+ * @buf_size: size of the response message placeholder (in dwords)
+ *
+ * This function can only be used by the driver running in SR-IOV VF mode.
+ *
+ * Return: Non-negative response length (in dwords) or
+ * a negative error code on failure.
+ */
+int xe_guc_relay_send_to_pf(struct xe_guc_relay *relay,
+ const u32 *msg, u32 len, u32 *buf, u32 buf_size)
+{
+ relay_assert(relay, IS_SRIOV_VF(relay_to_xe(relay)));
+
+ return relay_send_to(relay, PFID, msg, len, buf, buf_size);
+}
+
+static int relay_handle_reply(struct xe_guc_relay *relay, u32 origin,
+ u32 rid, int reply, const u32 *msg, u32 len)
+{
+ struct relay_transaction *pending;
+ int err = -ESRCH;
+
+ spin_lock(&relay->lock);
+ list_for_each_entry(pending, &relay->pending_relays, link) {
+ if (pending->remote != origin || pending->rid != rid) {
+ relay_debug(relay, "%u.%u still awaits response\n",
+ pending->remote, pending->rid);
+ continue;
+ }
+ err = 0; /* found! */
+ if (reply == 0) {
+ if (len > pending->response_len) {
+ reply = -ENOBUFS;
+ err = -ENOBUFS;
+ } else {
+ memcpy(pending->response, msg, 4 * len);
+ pending->response_len = len;
+ }
+ }
+ pending->reply = reply;
+ complete_all(&pending->done);
+ break;
+ }
+ spin_unlock(&relay->lock);
+
+ return err;
+}
+
+static int relay_handle_failure(struct xe_guc_relay *relay, u32 origin,
+ u32 rid, const u32 *msg, u32 len)
+{
+ int error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[0]);
+ u32 hint __maybe_unused = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[0]);
+
+ relay_assert(relay, len);
+ relay_debug(relay, "%u.%u error %#x (%pe) hint %u debug %*ph\n",
+ origin, rid, error, ERR_PTR(-error), hint, 4 * (len - 1), msg + 1);
+
+ return relay_handle_reply(relay, origin, rid, error ?: -EREMOTEIO, NULL, 0);
+}
+
+static int relay_testloop_action_handler(struct xe_guc_relay *relay, u32 origin,
+ const u32 *msg, u32 len, u32 *response, u32 size)
+{
+ static ktime_t last_reply = 0;
+ u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);
+ u32 action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]);
+ u32 opcode = FIELD_GET(GUC_HXG_REQUEST_MSG_0_DATA0, msg[0]);
+ ktime_t now = ktime_get();
+ bool busy;
+ int ret;
+
+ relay_assert(relay, guc_hxg_type_is_action(type));
+ relay_assert(relay, action == GUC_RELAY_ACTION_VFXPF_TESTLOOP);
+
+ if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV))
+ return -ECONNREFUSED;
+
+ if (!last_reply)
+ last_reply = now;
+ busy = ktime_before(now, ktime_add_ms(last_reply, 2 * RELAY_TIMEOUT_MSEC));
+ if (!busy)
+ last_reply = now;
+
+ switch (opcode) {
+ case VFXPF_TESTLOOP_OPCODE_NOP:
+ if (type == GUC_HXG_TYPE_EVENT)
+ return 0;
+ return guc_hxg_msg_encode_success(response, 0);
+ case VFXPF_TESTLOOP_OPCODE_BUSY:
+ if (type == GUC_HXG_TYPE_EVENT)
+ return -EPROTO;
+ msleep(RELAY_TIMEOUT_MSEC / 8);
+ if (busy)
+ return -EINPROGRESS;
+ return guc_hxg_msg_encode_success(response, 0);
+ case VFXPF_TESTLOOP_OPCODE_RETRY:
+ if (type == GUC_HXG_TYPE_EVENT)
+ return -EPROTO;
+ msleep(RELAY_TIMEOUT_MSEC / 8);
+ if (busy)
+ return guc_hxg_msg_encode_retry(response, 0);
+ return guc_hxg_msg_encode_success(response, 0);
+ case VFXPF_TESTLOOP_OPCODE_ECHO:
+ if (type == GUC_HXG_TYPE_EVENT)
+ return -EPROTO;
+ if (size < len)
+ return -ENOBUFS;
+ ret = guc_hxg_msg_encode_success(response, len);
+ memcpy(response + ret, msg + ret, (len - ret) * sizeof(u32));
+ return len;
+ case VFXPF_TESTLOOP_OPCODE_FAIL:
+ return -EHWPOISON;
+ default:
+ break;
+ }
+
+ relay_notice(relay, "Unexpected action %#x opcode %#x\n", action, opcode);
+ return -EBADRQC;
+}
+
+static int relay_action_handler(struct xe_guc_relay *relay, u32 origin,
+ const u32 *msg, u32 len, u32 *response, u32 size)
+{
+ u32 type;
+ int ret;
+
+ relay_assert(relay, len >= GUC_HXG_MSG_MIN_LEN);
+
+ if (FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]) == GUC_RELAY_ACTION_VFXPF_TESTLOOP)
+ return relay_testloop_action_handler(relay, origin, msg, len, response, size);
+
+ type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);
+
+ /* XXX: PF services will be added later */
+ ret = -EOPNOTSUPP;
+
+ if (type == GUC_HXG_TYPE_EVENT)
+ relay_assert(relay, ret <= 0);
+
+ return ret;
+}
+
+static struct relay_transaction *relay_dequeue_transaction(struct xe_guc_relay *relay)
+{
+ struct relay_transaction *txn;
+
+ spin_lock(&relay->lock);
+ txn = list_first_entry_or_null(&relay->incoming_actions, struct relay_transaction, link);
+ if (txn)
+ list_del_init(&txn->link);
+ spin_unlock(&relay->lock);
+
+ return txn;
+}
+
+static void relay_process_incoming_action(struct xe_guc_relay *relay)
+{
+ struct relay_transaction *txn;
+ bool again = false;
+ u32 type;
+ int ret;
+
+ txn = relay_dequeue_transaction(relay);
+ if (!txn)
+ return;
+
+ type = FIELD_GET(GUC_HXG_MSG_0_TYPE, txn->request_buf[txn->offset]);
+
+ ret = relay_action_handler(relay, txn->remote,
+ txn->request_buf + txn->offset, txn->request_len,
+ txn->response_buf + txn->offset,
+ ARRAY_SIZE(txn->response_buf) - txn->offset);
+
+ if (ret == -EINPROGRESS) {
+ again = true;
+ ret = guc_hxg_msg_encode_busy(txn->response_buf + txn->offset, 0);
+ }
+
+ if (ret > 0) {
+ txn->response_len = ret;
+ ret = relay_send_transaction(relay, txn);
+ }
+
+ if (ret < 0) {
+ u32 error = to_relay_error(ret);
+
+ relay_notice(relay, "Failed to handle %s.%u from %u (%pe) %*ph\n",
+ guc_hxg_type_to_string(type), txn->rid, txn->remote,
+ ERR_PTR(ret), 4 * txn->request_len, txn->request_buf + txn->offset);
+
+ txn->response_len = prepare_error_reply(txn->response_buf + txn->offset,
+ txn->remote ?
+ sanitize_relay_error(error) : error,
+ txn->remote ?
+ sanitize_relay_error_hint(-ret) : -ret);
+ ret = relay_send_transaction(relay, txn);
+ again = false;
+ }
+
+ if (again) {
+ spin_lock(&relay->lock);
+ list_add(&txn->link, &relay->incoming_actions);
+ spin_unlock(&relay->lock);
+ return;
+ }
+
+ if (unlikely(ret < 0))
+ relay_notice(relay, "Failed to process action.%u (%pe) %*ph\n",
+ txn->rid, ERR_PTR(ret), 4 * txn->request_len,
+ txn->request_buf + txn->offset);
+
+ relay_release_transaction(relay, txn);
+}
+
+static bool relay_needs_worker(struct xe_guc_relay *relay)
+{
+ return !list_empty(&relay->incoming_actions);
+}
+
+static void relay_kick_worker(struct xe_guc_relay *relay)
+{
+ KUNIT_STATIC_STUB_REDIRECT(relay_kick_worker, relay);
+ queue_work(relay_to_xe(relay)->sriov.wq, &relay->worker);
+}
+
+static void relays_worker_fn(struct work_struct *w)
+{
+ struct xe_guc_relay *relay = container_of(w, struct xe_guc_relay, worker);
+
+ relay_process_incoming_action(relay);
+
+ if (relay_needs_worker(relay))
+ relay_kick_worker(relay);
+}
+
+static int relay_queue_action_msg(struct xe_guc_relay *relay, u32 origin, u32 rid,
+ const u32 *msg, u32 len)
+{
+ struct relay_transaction *txn;
+
+ txn = relay_new_incoming_transaction(relay, origin, rid, msg, len);
+ if (IS_ERR(txn))
+ return PTR_ERR(txn);
+
+ spin_lock(&relay->lock);
+ list_add_tail(&txn->link, &relay->incoming_actions);
+ spin_unlock(&relay->lock);
+
+ relay_kick_worker(relay);
+ return 0;
+}
+
+static int relay_process_msg(struct xe_guc_relay *relay, u32 origin, u32 rid,
+ const u32 *msg, u32 len)
+{
+ u32 type;
+ int err;
+
+ if (unlikely(len < GUC_HXG_MSG_MIN_LEN))
+ return -EPROTO;
+
+ if (FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) != GUC_HXG_ORIGIN_HOST)
+ return -EPROTO;
+
+ type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]);
+ relay_debug(relay, "received %s.%u from %u = %*ph\n",
+ guc_hxg_type_to_string(type), rid, origin, 4 * len, msg);
+
+ switch (type) {
+ case GUC_HXG_TYPE_REQUEST:
+ case GUC_HXG_TYPE_FAST_REQUEST:
+ case GUC_HXG_TYPE_EVENT:
+ err = relay_queue_action_msg(relay, origin, rid, msg, len);
+ break;
+ case GUC_HXG_TYPE_RESPONSE_SUCCESS:
+ err = relay_handle_reply(relay, origin, rid, 0, msg, len);
+ break;
+ case GUC_HXG_TYPE_NO_RESPONSE_BUSY:
+ err = relay_handle_reply(relay, origin, rid, -EBUSY, NULL, 0);
+ break;
+ case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
+ err = relay_handle_reply(relay, origin, rid, -EAGAIN, NULL, 0);
+ break;
+ case GUC_HXG_TYPE_RESPONSE_FAILURE:
+ err = relay_handle_failure(relay, origin, rid, msg, len);
+ break;
+ default:
+ err = -EBADRQC;
+ }
+
+ if (unlikely(err))
+ relay_notice(relay, "Failed to process %s.%u from %u (%pe) %*ph\n",
+ guc_hxg_type_to_string(type), rid, origin,
+ ERR_PTR(err), 4 * len, msg);
+
+ return err;
+}
+
+/**
+ * xe_guc_relay_process_guc2vf - Handle relay notification message from the GuC.
+ * @relay: the &xe_guc_relay which will handle the message
+ * @msg: message to be handled
+ * @len: length of the message (in dwords)
+ *
+ * This function will handle relay messages received from the GuC.
+ *
+ * This function can only be used if the driver is running in SR-IOV VF mode.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_guc_relay_process_guc2vf(struct xe_guc_relay *relay, const u32 *msg, u32 len)
+{
+ u32 rid;
+
+ relay_assert(relay, len >= GUC_HXG_MSG_MIN_LEN);
+ relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
+ relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
+ relay_assert(relay, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
+ XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF);
+
+ if (unlikely(!IS_SRIOV_VF(relay_to_xe(relay)) && !kunit_get_current_test()))
+ return -EPERM;
+
+ if (unlikely(!relay_is_ready(relay)))
+ return -ENODEV;
+
+ if (unlikely(len < GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN))
+ return -EPROTO;
+
+ if (unlikely(len > GUC2VF_RELAY_FROM_PF_EVENT_MSG_MAX_LEN))
+ return -EMSGSIZE;
+
+ if (unlikely(FIELD_GET(GUC_HXG_EVENT_MSG_0_DATA0, msg[0])))
+ return -EPFNOSUPPORT;
+
+ rid = FIELD_GET(GUC2VF_RELAY_FROM_PF_EVENT_MSG_1_RELAY_ID, msg[1]);
+
+ return relay_process_msg(relay, PFID, rid,
+ msg + GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN,
+ len - GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN);
+}
+
+#ifdef CONFIG_PCI_IOV
+/**
+ * xe_guc_relay_process_guc2pf - Handle relay notification message from the GuC.
+ * @relay: the &xe_guc_relay which will handle the message
+ * @msg: message to be handled
+ * @len: length of the message (in dwords)
+ *
+ * This function will handle relay messages received from the GuC.
+ *
+ * This function can only be used if the driver is running in SR-IOV PF mode.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_guc_relay_process_guc2pf(struct xe_guc_relay *relay, const u32 *msg, u32 len)
+{
+ u32 origin, rid;
+ int err;
+
+ relay_assert(relay, len >= GUC_HXG_EVENT_MSG_MIN_LEN);
+ relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC);
+ relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT);
+ relay_assert(relay, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) ==
+ XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF);
+
+ if (unlikely(!IS_SRIOV_PF(relay_to_xe(relay)) && !kunit_get_current_test()))
+ return -EPERM;
+
+ if (unlikely(!relay_is_ready(relay)))
+ return -ENODEV;
+
+ if (unlikely(len < GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN))
+ return -EPROTO;
+
+ if (unlikely(len > GUC2PF_RELAY_FROM_VF_EVENT_MSG_MAX_LEN))
+ return -EMSGSIZE;
+
+ if (unlikely(FIELD_GET(GUC_HXG_EVENT_MSG_0_DATA0, msg[0])))
+ return -EPFNOSUPPORT;
+
+ origin = FIELD_GET(GUC2PF_RELAY_FROM_VF_EVENT_MSG_1_VFID, msg[1]);
+ rid = FIELD_GET(GUC2PF_RELAY_FROM_VF_EVENT_MSG_2_RELAY_ID, msg[2]);
+
+ if (unlikely(origin > relay_get_totalvfs(relay)))
+ return -ENOENT;
+
+ err = relay_process_msg(relay, origin, rid,
+ msg + GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN,
+ len - GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN);
+
+ return err;
+}
+#endif
+
+#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_guc_relay_test.c"
+#endif
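
As a usage illustration of the new API, a VF-side caller could issue a blocking relay request roughly as sketched below; the send path retries on NO_RESPONSE_RETRY and keeps waiting on BUSY, and a non-negative return is the inner response length in dwords. SAMPLE_RELAY_ACTION is a placeholder, not a defined PF service:

static int example_vf_query(struct xe_guc *guc)
{
	u32 request[] = {
		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, SAMPLE_RELAY_ACTION),
	};
	u32 response[GUC_RELAY_MSG_MAX_LEN];
	int ret;

	ret = xe_guc_relay_send_to_pf(&guc->relay,
				      request, ARRAY_SIZE(request),
				      response, ARRAY_SIZE(response));
	if (ret < 0)
		return ret;

	/* ret dwords of inner response data are now available in response[] */
	return 0;
}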
diff --git a/drivers/gpu/drm/xe/xe_guc_relay.h b/drivers/gpu/drm/xe/xe_guc_relay.h
new file mode 100644
index 0000000000..385429aa18
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_relay.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GUC_RELAY_H_
+#define _XE_GUC_RELAY_H_
+
+#include <linux/types.h>
+#include <linux/errno.h>
+
+struct xe_guc_relay;
+
+int xe_guc_relay_init(struct xe_guc_relay *relay);
+
+int xe_guc_relay_send_to_pf(struct xe_guc_relay *relay,
+ const u32 *msg, u32 len, u32 *buf, u32 buf_size);
+
+int xe_guc_relay_process_guc2vf(struct xe_guc_relay *relay, const u32 *msg, u32 len);
+
+#ifdef CONFIG_PCI_IOV
+int xe_guc_relay_send_to_vf(struct xe_guc_relay *relay, u32 target,
+ const u32 *msg, u32 len, u32 *buf, u32 buf_size);
+int xe_guc_relay_process_guc2pf(struct xe_guc_relay *relay, const u32 *msg, u32 len);
+#else
+static inline int xe_guc_relay_send_to_vf(struct xe_guc_relay *relay, u32 target,
+ const u32 *msg, u32 len, u32 *buf, u32 buf_size)
+{
+ return -ENODEV;
+}
+static inline int xe_guc_relay_process_guc2pf(struct xe_guc_relay *relay, const u32 *msg, u32 len)
+{
+ return -ENODEV;
+}
+#endif
+
+#endif
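
The CONFIG_PCI_IOV stubs above exist so call sites need no #ifdef of their own; when IOV support is compiled out the PF entry points simply report -ENODEV. A hedged sketch of a call site (the dispatcher shown is illustrative, not code from this series):

static int example_dispatch_guc2pf(struct xe_guc *guc, const u32 *msg, u32 len)
{
	/* builds with or without CONFIG_PCI_IOV; the stub returns -ENODEV */
	return xe_guc_relay_process_guc2pf(&guc->relay, msg, len);
}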
diff --git a/drivers/gpu/drm/xe/xe_guc_relay_types.h b/drivers/gpu/drm/xe/xe_guc_relay_types.h
new file mode 100644
index 0000000000..5999fcb77e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_relay_types.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_GUC_RELAY_TYPES_H_
+#define _XE_GUC_RELAY_TYPES_H_
+
+#include <linux/mempool.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+/**
+ * struct xe_guc_relay - Data used by the VF-PF Relay Communication over GuC.
+ */
+struct xe_guc_relay {
+ /** @lock: protects all internal data. */
+ spinlock_t lock;
+
+ /** @worker: dispatches incoming action messages. */
+ struct work_struct worker;
+
+ /** @pending_relays: list of sent requests that await a response. */
+ struct list_head pending_relays;
+
+ /** @incoming_actions: list of incoming relay action messages to process. */
+ struct list_head incoming_actions;
+
+ /** @pool: pool of the relay message buffers. */
+ mempool_t pool;
+
+ /** @last_rid: last Relay-ID used while sending a message. */
+ u32 last_rid;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index bd1079aa88..3757e0dc5c 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -23,6 +23,7 @@
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
+#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
@@ -311,7 +312,7 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa
q->guc->id - GUC_ID_START_MLRC,
order_base_2(q->width));
else
- ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id);
+ ida_free(&guc->submission_state.guc_ids, q->guc->id);
}
static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
@@ -335,8 +336,8 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC,
order_base_2(q->width));
} else {
- ret = ida_simple_get(&guc->submission_state.guc_ids, 0,
- GUC_ID_NUMBER_SLRC, GFP_NOWAIT);
+ ret = ida_alloc_max(&guc->submission_state.guc_ids,
+ GUC_ID_NUMBER_SLRC - 1, GFP_NOWAIT);
}
if (ret < 0)
return ret;
@@ -811,7 +812,8 @@ static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
static void simple_error_capture(struct xe_exec_queue *q)
{
struct xe_guc *guc = exec_queue_to_guc(q);
- struct drm_printer p = drm_err_printer("");
+ struct xe_device *xe = guc_to_xe(guc);
+ struct drm_printer p = drm_err_printer(&xe->drm, NULL);
struct xe_hw_engine *hwe;
enum xe_hw_engine_id id;
u32 adj_logical_mask = q->logical_mask;
@@ -928,13 +930,15 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
int i = 0;
if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
- xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
- xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)));
-
drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
xe_sched_job_seqno(job), q->guc->id, q->flags);
+ xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
+ "Kernel-submitted job timed out\n");
+ xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
+ "VM job timed out on non-killed execqueue\n");
+
simple_error_capture(q);
- xe_devcoredump(q);
+ xe_devcoredump(job);
} else {
drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx",
xe_sched_job_seqno(job), q->guc->id, q->flags);
@@ -1253,6 +1257,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
return 0;
err_entity:
+ mutex_unlock(&guc->submission_state.lock);
xe_sched_entity_fini(&ge->entity);
err_sched:
xe_sched_fini(&ge->sched);
@@ -1348,21 +1353,6 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
return 0;
}
-static int guc_exec_queue_set_job_timeout(struct xe_exec_queue *q, u32 job_timeout_ms)
-{
- struct xe_gpu_scheduler *sched = &q->guc->sched;
- struct xe_guc *guc = exec_queue_to_guc(q);
- struct xe_device *xe = guc_to_xe(guc);
-
- xe_assert(xe, !exec_queue_registered(q));
- xe_assert(xe, !exec_queue_banned(q));
- xe_assert(xe, !exec_queue_killed(q));
-
- sched->base.timeout = job_timeout_ms;
-
- return 0;
-}
-
static int guc_exec_queue_suspend(struct xe_exec_queue *q)
{
struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
@@ -1413,7 +1403,6 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = {
.set_priority = guc_exec_queue_set_priority,
.set_timeslice = guc_exec_queue_set_timeslice,
.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
- .set_job_timeout = guc_exec_queue_set_job_timeout,
.suspend = guc_exec_queue_suspend,
.suspend_wait = guc_exec_queue_suspend_wait,
.resume = guc_exec_queue_resume,
@@ -1794,7 +1783,7 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
/**
* xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
- * @q: Xe exec queue.
+ * @job: faulty Xe scheduled job.
*
* This can be printed out in a later stage like during dev_coredump
* analysis.
@@ -1803,21 +1792,17 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
* caller, using `xe_guc_exec_queue_snapshot_free`.
*/
struct xe_guc_submit_exec_queue_snapshot *
-xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
+xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job)
{
- struct xe_guc *guc = exec_queue_to_guc(q);
- struct xe_device *xe = guc_to_xe(guc);
+ struct xe_exec_queue *q = job->q;
struct xe_gpu_scheduler *sched = &q->guc->sched;
- struct xe_sched_job *job;
struct xe_guc_submit_exec_queue_snapshot *snapshot;
int i;
snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC);
- if (!snapshot) {
- drm_err(&xe->drm, "Skipping GuC Engine snapshot entirely.\n");
+ if (!snapshot)
return NULL;
- }
snapshot->guc.id = q->guc->id;
memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
@@ -1833,9 +1818,7 @@ xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
snapshot->lrc = kmalloc_array(q->width, sizeof(struct lrc_snapshot),
GFP_ATOMIC);
- if (!snapshot->lrc) {
- drm_err(&xe->drm, "Skipping GuC Engine LRC snapshot.\n");
- } else {
+ if (snapshot->lrc) {
for (i = 0; i < q->width; ++i) {
struct xe_lrc *lrc = q->lrc + i;
@@ -1863,17 +1846,17 @@ xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
sizeof(struct pending_list_snapshot),
GFP_ATOMIC);
- if (!snapshot->pending_list) {
- drm_err(&xe->drm, "Skipping GuC Engine pending_list snapshot.\n");
- } else {
+ if (snapshot->pending_list) {
+ struct xe_sched_job *job_iter;
+
i = 0;
- list_for_each_entry(job, &sched->base.pending_list, drm.list) {
+ list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) {
snapshot->pending_list[i].seqno =
- xe_sched_job_seqno(job);
+ xe_sched_job_seqno(job_iter);
snapshot->pending_list[i].fence =
- dma_fence_is_signaled(job->fence) ? 1 : 0;
+ dma_fence_is_signaled(job_iter->fence) ? 1 : 0;
snapshot->pending_list[i].finished =
- dma_fence_is_signaled(&job->drm.s_fence->finished)
+ dma_fence_is_signaled(&job_iter->drm.s_fence->finished)
? 1 : 0;
i++;
}
@@ -1959,10 +1942,28 @@ void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *s
static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
{
struct xe_guc_submit_exec_queue_snapshot *snapshot;
+ struct xe_gpu_scheduler *sched = &q->guc->sched;
+ struct xe_sched_job *job;
+ bool found = false;
- snapshot = xe_guc_exec_queue_snapshot_capture(q);
+ spin_lock(&sched->base.job_list_lock);
+ list_for_each_entry(job, &sched->base.pending_list, drm.list) {
+ if (job->q == q) {
+ xe_sched_job_get(job);
+ found = true;
+ break;
+ }
+ }
+ spin_unlock(&sched->base.job_list_lock);
+
+ if (!found)
+ return;
+
+ snapshot = xe_guc_exec_queue_snapshot_capture(job);
xe_guc_exec_queue_snapshot_print(snapshot, p);
xe_guc_exec_queue_snapshot_free(snapshot);
+
+ xe_sched_job_put(job);
}
/**
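
One detail worth spelling out in the hunks above is the guc_id allocator change: ida_simple_get()/ida_simple_remove() are deprecated, and ida_alloc_max() takes an inclusive upper bound, which is why the limit becomes GUC_ID_NUMBER_SLRC - 1. A standalone sketch of the replacement API (example_ids and the helper names are illustrative):

#include <linux/idr.h>

static DEFINE_IDA(example_ids);

static int example_get_guc_id(void)
{
	/* returns an id in [0, GUC_ID_NUMBER_SLRC - 1] or a negative errno */
	return ida_alloc_max(&example_ids, GUC_ID_NUMBER_SLRC - 1, GFP_NOWAIT);
}

static void example_put_guc_id(int id)
{
	ida_free(&example_ids, id);
}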
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
index fc97869c5b..723dc2bd8d 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -9,8 +9,8 @@
#include <linux/types.h>
struct drm_printer;
-struct xe_exec_queue;
struct xe_guc;
+struct xe_sched_job;
int xe_guc_submit_init(struct xe_guc *guc);
@@ -27,7 +27,7 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
struct xe_guc_submit_exec_queue_snapshot *
-xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q);
+xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job);
void
xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot,
struct drm_printer *p);
diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h
index 649b0a8526..72fc0f42b0 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h
@@ -102,9 +102,9 @@ struct xe_guc_submit_exec_queue_snapshot {
/** @sched_props: scheduling properties */
struct {
- /** @timeslice_us: timeslice period in micro-seconds */
+ /** @sched_props.timeslice_us: timeslice period in micro-seconds */
u32 timeslice_us;
- /** @preempt_timeout_us: preemption timeout in micro-seconds */
+ /** @sched_props.preempt_timeout_us: preemption timeout in micro-seconds */
u32 preempt_timeout_us;
} sched_props;
@@ -118,11 +118,11 @@ struct xe_guc_submit_exec_queue_snapshot {
/** @guc: GuC Engine Snapshot */
struct {
- /** @wqi_head: work queue item head */
+ /** @guc.wqi_head: work queue item head */
u32 wqi_head;
- /** @wqi_tail: work queue item tail */
+ /** @guc.wqi_tail: work queue item tail */
u32 wqi_tail;
- /** @id: GuC id for this exec_queue */
+ /** @guc.id: GuC id for this exec_queue */
u16 id;
} guc;
@@ -133,13 +133,13 @@ struct xe_guc_submit_exec_queue_snapshot {
bool parallel_execution;
/** @parallel: snapshot of the useful parallel scratch */
struct {
- /** @wq_desc: Workqueue description */
+ /** @parallel.wq_desc: Workqueue description */
struct {
- /** @head: Workqueue Head */
+ /** @parallel.wq_desc.head: Workqueue Head */
u32 head;
- /** @tail: Workqueue Tail */
+ /** @parallel.wq_desc.tail: Workqueue Tail */
u32 tail;
- /** @status: Workqueue Status */
+ /** @parallel.wq_desc.status: Workqueue Status */
u32 status;
} wq_desc;
/** @wq: Workqueue Items */
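
The member renames above (and the matching ones in xe_guc_types.h below) follow the kernel-doc rule that members of nested structures are documented with their full path from the containing member; without the prefix, scripts/kernel-doc flags them as undescribed. A minimal sketch of the convention, with illustrative names:

/**
 * struct example_snapshot - example of nested-member kernel-doc
 */
struct example_snapshot {
	/** @stats: runtime counters */
	struct {
		/** @stats.hits: number of hits */
		u32 hits;
		/** @stats.misses: number of misses */
		u32 misses;
	} stats;
};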
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index cd80802e89..edcd1a950b 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -15,9 +15,23 @@
#include "xe_guc_fwif.h"
#include "xe_guc_log_types.h"
#include "xe_guc_pc_types.h"
+#include "xe_guc_relay_types.h"
#include "xe_uc_fw_types.h"
/**
+ * struct xe_guc_db_mgr - GuC Doorbells Manager.
+ *
+ * Note: the GuC Doorbells Manager relies on &xe_guc::submission_state.lock
+ * to protect its members.
+ */
+struct xe_guc_db_mgr {
+ /** @count: number of doorbells to manage */
+ unsigned int count;
+ /** @bitmap: bitmap to track allocated doorbells */
+ unsigned long *bitmap;
+};
+
+/**
* struct xe_guc - Graphic micro controller
*/
struct xe_guc {
@@ -31,45 +45,50 @@ struct xe_guc {
struct xe_guc_ct ct;
/** @pc: GuC Power Conservation */
struct xe_guc_pc pc;
+ /** @dbm: GuC Doorbell Manager */
+ struct xe_guc_db_mgr dbm;
/** @submission_state: GuC submission state */
struct {
- /** @exec_queue_lookup: Lookup an xe_engine from guc_id */
+ /** @submission_state.exec_queue_lookup: Lookup an xe_engine from guc_id */
struct xarray exec_queue_lookup;
- /** @guc_ids: used to allocate new guc_ids, single-lrc */
+ /** @submission_state.guc_ids: used to allocate new guc_ids, single-lrc */
struct ida guc_ids;
- /** @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc */
+ /** @submission_state.guc_ids_bitmap: used to allocate new guc_ids, multi-lrc */
unsigned long *guc_ids_bitmap;
- /** @stopped: submissions are stopped */
+ /** @submission_state.stopped: submissions are stopped */
atomic_t stopped;
- /** @lock: protects submission state */
+ /** @submission_state.lock: protects submission state */
struct mutex lock;
- /** @suspend: suspend fence state */
+ /** @submission_state.suspend: suspend fence state */
struct {
- /** @lock: suspend fences lock */
+ /** @submission_state.suspend.lock: suspend fences lock */
spinlock_t lock;
- /** @context: suspend fences context */
+ /** @submission_state.suspend.context: suspend fences context */
u64 context;
- /** @seqno: suspend fences seqno */
+ /** @submission_state.suspend.seqno: suspend fences seqno */
u32 seqno;
} suspend;
#ifdef CONFIG_PROVE_LOCKING
#define NUM_SUBMIT_WQ 256
- /** @submit_wq_pool: submission ordered workqueues pool */
+ /** @submission_state.submit_wq_pool: submission ordered workqueues pool */
struct workqueue_struct *submit_wq_pool[NUM_SUBMIT_WQ];
- /** @submit_wq_idx: submission ordered workqueue index */
+ /** @submission_state.submit_wq_idx: submission ordered workqueue index */
int submit_wq_idx;
#endif
- /** @enabled: submission is enabled */
+ /** @submission_state.enabled: submission is enabled */
bool enabled;
} submission_state;
/** @hwconfig: Hardware config state */
struct {
- /** @bo: buffer object of the hardware config */
+ /** @hwconfig.bo: buffer object of the hardware config */
struct xe_bo *bo;
- /** @size: size of the hardware config */
+ /** @hwconfig.size: size of the hardware config */
u32 size;
} hwconfig;
+ /** @relay: GuC Relay Communication used in SR-IOV */
+ struct xe_guc_relay relay;
+
/**
* @notify_reg: Register which is written to notify GuC of H2G messages
*/
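
struct xe_guc_db_mgr above only records a doorbell count and an allocation bitmap; per its note, callers are expected to serialize on submission_state.lock. The actual manager is implemented in a separate file in this series; the sketch below, with an illustrative function name, shows the kind of bitmap scan such a manager implies:

/* Sketch only: reserve one doorbell id; caller holds submission_state.lock. */
static int example_dbm_reserve(struct xe_guc_db_mgr *dbm)
{
	unsigned int id;

	id = find_first_zero_bit(dbm->bitmap, dbm->count);
	if (id >= dbm->count)
		return -ENOSPC;

	__set_bit(id, dbm->bitmap);
	return id;
}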
diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c
index bfdd33b9b2..1c9d38b6f5 100644
--- a/drivers/gpu/drm/xe/xe_heci_gsc.c
+++ b/drivers/gpu/drm/xe/xe_heci_gsc.c
@@ -29,7 +29,7 @@ static void heci_gsc_irq_unmask(struct irq_data *d)
/* generic irq handling */
}
-static struct irq_chip heci_gsc_irq_chip = {
+static const struct irq_chip heci_gsc_irq_chip = {
.name = "gsc_irq_chip",
.irq_mask = heci_gsc_irq_mask,
.irq_unmask = heci_gsc_irq_unmask,
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index 01b2d0bd26..6b9b1cbedd 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -105,6 +105,25 @@ out:
return ret;
}
+int xe_huc_init_post_hwconfig(struct xe_huc *huc)
+{
+ struct xe_tile *tile = gt_to_tile(huc_to_gt(huc));
+ struct xe_device *xe = huc_to_xe(huc);
+ int ret;
+
+ if (!IS_DGFX(huc_to_xe(huc)))
+ return 0;
+
+ if (!xe_uc_fw_is_loadable(&huc->fw))
+ return 0;
+
+ ret = xe_managed_bo_reinit_in_vram(xe, tile, &huc->fw.bo);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
int xe_huc_upload(struct xe_huc *huc)
{
if (!xe_uc_fw_is_loadable(&huc->fw))
diff --git a/drivers/gpu/drm/xe/xe_huc.h b/drivers/gpu/drm/xe/xe_huc.h
index 5320172302..3ab56cc14b 100644
--- a/drivers/gpu/drm/xe/xe_huc.h
+++ b/drivers/gpu/drm/xe/xe_huc.h
@@ -17,6 +17,7 @@ enum xe_huc_auth_types {
};
int xe_huc_init(struct xe_huc *huc);
+int xe_huc_init_post_hwconfig(struct xe_huc *huc);
int xe_huc_upload(struct xe_huc *huc);
int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type);
bool xe_huc_is_authenticated(struct xe_huc *huc, enum xe_huc_auth_types type);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 1fa5cf5eea..b5e83ea172 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -25,6 +25,7 @@
#include "xe_reg_sr.h"
#include "xe_rtp.h"
#include "xe_sched_job.h"
+#include "xe_sriov.h"
#include "xe_tuning.h"
#include "xe_uc_fw.h"
#include "xe_wa.h"
@@ -34,6 +35,7 @@ struct engine_info {
const char *name;
unsigned int class : 8;
unsigned int instance : 8;
+ unsigned int irq_offset : 8;
enum xe_force_wake_domains domain;
u32 mmio_base;
};
@@ -43,6 +45,7 @@ static const struct engine_info engine_infos[] = {
.name = "rcs0",
.class = XE_ENGINE_CLASS_RENDER,
.instance = 0,
+ .irq_offset = ilog2(INTR_RCS0),
.domain = XE_FW_RENDER,
.mmio_base = RENDER_RING_BASE,
},
@@ -50,6 +53,7 @@ static const struct engine_info engine_infos[] = {
.name = "bcs0",
.class = XE_ENGINE_CLASS_COPY,
.instance = 0,
+ .irq_offset = ilog2(INTR_BCS(0)),
.domain = XE_FW_RENDER,
.mmio_base = BLT_RING_BASE,
},
@@ -57,6 +61,7 @@ static const struct engine_info engine_infos[] = {
.name = "bcs1",
.class = XE_ENGINE_CLASS_COPY,
.instance = 1,
+ .irq_offset = ilog2(INTR_BCS(1)),
.domain = XE_FW_RENDER,
.mmio_base = XEHPC_BCS1_RING_BASE,
},
@@ -64,6 +69,7 @@ static const struct engine_info engine_infos[] = {
.name = "bcs2",
.class = XE_ENGINE_CLASS_COPY,
.instance = 2,
+ .irq_offset = ilog2(INTR_BCS(2)),
.domain = XE_FW_RENDER,
.mmio_base = XEHPC_BCS2_RING_BASE,
},
@@ -71,6 +77,7 @@ static const struct engine_info engine_infos[] = {
.name = "bcs3",
.class = XE_ENGINE_CLASS_COPY,
.instance = 3,
+ .irq_offset = ilog2(INTR_BCS(3)),
.domain = XE_FW_RENDER,
.mmio_base = XEHPC_BCS3_RING_BASE,
},
@@ -78,6 +85,7 @@ static const struct engine_info engine_infos[] = {
.name = "bcs4",
.class = XE_ENGINE_CLASS_COPY,
.instance = 4,
+ .irq_offset = ilog2(INTR_BCS(4)),
.domain = XE_FW_RENDER,
.mmio_base = XEHPC_BCS4_RING_BASE,
},
@@ -85,6 +93,7 @@ static const struct engine_info engine_infos[] = {
.name = "bcs5",
.class = XE_ENGINE_CLASS_COPY,
.instance = 5,
+ .irq_offset = ilog2(INTR_BCS(5)),
.domain = XE_FW_RENDER,
.mmio_base = XEHPC_BCS5_RING_BASE,
},
@@ -92,12 +101,14 @@ static const struct engine_info engine_infos[] = {
.name = "bcs6",
.class = XE_ENGINE_CLASS_COPY,
.instance = 6,
+ .irq_offset = ilog2(INTR_BCS(6)),
.domain = XE_FW_RENDER,
.mmio_base = XEHPC_BCS6_RING_BASE,
},
[XE_HW_ENGINE_BCS7] = {
.name = "bcs7",
.class = XE_ENGINE_CLASS_COPY,
+ .irq_offset = ilog2(INTR_BCS(7)),
.instance = 7,
.domain = XE_FW_RENDER,
.mmio_base = XEHPC_BCS7_RING_BASE,
@@ -106,6 +117,7 @@ static const struct engine_info engine_infos[] = {
.name = "bcs8",
.class = XE_ENGINE_CLASS_COPY,
.instance = 8,
+ .irq_offset = ilog2(INTR_BCS8),
.domain = XE_FW_RENDER,
.mmio_base = XEHPC_BCS8_RING_BASE,
},
@@ -114,6 +126,7 @@ static const struct engine_info engine_infos[] = {
.name = "vcs0",
.class = XE_ENGINE_CLASS_VIDEO_DECODE,
.instance = 0,
+ .irq_offset = 32 + ilog2(INTR_VCS(0)),
.domain = XE_FW_MEDIA_VDBOX0,
.mmio_base = BSD_RING_BASE,
},
@@ -121,6 +134,7 @@ static const struct engine_info engine_infos[] = {
.name = "vcs1",
.class = XE_ENGINE_CLASS_VIDEO_DECODE,
.instance = 1,
+ .irq_offset = 32 + ilog2(INTR_VCS(1)),
.domain = XE_FW_MEDIA_VDBOX1,
.mmio_base = BSD2_RING_BASE,
},
@@ -128,6 +142,7 @@ static const struct engine_info engine_infos[] = {
.name = "vcs2",
.class = XE_ENGINE_CLASS_VIDEO_DECODE,
.instance = 2,
+ .irq_offset = 32 + ilog2(INTR_VCS(2)),
.domain = XE_FW_MEDIA_VDBOX2,
.mmio_base = BSD3_RING_BASE,
},
@@ -135,6 +150,7 @@ static const struct engine_info engine_infos[] = {
.name = "vcs3",
.class = XE_ENGINE_CLASS_VIDEO_DECODE,
.instance = 3,
+ .irq_offset = 32 + ilog2(INTR_VCS(3)),
.domain = XE_FW_MEDIA_VDBOX3,
.mmio_base = BSD4_RING_BASE,
},
@@ -142,6 +158,7 @@ static const struct engine_info engine_infos[] = {
.name = "vcs4",
.class = XE_ENGINE_CLASS_VIDEO_DECODE,
.instance = 4,
+ .irq_offset = 32 + ilog2(INTR_VCS(4)),
.domain = XE_FW_MEDIA_VDBOX4,
.mmio_base = XEHP_BSD5_RING_BASE,
},
@@ -149,6 +166,7 @@ static const struct engine_info engine_infos[] = {
.name = "vcs5",
.class = XE_ENGINE_CLASS_VIDEO_DECODE,
.instance = 5,
+ .irq_offset = 32 + ilog2(INTR_VCS(5)),
.domain = XE_FW_MEDIA_VDBOX5,
.mmio_base = XEHP_BSD6_RING_BASE,
},
@@ -156,6 +174,7 @@ static const struct engine_info engine_infos[] = {
.name = "vcs6",
.class = XE_ENGINE_CLASS_VIDEO_DECODE,
.instance = 6,
+ .irq_offset = 32 + ilog2(INTR_VCS(6)),
.domain = XE_FW_MEDIA_VDBOX6,
.mmio_base = XEHP_BSD7_RING_BASE,
},
@@ -163,6 +182,7 @@ static const struct engine_info engine_infos[] = {
.name = "vcs7",
.class = XE_ENGINE_CLASS_VIDEO_DECODE,
.instance = 7,
+ .irq_offset = 32 + ilog2(INTR_VCS(7)),
.domain = XE_FW_MEDIA_VDBOX7,
.mmio_base = XEHP_BSD8_RING_BASE,
},
@@ -170,6 +190,7 @@ static const struct engine_info engine_infos[] = {
.name = "vecs0",
.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
.instance = 0,
+ .irq_offset = 32 + ilog2(INTR_VECS(0)),
.domain = XE_FW_MEDIA_VEBOX0,
.mmio_base = VEBOX_RING_BASE,
},
@@ -177,6 +198,7 @@ static const struct engine_info engine_infos[] = {
.name = "vecs1",
.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
.instance = 1,
+ .irq_offset = 32 + ilog2(INTR_VECS(1)),
.domain = XE_FW_MEDIA_VEBOX1,
.mmio_base = VEBOX2_RING_BASE,
},
@@ -184,6 +206,7 @@ static const struct engine_info engine_infos[] = {
.name = "vecs2",
.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
.instance = 2,
+ .irq_offset = 32 + ilog2(INTR_VECS(2)),
.domain = XE_FW_MEDIA_VEBOX2,
.mmio_base = XEHP_VEBOX3_RING_BASE,
},
@@ -191,6 +214,7 @@ static const struct engine_info engine_infos[] = {
.name = "vecs3",
.class = XE_ENGINE_CLASS_VIDEO_ENHANCE,
.instance = 3,
+ .irq_offset = 32 + ilog2(INTR_VECS(3)),
.domain = XE_FW_MEDIA_VEBOX3,
.mmio_base = XEHP_VEBOX4_RING_BASE,
},
@@ -198,6 +222,7 @@ static const struct engine_info engine_infos[] = {
.name = "ccs0",
.class = XE_ENGINE_CLASS_COMPUTE,
.instance = 0,
+ .irq_offset = ilog2(INTR_CCS(0)),
.domain = XE_FW_RENDER,
.mmio_base = COMPUTE0_RING_BASE,
},
@@ -205,6 +230,7 @@ static const struct engine_info engine_infos[] = {
.name = "ccs1",
.class = XE_ENGINE_CLASS_COMPUTE,
.instance = 1,
+ .irq_offset = ilog2(INTR_CCS(1)),
.domain = XE_FW_RENDER,
.mmio_base = COMPUTE1_RING_BASE,
},
@@ -212,6 +238,7 @@ static const struct engine_info engine_infos[] = {
.name = "ccs2",
.class = XE_ENGINE_CLASS_COMPUTE,
.instance = 2,
+ .irq_offset = ilog2(INTR_CCS(2)),
.domain = XE_FW_RENDER,
.mmio_base = COMPUTE2_RING_BASE,
},
@@ -219,6 +246,7 @@ static const struct engine_info engine_infos[] = {
.name = "ccs3",
.class = XE_ENGINE_CLASS_COMPUTE,
.instance = 3,
+ .irq_offset = ilog2(INTR_CCS(3)),
.domain = XE_FW_RENDER,
.mmio_base = COMPUTE3_RING_BASE,
},
@@ -289,6 +317,19 @@ static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt,
xe_rtp_match_first_render_or_compute(gt, hwe);
}
+static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt,
+ const struct xe_hw_engine *hwe)
+{
+ if (GRAPHICS_VER(gt_to_xe(gt)) < 20)
+ return false;
+
+ if (hwe->class != XE_ENGINE_CLASS_COMPUTE &&
+ hwe->class != XE_ENGINE_CLASS_RENDER)
+ return false;
+
+ return xe_mmio_read32(hwe->gt, XEHP_FUSE4) & CFEG_WMTP_DISABLE;
+}
+
void
xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
{
@@ -319,6 +360,14 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
RCU_MODE_FIXED_SLICE_CCS_MODE))
},
+ /* Disable WMTP if HW doesn't support it */
+ { XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"),
+ XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)),
+ XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0),
+ PREEMPT_GPGPU_LEVEL_MASK,
+ PREEMPT_GPGPU_THREAD_GROUP_LEVEL)),
+ XE_RTP_ENTRY_FLAG(FOREACH_ENGINE)
+ },
{}
};
@@ -397,6 +446,7 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
hwe->class = info->class;
hwe->instance = info->instance;
hwe->mmio_base = info->mmio_base;
+ hwe->irq_offset = info->irq_offset;
hwe->domain = info->domain;
hwe->name = info->name;
hwe->fence_irq = &gt->fence_irq[info->class];
@@ -700,7 +750,7 @@ struct xe_hw_engine_snapshot *
xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
{
struct xe_hw_engine_snapshot *snapshot;
- int len;
+ u64 val;
if (!xe_hw_engine_is_valid(hwe))
return NULL;
@@ -710,11 +760,7 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
if (!snapshot)
return NULL;
- len = strlen(hwe->name) + 1;
- snapshot->name = kzalloc(len, GFP_ATOMIC);
- if (snapshot->name)
- strscpy(snapshot->name, hwe->name, len);
-
+ snapshot->name = kstrdup(hwe->name, GFP_ATOMIC);
snapshot->class = hwe->class;
snapshot->logical_instance = hwe->logical_instance;
snapshot->forcewake.domain = hwe->domain;
@@ -722,19 +768,35 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
hwe->domain);
snapshot->mmio_base = hwe->mmio_base;
- snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
- snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe,
- RING_HWS_PGA(0));
- snapshot->reg.ring_execlist_status_lo =
+ /* no more VF accessible data below this point */
+ if (IS_SRIOV_VF(gt_to_xe(hwe->gt)))
+ return snapshot;
+
+ snapshot->reg.ring_execlist_status =
hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0));
- snapshot->reg.ring_execlist_status_hi =
- hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
- snapshot->reg.ring_execlist_sq_contents_lo =
- hw_engine_mmio_read32(hwe,
- RING_EXECLIST_SQ_CONTENTS_LO(0));
- snapshot->reg.ring_execlist_sq_contents_hi =
- hw_engine_mmio_read32(hwe,
- RING_EXECLIST_SQ_CONTENTS_HI(0));
+ val = hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0));
+ snapshot->reg.ring_execlist_status |= val << 32;
+
+ snapshot->reg.ring_execlist_sq_contents =
+ hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0));
+ val = hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0));
+ snapshot->reg.ring_execlist_sq_contents |= val << 32;
+
+ snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0));
+ val = hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
+ snapshot->reg.ring_acthd |= val << 32;
+
+ snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0));
+ val = hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
+ snapshot->reg.ring_bbaddr |= val << 32;
+
+ snapshot->reg.ring_dma_fadd =
+ hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
+ val = hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
+ snapshot->reg.ring_dma_fadd |= val << 32;
+
+ snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0));
+ snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0));
snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0));
snapshot->reg.ring_head =
hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR;
@@ -748,16 +810,6 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe)
snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0));
snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0));
snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0));
- snapshot->reg.ring_acthd_udw =
- hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0));
- snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0));
- snapshot->reg.ring_bbaddr_udw =
- hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0));
- snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0));
- snapshot->reg.ring_dma_fadd_udw =
- hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0));
- snapshot->reg.ring_dma_fadd =
- hw_engine_mmio_read32(hwe, RING_DMA_FADD(0));
snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0));
if (snapshot->class == XE_ENGINE_CLASS_COMPUTE)
@@ -786,33 +838,25 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot,
snapshot->forcewake.domain, snapshot->forcewake.ref);
drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam);
drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga);
- drm_printf(p, "\tRING_EXECLIST_STATUS_LO: 0x%08x\n",
- snapshot->reg.ring_execlist_status_lo);
- drm_printf(p, "\tRING_EXECLIST_STATUS_HI: 0x%08x\n",
- snapshot->reg.ring_execlist_status_hi);
- drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n",
- snapshot->reg.ring_execlist_sq_contents_lo);
- drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n",
- snapshot->reg.ring_execlist_sq_contents_hi);
+ drm_printf(p, "\tRING_EXECLIST_STATUS: 0x%016llx\n",
+ snapshot->reg.ring_execlist_status);
+ drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n",
+ snapshot->reg.ring_execlist_sq_contents);
drm_printf(p, "\tRING_START: 0x%08x\n", snapshot->reg.ring_start);
- drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head);
- drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail);
+ drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head);
+ drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail);
drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl);
drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode);
drm_printf(p, "\tRING_MODE: 0x%08x\n",
snapshot->reg.ring_mode);
- drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr);
- drm_printf(p, "\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr);
- drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr);
- drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir);
- drm_printf(p, "\tACTHD: 0x%08x_%08x\n", snapshot->reg.ring_acthd_udw,
- snapshot->reg.ring_acthd);
- drm_printf(p, "\tBBADDR: 0x%08x_%08x\n", snapshot->reg.ring_bbaddr_udw,
- snapshot->reg.ring_bbaddr);
- drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n",
- snapshot->reg.ring_dma_fadd_udw,
- snapshot->reg.ring_dma_fadd);
- drm_printf(p, "\tIPEHR: 0x%08x\n\n", snapshot->reg.ipehr);
+ drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr);
+ drm_printf(p, "\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr);
+ drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr);
+ drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir);
+ drm_printf(p, "\tACTHD: 0x%016llx\n", snapshot->reg.ring_acthd);
+ drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr);
+ drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd);
+ drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr);
if (snapshot->class == XE_ENGINE_CLASS_COMPUTE)
drm_printf(p, "\tRCU_MODE: 0x%08x\n",
snapshot->reg.rcu_mode);
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
index e49bc14f0e..2345fb42fa 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c
@@ -73,7 +73,7 @@ static ssize_t job_timeout_max_show(struct kobject *kobj,
return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_max);
}
-static struct kobj_attribute job_timeout_max_attr =
+static const struct kobj_attribute job_timeout_max_attr =
__ATTR(job_timeout_max, 0644, job_timeout_max_show, job_timeout_max_store);
static ssize_t job_timeout_min_store(struct kobject *kobj,
@@ -109,7 +109,7 @@ static ssize_t job_timeout_min_show(struct kobject *kobj,
return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_min);
}
-static struct kobj_attribute job_timeout_min_attr =
+static const struct kobj_attribute job_timeout_min_attr =
__ATTR(job_timeout_min, 0644, job_timeout_min_show, job_timeout_min_store);
static ssize_t job_timeout_store(struct kobject *kobj,
@@ -142,7 +142,7 @@ static ssize_t job_timeout_show(struct kobject *kobj,
return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_ms);
}
-static struct kobj_attribute job_timeout_attr =
+static const struct kobj_attribute job_timeout_attr =
__ATTR(job_timeout_ms, 0644, job_timeout_show, job_timeout_store);
static ssize_t job_timeout_default(struct kobject *kobj,
@@ -153,7 +153,7 @@ static ssize_t job_timeout_default(struct kobject *kobj,
return sprintf(buf, "%u\n", eclass->defaults.job_timeout_ms);
}
-static struct kobj_attribute job_timeout_def =
+static const struct kobj_attribute job_timeout_def =
__ATTR(job_timeout_ms, 0444, job_timeout_default, NULL);
static ssize_t job_timeout_min_default(struct kobject *kobj,
@@ -164,7 +164,7 @@ static ssize_t job_timeout_min_default(struct kobject *kobj,
return sprintf(buf, "%u\n", eclass->defaults.job_timeout_min);
}
-static struct kobj_attribute job_timeout_min_def =
+static const struct kobj_attribute job_timeout_min_def =
__ATTR(job_timeout_min, 0444, job_timeout_min_default, NULL);
static ssize_t job_timeout_max_default(struct kobject *kobj,
@@ -175,7 +175,7 @@ static ssize_t job_timeout_max_default(struct kobject *kobj,
return sprintf(buf, "%u\n", eclass->defaults.job_timeout_max);
}
-static struct kobj_attribute job_timeout_max_def =
+static const struct kobj_attribute job_timeout_max_def =
__ATTR(job_timeout_max, 0444, job_timeout_max_default, NULL);
static ssize_t timeslice_duration_store(struct kobject *kobj,
@@ -234,7 +234,7 @@ static ssize_t timeslice_duration_max_show(struct kobject *kobj,
return sprintf(buf, "%u\n", eclass->sched_props.timeslice_max);
}
-static struct kobj_attribute timeslice_duration_max_attr =
+static const struct kobj_attribute timeslice_duration_max_attr =
__ATTR(timeslice_duration_max, 0644, timeslice_duration_max_show,
timeslice_duration_max_store);
@@ -272,7 +272,7 @@ static ssize_t timeslice_duration_min_show(struct kobject *kobj,
return sprintf(buf, "%u\n", eclass->sched_props.timeslice_min);
}
-static struct kobj_attribute timeslice_duration_min_attr =
+static const struct kobj_attribute timeslice_duration_min_attr =
__ATTR(timeslice_duration_min, 0644, timeslice_duration_min_show,
timeslice_duration_min_store);
@@ -284,7 +284,7 @@ static ssize_t timeslice_duration_show(struct kobject *kobj,
return sprintf(buf, "%u\n", eclass->sched_props.timeslice_us);
}
-static struct kobj_attribute timeslice_duration_attr =
+static const struct kobj_attribute timeslice_duration_attr =
__ATTR(timeslice_duration_us, 0644, timeslice_duration_show,
timeslice_duration_store);
@@ -296,7 +296,7 @@ static ssize_t timeslice_default(struct kobject *kobj,
return sprintf(buf, "%u\n", eclass->defaults.timeslice_us);
}
-static struct kobj_attribute timeslice_duration_def =
+static const struct kobj_attribute timeslice_duration_def =
__ATTR(timeslice_duration_us, 0444, timeslice_default, NULL);
static ssize_t timeslice_min_default(struct kobject *kobj,
@@ -307,7 +307,7 @@ static ssize_t timeslice_min_default(struct kobject *kobj,
return sprintf(buf, "%u\n", eclass->defaults.timeslice_min);
}
-static struct kobj_attribute timeslice_duration_min_def =
+static const struct kobj_attribute timeslice_duration_min_def =
__ATTR(timeslice_duration_min, 0444, timeslice_min_default, NULL);
static ssize_t timeslice_max_default(struct kobject *kobj,
@@ -318,7 +318,7 @@ static ssize_t timeslice_max_default(struct kobject *kobj,
return sprintf(buf, "%u\n", eclass->defaults.timeslice_max);
}
-static struct kobj_attribute timeslice_duration_max_def =
+static const struct kobj_attribute timeslice_duration_max_def =
__ATTR(timeslice_duration_max, 0444, timeslice_max_default, NULL);
static ssize_t preempt_timeout_store(struct kobject *kobj,
@@ -351,7 +351,7 @@ static ssize_t preempt_timeout_show(struct kobject *kobj,
return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_us);
}
-static struct kobj_attribute preempt_timeout_attr =
+static const struct kobj_attribute preempt_timeout_attr =
__ATTR(preempt_timeout_us, 0644, preempt_timeout_show, preempt_timeout_store);
static ssize_t preempt_timeout_default(struct kobject *kobj,
@@ -363,7 +363,7 @@ static ssize_t preempt_timeout_default(struct kobject *kobj,
return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_us);
}
-static struct kobj_attribute preempt_timeout_def =
+static const struct kobj_attribute preempt_timeout_def =
__ATTR(preempt_timeout_us, 0444, preempt_timeout_default, NULL);
static ssize_t preempt_timeout_min_default(struct kobject *kobj,
@@ -375,7 +375,7 @@ static ssize_t preempt_timeout_min_default(struct kobject *kobj,
return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_min);
}
-static struct kobj_attribute preempt_timeout_min_def =
+static const struct kobj_attribute preempt_timeout_min_def =
__ATTR(preempt_timeout_min, 0444, preempt_timeout_min_default, NULL);
static ssize_t preempt_timeout_max_default(struct kobject *kobj,
@@ -387,7 +387,7 @@ static ssize_t preempt_timeout_max_default(struct kobject *kobj,
return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_max);
}
-static struct kobj_attribute preempt_timeout_max_def =
+static const struct kobj_attribute preempt_timeout_max_def =
__ATTR(preempt_timeout_max, 0444, preempt_timeout_max_default, NULL);
static ssize_t preempt_timeout_max_store(struct kobject *kobj,
@@ -423,7 +423,7 @@ static ssize_t preempt_timeout_max_show(struct kobject *kobj,
return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_max);
}
-static struct kobj_attribute preempt_timeout_max_attr =
+static const struct kobj_attribute preempt_timeout_max_attr =
__ATTR(preempt_timeout_max, 0644, preempt_timeout_max_show,
preempt_timeout_max_store);
@@ -460,7 +460,7 @@ static ssize_t preempt_timeout_min_show(struct kobject *kobj,
return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_min);
}
-static struct kobj_attribute preempt_timeout_min_attr =
+static const struct kobj_attribute preempt_timeout_min_attr =
__ATTR(preempt_timeout_min, 0644, preempt_timeout_min_show,
preempt_timeout_min_store);
@@ -477,7 +477,7 @@ static const struct attribute *defaults[] = {
NULL
};
-static const struct attribute *files[] = {
+static const struct attribute * const files[] = {
&job_timeout_attr.attr,
&job_timeout_min_attr.attr,
&job_timeout_max_attr.attr,
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h
index 39908dec04..d7f828c76c 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h
@@ -79,23 +79,23 @@ struct xe_hw_engine_class_intf {
* @defaults: default scheduling properties
*/
struct {
- /** @set_job_timeout: Set job timeout in ms for engine */
+ /** @sched_props.job_timeout_ms: Job timeout in ms for engine */
u32 job_timeout_ms;
- /** @job_timeout_min: Min job timeout in ms for engine */
+ /** @sched_props.job_timeout_min: Min job timeout in ms for engine */
u32 job_timeout_min;
- /** @job_timeout_max: Max job timeout in ms for engine */
+ /** @sched_props.job_timeout_max: Max job timeout in ms for engine */
u32 job_timeout_max;
- /** @timeslice_us: timeslice period in micro-seconds */
+ /** @sched_props.timeslice_us: timeslice period in micro-seconds */
u32 timeslice_us;
- /** @timeslice_min: min timeslice period in micro-seconds */
+ /** @sched_props.timeslice_min: min timeslice period in micro-seconds */
u32 timeslice_min;
- /** @timeslice_max: max timeslice period in micro-seconds */
+ /** @sched_props.timeslice_max: max timeslice period in micro-seconds */
u32 timeslice_max;
- /** @preempt_timeout_us: preemption timeout in micro-seconds */
+ /** @sched_props.preempt_timeout_us: preemption timeout in micro-seconds */
u32 preempt_timeout_us;
- /** @preempt_timeout_min: min preemption timeout in micro-seconds */
+ /** @sched_props.preempt_timeout_min: min preemption timeout in micro-seconds */
u32 preempt_timeout_min;
- /** @preempt_timeout_max: max preemption timeout in micro-seconds */
+ /** @sched_props.preempt_timeout_max: max preemption timeout in micro-seconds */
u32 preempt_timeout_max;
} sched_props, defaults;
};
@@ -116,6 +116,8 @@ struct xe_hw_engine {
u16 instance;
/** @logical_instance: logical instance of this hw engine */
u16 logical_instance;
+ /** @irq_offset: IRQ offset of this hw engine */
+ u16 irq_offset;
/** @mmio_base: MMIO base address of this hw engine*/
u32 mmio_base;
/**
@@ -162,62 +164,52 @@ struct xe_hw_engine_snapshot {
u16 logical_instance;
/** @forcewake: Force Wake information snapshot */
struct {
- /** @domain: force wake domain of this hw engine */
+ /** @forcewake.domain: force wake domain of this hw engine */
enum xe_force_wake_domains domain;
- /** @ref: Forcewake ref for the above domain */
+ /** @forcewake.ref: Forcewake ref for the above domain */
int ref;
} forcewake;
/** @mmio_base: MMIO base address of this hw engine*/
u32 mmio_base;
/** @reg: Useful MMIO register snapshot */
struct {
- /** @ring_hwstam: RING_HWSTAM */
+ /** @reg.ring_execlist_status: RING_EXECLIST_STATUS */
+ u64 ring_execlist_status;
+ /** @reg.ring_execlist_sq_contents: RING_EXECLIST_SQ_CONTENTS */
+ u64 ring_execlist_sq_contents;
+ /** @reg.ring_acthd: RING_ACTHD */
+ u64 ring_acthd;
+ /** @reg.ring_bbaddr: RING_BBADDR */
+ u64 ring_bbaddr;
+ /** @reg.ring_dma_fadd: RING_DMA_FADD */
+ u64 ring_dma_fadd;
+ /** @reg.ring_hwstam: RING_HWSTAM */
u32 ring_hwstam;
- /** @ring_hws_pga: RING_HWS_PGA */
+ /** @reg.ring_hws_pga: RING_HWS_PGA */
u32 ring_hws_pga;
- /** @ring_execlist_status_lo: RING_EXECLIST_STATUS_LO */
- u32 ring_execlist_status_lo;
- /** @ring_execlist_status_hi: RING_EXECLIST_STATUS_HI */
- u32 ring_execlist_status_hi;
- /** @ring_execlist_sq_contents_lo: RING_EXECLIST_SQ_CONTENTS */
- u32 ring_execlist_sq_contents_lo;
- /** @ring_execlist_sq_contents_hi: RING_EXECLIST_SQ_CONTENTS + 4 */
- u32 ring_execlist_sq_contents_hi;
- /** @ring_start: RING_START */
+ /** @reg.ring_start: RING_START */
u32 ring_start;
- /** @ring_head: RING_HEAD */
+ /** @reg.ring_head: RING_HEAD */
u32 ring_head;
- /** @ring_tail: RING_TAIL */
+ /** @reg.ring_tail: RING_TAIL */
u32 ring_tail;
- /** @ring_ctl: RING_CTL */
+ /** @reg.ring_ctl: RING_CTL */
u32 ring_ctl;
- /** @ring_mi_mode: RING_MI_MODE */
+ /** @reg.ring_mi_mode: RING_MI_MODE */
u32 ring_mi_mode;
- /** @ring_mode: RING_MODE */
+ /** @reg.ring_mode: RING_MODE */
u32 ring_mode;
- /** @ring_imr: RING_IMR */
+ /** @reg.ring_imr: RING_IMR */
u32 ring_imr;
- /** @ring_esr: RING_ESR */
+ /** @reg.ring_esr: RING_ESR */
u32 ring_esr;
- /** @ring_emr: RING_EMR */
+ /** @reg.ring_emr: RING_EMR */
u32 ring_emr;
- /** @ring_eir: RING_EIR */
+ /** @reg.ring_eir: RING_EIR */
u32 ring_eir;
- /** @ring_acthd_udw: RING_ACTHD_UDW */
- u32 ring_acthd_udw;
- /** @ring_acthd: RING_ACTHD */
- u32 ring_acthd;
- /** @ring_bbaddr_udw: RING_BBADDR_UDW */
- u32 ring_bbaddr_udw;
- /** @ring_bbaddr: RING_BBADDR */
- u32 ring_bbaddr;
- /** @ring_dma_fadd_udw: RING_DMA_FADD_UDW */
- u32 ring_dma_fadd_udw;
- /** @ring_dma_fadd: RING_DMA_FADD */
- u32 ring_dma_fadd;
- /** @ipehr: IPEHR */
+ /** @reg.ipehr: IPEHR */
u32 ipehr;
- /** @rcu_mode: RCU_MODE */
+ /** @reg.rcu_mode: RCU_MODE */
u32 rcu_mode;
} reg;
};
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index a6f43446c7..9ac7fbe201 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -10,12 +10,14 @@
#include <drm/drm_managed.h>
#include "regs/xe_gt_regs.h"
#include "regs/xe_mchbar_regs.h"
+#include "regs/xe_pcode_regs.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_hwmon.h"
#include "xe_mmio.h"
#include "xe_pcode.h"
#include "xe_pcode_api.h"
+#include "xe_sriov.h"
enum xe_hwmon_reg {
REG_PKG_RAPL_LIMIT,
@@ -77,32 +79,32 @@ static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg)
switch (hwmon_reg) {
case REG_PKG_RAPL_LIMIT:
- if (xe->info.platform == XE_DG2)
- reg = PCU_CR_PACKAGE_RAPL_LIMIT;
- else if (xe->info.platform == XE_PVC)
+ if (xe->info.platform == XE_PVC)
reg = PVC_GT0_PACKAGE_RAPL_LIMIT;
+ else if (xe->info.platform == XE_DG2)
+ reg = PCU_CR_PACKAGE_RAPL_LIMIT;
break;
case REG_PKG_POWER_SKU:
- if (xe->info.platform == XE_DG2)
- reg = PCU_CR_PACKAGE_POWER_SKU;
- else if (xe->info.platform == XE_PVC)
+ if (xe->info.platform == XE_PVC)
reg = PVC_GT0_PACKAGE_POWER_SKU;
+ else if (xe->info.platform == XE_DG2)
+ reg = PCU_CR_PACKAGE_POWER_SKU;
break;
case REG_PKG_POWER_SKU_UNIT:
- if (xe->info.platform == XE_DG2)
- reg = PCU_CR_PACKAGE_POWER_SKU_UNIT;
- else if (xe->info.platform == XE_PVC)
+ if (xe->info.platform == XE_PVC)
reg = PVC_GT0_PACKAGE_POWER_SKU_UNIT;
+ else if (xe->info.platform == XE_DG2)
+ reg = PCU_CR_PACKAGE_POWER_SKU_UNIT;
break;
case REG_GT_PERF_STATUS:
if (xe->info.platform == XE_DG2)
reg = GT_PERF_STATUS;
break;
case REG_PKG_ENERGY_STATUS:
- if (xe->info.platform == XE_DG2)
- reg = PCU_CR_PACKAGE_ENERGY_STATUS;
- else if (xe->info.platform == XE_PVC)
+ if (xe->info.platform == XE_PVC)
reg = PVC_GT0_PLATFORM_ENERGY_STATUS;
+ else if (xe->info.platform == XE_DG2)
+ reg = PCU_CR_PACKAGE_ENERGY_STATUS;
break;
default:
drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg);
@@ -402,7 +404,7 @@ static const struct attribute_group *hwmon_groups[] = {
NULL
};
-static const struct hwmon_channel_info *hwmon_info[] = {
+static const struct hwmon_channel_info * const hwmon_info[] = {
HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT),
HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT),
HWMON_CHANNEL_INFO(in, HWMON_I_INPUT),
@@ -745,6 +747,10 @@ void xe_hwmon_register(struct xe_device *xe)
if (!IS_DGFX(xe))
return;
+ /* hwmon is not available on VFs */
+ if (IS_SRIOV_VF(xe))
+ return;
+
hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL);
if (!hwmon)
return;
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index d1f5ba4bb7..2f5d179e0d 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -9,15 +9,18 @@
#include <drm/drm_managed.h>
+#include "display/xe_display.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_device.h"
-#include "xe_display.h"
#include "xe_drv.h"
+#include "xe_gsc_proxy.h"
#include "xe_gt.h"
#include "xe_guc.h"
#include "xe_hw_engine.h"
+#include "xe_memirq.h"
#include "xe_mmio.h"
+#include "xe_sriov.h"
/*
* Interrupt registers for a unit are always consecutive and ordered
@@ -129,6 +132,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt)
u32 ccs_mask, bcs_mask;
u32 irqs, dmask, smask;
u32 gsc_mask = 0;
+ u32 heci_mask = 0;
if (xe_device_uc_enabled(xe)) {
irqs = GT_RENDER_USER_INTERRUPT |
@@ -178,14 +182,23 @@ void xe_irq_enable_hwe(struct xe_gt *gt)
xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~dmask);
xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~dmask);
- if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER))
+ /*
+ * the heci2 interrupt is enabled via the same register as the
+ * GSCCS interrupts, but it has its own mask register.
+ */
+ if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) {
gsc_mask = irqs;
- else if (HAS_HECI_GSCFI(xe))
+ heci_mask = GSC_IRQ_INTF(1);
+ } else if (HAS_HECI_GSCFI(xe)) {
gsc_mask = GSC_IRQ_INTF(1);
+ }
+
if (gsc_mask) {
- xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, gsc_mask);
+ xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, gsc_mask | heci_mask);
xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~gsc_mask);
}
+ if (heci_mask)
+ xe_mmio_write32(gt, HECI2_RSVD_INTR_MASK, ~(heci_mask << 16));
}
}
@@ -232,6 +245,8 @@ gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir)
return xe_guc_irq_handler(&gt->uc.guc, iir);
if (instance == OTHER_MEDIA_GUC_INSTANCE && xe_gt_is_media_type(gt))
return xe_guc_irq_handler(&gt->uc.guc, iir);
+ if (instance == OTHER_GSC_HECI2_INSTANCE && xe_gt_is_media_type(gt))
+ return xe_gsc_proxy_irq_handler(&gt->uc.gsc, iir);
if (instance != OTHER_GUC_INSTANCE &&
instance != OTHER_MEDIA_GUC_INSTANCE) {
@@ -249,15 +264,23 @@ static struct xe_gt *pick_engine_gt(struct xe_tile *tile,
if (MEDIA_VER(xe) < 13)
return tile->primary_gt;
- if (class == XE_ENGINE_CLASS_VIDEO_DECODE ||
- class == XE_ENGINE_CLASS_VIDEO_ENHANCE)
+ switch (class) {
+ case XE_ENGINE_CLASS_VIDEO_DECODE:
+ case XE_ENGINE_CLASS_VIDEO_ENHANCE:
return tile->media_gt;
-
- if (class == XE_ENGINE_CLASS_OTHER &&
- (instance == OTHER_MEDIA_GUC_INSTANCE || instance == OTHER_GSC_INSTANCE))
- return tile->media_gt;
-
- return tile->primary_gt;
+ case XE_ENGINE_CLASS_OTHER:
+ switch (instance) {
+ case OTHER_MEDIA_GUC_INSTANCE:
+ case OTHER_GSC_INSTANCE:
+ case OTHER_GSC_HECI2_INSTANCE:
+ return tile->media_gt;
+ default:
+ break;
+ };
+ fallthrough;
+ default:
+ return tile->primary_gt;
+ }
}
static void gt_irq_handler(struct xe_tile *tile,
@@ -419,7 +442,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
* irq as device is inaccessible.
*/
if (master_ctl == REG_GENMASK(31, 0)) {
- dev_dbg(tile_to_xe(tile)->drm.dev,
+ drm_dbg(&tile_to_xe(tile)->drm,
"Ignore this IRQ as device might be in DPC containment.\n");
return IRQ_HANDLED;
}
@@ -484,6 +507,7 @@ static void gt_irq_reset(struct xe_tile *tile)
HAS_HECI_GSCFI(tile_to_xe(tile))) {
xe_mmio_write32(mmio, GUNIT_GSC_INTR_ENABLE, 0);
xe_mmio_write32(mmio, GUNIT_GSC_INTR_MASK, ~0);
+ xe_mmio_write32(mmio, HECI2_RSVD_INTR_MASK, ~0);
}
xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_ENABLE, 0);
@@ -498,6 +522,9 @@ static void xelp_irq_reset(struct xe_tile *tile)
gt_irq_reset(tile);
+ if (IS_SRIOV_VF(tile_to_xe(tile)))
+ return;
+
mask_and_disable(tile, PCU_IRQ_OFFSET);
}
@@ -508,6 +535,9 @@ static void dg1_irq_reset(struct xe_tile *tile)
gt_irq_reset(tile);
+ if (IS_SRIOV_VF(tile_to_xe(tile)))
+ return;
+
mask_and_disable(tile, PCU_IRQ_OFFSET);
}
@@ -518,11 +548,34 @@ static void dg1_irq_reset_mstr(struct xe_tile *tile)
xe_mmio_write32(mmio, GFX_MSTR_IRQ, ~0);
}
+static void vf_irq_reset(struct xe_device *xe)
+{
+ struct xe_tile *tile;
+ unsigned int id;
+
+ xe_assert(xe, IS_SRIOV_VF(xe));
+
+ if (GRAPHICS_VERx100(xe) < 1210)
+ xelp_intr_disable(xe);
+ else
+ xe_assert(xe, xe_device_has_memirq(xe));
+
+ for_each_tile(tile, xe, id) {
+ if (xe_device_has_memirq(xe))
+ xe_memirq_reset(&tile->sriov.vf.memirq);
+ else
+ gt_irq_reset(tile);
+ }
+}
+
static void xe_irq_reset(struct xe_device *xe)
{
struct xe_tile *tile;
u8 id;
+ if (IS_SRIOV_VF(xe))
+ return vf_irq_reset(xe);
+
for_each_tile(tile, xe, id) {
if (GRAPHICS_VERx100(xe) >= 1210)
dg1_irq_reset(tile);
@@ -545,8 +598,26 @@ static void xe_irq_reset(struct xe_device *xe)
}
}
+static void vf_irq_postinstall(struct xe_device *xe)
+{
+ struct xe_tile *tile;
+ unsigned int id;
+
+ for_each_tile(tile, xe, id)
+ if (xe_device_has_memirq(xe))
+ xe_memirq_postinstall(&tile->sriov.vf.memirq);
+
+ if (GRAPHICS_VERx100(xe) < 1210)
+ xelp_intr_enable(xe, true);
+ else
+ xe_assert(xe, xe_device_has_memirq(xe));
+}
+
static void xe_irq_postinstall(struct xe_device *xe)
{
+ if (IS_SRIOV_VF(xe))
+ return vf_irq_postinstall(xe);
+
xe_display_irq_postinstall(xe, xe_root_mmio_gt(xe));
/*
@@ -563,8 +634,30 @@ static void xe_irq_postinstall(struct xe_device *xe)
xelp_intr_enable(xe, true);
}
+static irqreturn_t vf_mem_irq_handler(int irq, void *arg)
+{
+ struct xe_device *xe = arg;
+ struct xe_tile *tile;
+ unsigned int id;
+
+ spin_lock(&xe->irq.lock);
+ if (!xe->irq.enabled) {
+ spin_unlock(&xe->irq.lock);
+ return IRQ_NONE;
+ }
+ spin_unlock(&xe->irq.lock);
+
+ for_each_tile(tile, xe, id)
+ xe_memirq_handler(&tile->sriov.vf.memirq);
+
+ return IRQ_HANDLED;
+}
+
static irq_handler_t xe_irq_handler(struct xe_device *xe)
{
+ if (IS_SRIOV_VF(xe) && xe_device_has_memirq(xe))
+ return vf_mem_irq_handler;
+
if (GRAPHICS_VERx100(xe) >= 1210)
return dg1_irq_handler;
else
@@ -590,8 +683,9 @@ static void irq_uninstall(struct drm_device *drm, void *arg)
int xe_irq_install(struct xe_device *xe)
{
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ unsigned int irq_flags = PCI_IRQ_MSIX;
irq_handler_t irq_handler;
- int err, irq;
+ int err, irq, nvec;
irq_handler = xe_irq_handler(xe);
if (!irq_handler) {
@@ -601,7 +695,19 @@ int xe_irq_install(struct xe_device *xe)
xe_irq_reset(xe);
- err = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX);
+ nvec = pci_msix_vec_count(pdev);
+ if (nvec <= 0) {
+ if (nvec == -EINVAL) {
+ /* MSIX capability is not supported in the device, using MSI */
+ irq_flags = PCI_IRQ_MSI;
+ nvec = 1;
+ } else {
+ drm_err(&xe->drm, "MSIX: Failed getting count\n");
+ return nvec;
+ }
+ }
+
+ err = pci_alloc_irq_vectors(pdev, nvec, nvec, irq_flags);
if (err < 0) {
drm_err(&xe->drm, "MSI/MSIX: Failed to enable support %d\n", err);
return err;
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 72f04a656e..57066faf57 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -19,6 +19,8 @@
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
+#include "xe_memirq.h"
+#include "xe_sriov.h"
#include "xe_vm.h"
#define LRC_VALID (1 << 0)
@@ -529,6 +531,27 @@ static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
/* TODO: Timestamp */
}
+static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
+{
+ struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;
+ struct xe_device *xe = gt_to_xe(hwe->gt);
+
+ if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
+ return;
+
+ regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
+ MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
+ regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
+ regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);
+
+ regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
+ MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
+ regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
+ regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
+ regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
+ regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
+}
+
static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
struct xe_device *xe = gt_to_xe(hwe->gt);
@@ -664,6 +687,7 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe)
regs = data + LRC_PPHWSP_SIZE;
set_offsets(regs, reg_offsets(xe, hwe->class), hwe);
set_context_control(regs, hwe);
+ set_memory_based_intr(regs, hwe);
reset_stop_ring(regs, hwe);
return data;
@@ -953,6 +977,20 @@ static int dump_mi_command(struct drm_printer *p,
drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
return numdw;
+ case MI_LOAD_REGISTER_MEM & MI_OPCODE:
+ drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
+ inst_header,
+ dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
+ dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
+ if (numdw == 4)
+ drm_printf(p, " - %#6x = %#010llx\n",
+ dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
+ else
+ drm_printf(p, " - %*ph (%s)\n",
+ (int)sizeof(u32) * (numdw - 1), dw + 1,
+ numdw < 4 ? "truncated" : "malformed");
+ return numdw;
+
case MI_FORCE_WAKEUP:
drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
return numdw;
diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h
index 7822033606..24f20ed66f 100644
--- a/drivers/gpu/drm/xe/xe_lrc_types.h
+++ b/drivers/gpu/drm/xe/xe_lrc_types.h
@@ -28,11 +28,11 @@ struct xe_lrc {
/** @ring: submission ring state */
struct {
- /** @size: size of submission ring */
+ /** @ring.size: size of submission ring */
u32 size;
- /** @tail: tail of submission ring */
+ /** @ring.tail: tail of submission ring */
u32 tail;
- /** @old_tail: shadow of tail */
+ /** @ring.old_tail: shadow of tail */
u32 old_tail;
} ring;
diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c
new file mode 100644
index 0000000000..76e95535d7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_memirq.c
@@ -0,0 +1,430 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "regs/xe_gt_regs.h"
+#include "regs/xe_guc_regs.h"
+#include "regs/xe_regs.h"
+
+#include "xe_assert.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_device_types.h"
+#include "xe_gt.h"
+#include "xe_gt_printk.h"
+#include "xe_guc.h"
+#include "xe_hw_engine.h"
+#include "xe_map.h"
+#include "xe_memirq.h"
+#include "xe_sriov.h"
+#include "xe_sriov_printk.h"
+
+#define memirq_assert(m, condition) xe_tile_assert(memirq_to_tile(m), condition)
+#define memirq_debug(m, msg...) xe_sriov_dbg_verbose(memirq_to_xe(m), "MEMIRQ: " msg)
+
+static struct xe_tile *memirq_to_tile(struct xe_memirq *memirq)
+{
+ return container_of(memirq, struct xe_tile, sriov.vf.memirq);
+}
+
+static struct xe_device *memirq_to_xe(struct xe_memirq *memirq)
+{
+ return tile_to_xe(memirq_to_tile(memirq));
+}
+
+static const char *guc_name(struct xe_guc *guc)
+{
+ return xe_gt_is_media_type(guc_to_gt(guc)) ? "media GuC" : "GuC";
+}
+
+/**
+ * DOC: Memory Based Interrupts
+ *
+ * The MMIO register based interrupt infrastructure, used in non-virtualized
+ * mode or with SRIOV-8 (which supports 8 Virtual Functions), does not scale
+ * efficiently enough to deliver interrupts to a large number of virtual
+ * machines or containers. Memory based interrupt status reporting provides
+ * an efficient and scalable infrastructure.
+ *
+ * For memory based interrupt status reporting, the hardware sequence is:
+ * * Engine writes the interrupt event to memory
+ * (Pointer to memory location is provided by SW. This memory surface must
+ * be mapped to system memory and must be marked as un-cacheable (UC) on
+ * Graphics IP Caches)
+ * * Engine triggers an interrupt to host.
+ */
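A minimal sketch of the host-side half of this sequence, mirroring the memirq_received() helper defined later in this file: the handler reads a unit's status byte from the uncached page, treats a non-zero value (the engine writes 0xFF) as a pending event, and clears the byte before dispatching. The helper name below is illustrative only.

/* requires <linux/iosys-map.h> */
static bool memirq_event_pending(struct iosys_map *vector, u16 offset)
{
	/* the engine sets this byte to 0xff when it raises an event */
	u8 value = iosys_map_rd(vector, offset, u8);

	/* acknowledge by clearing the byte back to zero */
	if (value)
		iosys_map_wr(vector, offset, u8, 0x00);

	return value;
}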
+
+/**
+ * DOC: Memory Based Interrupts Page Layout
+ *
+ * `Memory Based Interrupts`_ requires three different objects, which are
+ * called "page" in the specs, even if they aren't page-sized or aligned.
+ *
+ * To simplify the code we allocate a single page-sized object and then use
+ * offsets into it for the embedded "pages". The addresses of those "pages"
+ * are then programmed in the HW via LRI and LRM in the context image.
+ *
+ * - _`Interrupt Status Report Page`: this page contains the interrupt
+ * status vectors for each unit. Each bit in the interrupt vectors is
+ * converted to a byte, with the byte being set to 0xFF when an
+ * interrupt is triggered; interrupt vectors are 16 bits wide, so each
+ * unit gets 16 bytes. One such 16B slot is reserved for each bit in one
+ * of the GT_INTR_DWx registers, so this object needs a total of 1024B.
+ * This object needs to be 4KiB aligned.
+ *
+ * - _`Interrupt Source Report Page`: this is the equivalent of the
+ * GEN11_GT_INTR_DWx registers, with each bit in those registers being
+ * mapped to a byte here. The offsets are the same, just bytes instead
+ * of bits. This object needs to be cacheline aligned.
+ *
+ * - Interrupt Mask: the HW needs a location to fetch the interrupt
+ * mask vector to be used by the LRM in the context, so we just use
+ * the next available space in the interrupt page.
+ *
+ * ::
+ *
+ * 0x0000 +===========+ <== Interrupt Status Report Page
+ * | |
+ * | | ____ +----+----------------+
+ * | | / | 0 | USER INTERRUPT |
+ * +-----------+ __/ | 1 | |
+ * | HWE(n) | __ | | CTX SWITCH |
+ * +-----------+ \ | | WAIT SEMAPHORE |
+ * | | \____ | 15 | |
+ * | | +----+----------------+
+ * | |
+ * 0x0400 +===========+ <== Interrupt Source Report Page
+ * | HWE(0) |
+ * | HWE(1) |
+ * | |
+ * | HWE(x) |
+ * 0x0440 +===========+ <== Interrupt Enable Mask
+ * | |
+ * | |
+ * +-----------+
+ */
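The layout above translates into the offsets defined in xe_memirq_types.h and the per-engine indexing used by xe_memirq_handler(). The sketch below, with a made-up function name, shows how one engine's status slot would be derived from them under those assumptions.

/* requires xe_memirq_types.h, xe_bo.h and xe_hw_engine_types.h */
static u32 memirq_status_slot_sketch(struct xe_memirq *memirq,
				     struct xe_hw_engine *hwe)
{
	u32 base = xe_bo_ggtt_addr(memirq->bo);

	/* 0x0000: status page, one 16B vector per GT_INTR_DWx bit  */
	/* 0x0400: source page, one byte per GT_INTR_DWx bit         */
	/* 0x0440: enable mask consumed by the LRM in the context    */
	u32 status = base + XE_MEMIRQ_STATUS_OFFSET;

	/* each unit's vector lives at irq_offset * 16B within the status page */
	return status + hwe->irq_offset * SZ_16;
}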
+
+static void __release_xe_bo(struct drm_device *drm, void *arg)
+{
+ struct xe_bo *bo = arg;
+
+ xe_bo_unpin_map_no_vm(bo);
+}
+
+static int memirq_alloc_pages(struct xe_memirq *memirq)
+{
+ struct xe_device *xe = memirq_to_xe(memirq);
+ struct xe_tile *tile = memirq_to_tile(memirq);
+ struct xe_bo *bo;
+ int err;
+
+ BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_SOURCE_OFFSET, SZ_64));
+ BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_STATUS_OFFSET, SZ_4K));
+
+ /* XXX: convert to managed bo */
+ bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K,
+ ttm_bo_type_kernel,
+ XE_BO_CREATE_SYSTEM_BIT |
+ XE_BO_CREATE_GGTT_BIT |
+ XE_BO_NEEDS_UC |
+ XE_BO_NEEDS_CPU_ACCESS);
+ if (IS_ERR(bo)) {
+ err = PTR_ERR(bo);
+ goto out;
+ }
+
+ memirq_assert(memirq, !xe_bo_is_vram(bo));
+ memirq_assert(memirq, !memirq->bo);
+
+ iosys_map_memset(&bo->vmap, 0, 0, SZ_4K);
+
+ memirq->bo = bo;
+ memirq->source = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_SOURCE_OFFSET);
+ memirq->status = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_STATUS_OFFSET);
+ memirq->mask = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_ENABLE_OFFSET);
+
+ memirq_assert(memirq, !memirq->source.is_iomem);
+ memirq_assert(memirq, !memirq->status.is_iomem);
+ memirq_assert(memirq, !memirq->mask.is_iomem);
+
+ memirq_debug(memirq, "page offsets: source %#x status %#x\n",
+ xe_memirq_source_ptr(memirq), xe_memirq_status_ptr(memirq));
+
+ return drmm_add_action_or_reset(&xe->drm, __release_xe_bo, memirq->bo);
+
+out:
+ xe_sriov_err(memirq_to_xe(memirq),
+ "Failed to allocate memirq page (%pe)\n", ERR_PTR(err));
+ return err;
+}
+
+static void memirq_set_enable(struct xe_memirq *memirq, bool enable)
+{
+ iosys_map_wr(&memirq->mask, 0, u32, enable ? GENMASK(15, 0) : 0);
+
+ memirq->enabled = enable;
+}
+
+/**
+ * xe_memirq_init - Initialize data used by `Memory Based Interrupts`_.
+ * @memirq: the &xe_memirq to initialize
+ *
+ * Allocate `Interrupt Source Report Page`_ and `Interrupt Status Report Page`_
+ * used by `Memory Based Interrupts`_.
+ *
+ * These allocations are managed and will be implicitly released on unload.
+ *
+ * Note: This function shall be called only by the VF driver.
+ *
+ * If this function fails then the VF driver won't be able to operate correctly.
+ * If `Memory Based Interrupts`_ are not used, this function will return 0.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_memirq_init(struct xe_memirq *memirq)
+{
+ struct xe_device *xe = memirq_to_xe(memirq);
+ int err;
+
+ memirq_assert(memirq, IS_SRIOV_VF(xe));
+
+ if (!xe_device_has_memirq(xe))
+ return 0;
+
+ err = memirq_alloc_pages(memirq);
+ if (unlikely(err))
+ return err;
+
+ /* we need to start with all irqs enabled */
+ memirq_set_enable(memirq, true);
+
+ return 0;
+}
+
+/**
+ * xe_memirq_source_ptr - Get GGTT's offset of the `Interrupt Source Report Page`_.
+ * @memirq: the &xe_memirq to query
+ *
+ * Shall be called only on VF driver when `Memory Based Interrupts`_ are used
+ * and xe_memirq_init() didn't fail.
+ *
+ * Return: GGTT's offset of the `Interrupt Source Report Page`_.
+ */
+u32 xe_memirq_source_ptr(struct xe_memirq *memirq)
+{
+ memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
+ memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+ memirq_assert(memirq, memirq->bo);
+
+ return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_SOURCE_OFFSET;
+}
+
+/**
+ * xe_memirq_status_ptr - Get GGTT's offset of the `Interrupt Status Report Page`_.
+ * @memirq: the &xe_memirq to query
+ *
+ * Shall be called only on VF driver when `Memory Based Interrupts`_ are used
+ * and xe_memirq_init() didn't fail.
+ *
+ * Return: GGTT's offset of the `Interrupt Status Report Page`_.
+ */
+u32 xe_memirq_status_ptr(struct xe_memirq *memirq)
+{
+ memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
+ memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+ memirq_assert(memirq, memirq->bo);
+
+ return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_STATUS_OFFSET;
+}
+
+/**
+ * xe_memirq_enable_ptr - Get GGTT's offset of the Interrupt Enable Mask.
+ * @memirq: the &xe_memirq to query
+ *
+ * Shall be called only on VF driver when `Memory Based Interrupts`_ are used
+ * and xe_memirq_init() didn't fail.
+ *
+ * Return: GGTT's offset of the Interrupt Enable Mask.
+ */
+u32 xe_memirq_enable_ptr(struct xe_memirq *memirq)
+{
+ memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
+ memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+ memirq_assert(memirq, memirq->bo);
+
+ return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_ENABLE_OFFSET;
+}
+
+/**
+ * xe_memirq_init_guc - Prepare GuC for `Memory Based Interrupts`_.
+ * @memirq: the &xe_memirq
+ * @guc: the &xe_guc to setup
+ *
+ * Register `Interrupt Source Report Page`_ and `Interrupt Status Report Page`_
+ * to be used by the GuC when `Memory Based Interrupts`_ are required.
+ *
+ * Shall be called only on VF driver when `Memory Based Interrupts`_ are used
+ * and xe_memirq_init() didn't fail.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc)
+{
+ bool is_media = xe_gt_is_media_type(guc_to_gt(guc));
+ u32 offset = is_media ? ilog2(INTR_MGUC) : ilog2(INTR_GUC);
+ u32 source, status;
+ int err;
+
+ memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
+ memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+ memirq_assert(memirq, memirq->bo);
+
+ source = xe_memirq_source_ptr(memirq) + offset;
+ status = xe_memirq_status_ptr(memirq) + offset * SZ_16;
+
+ err = xe_guc_self_cfg64(guc, GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_KEY,
+ source);
+ if (unlikely(err))
+ goto failed;
+
+ err = xe_guc_self_cfg64(guc, GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR_KEY,
+ status);
+ if (unlikely(err))
+ goto failed;
+
+ return 0;
+
+failed:
+ xe_sriov_err(memirq_to_xe(memirq),
+ "Failed to setup report pages in %s (%pe)\n",
+ guc_name(guc), ERR_PTR(err));
+ return err;
+}
+
+/**
+ * xe_memirq_reset - Disable processing of `Memory Based Interrupts`_.
+ * @memirq: the &xe_memirq
+ *
+ * This is part of the driver IRQ setup flow.
+ *
+ * This function shall only be used by the VF driver on platforms that use
+ * `Memory Based Interrupts`_.
+ */
+void xe_memirq_reset(struct xe_memirq *memirq)
+{
+ memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
+ memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+
+ if (memirq->bo)
+ memirq_set_enable(memirq, false);
+}
+
+/**
+ * xe_memirq_postinstall - Enable processing of `Memory Based Interrupts`_.
+ * @memirq: the &xe_memirq
+ *
+ * This is part of the driver IRQ setup flow.
+ *
+ * This function shall only be used by the VF driver on platforms that use
+ * `Memory Based Interrupts`_.
+ */
+void xe_memirq_postinstall(struct xe_memirq *memirq)
+{
+ memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq)));
+ memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq)));
+
+ if (memirq->bo)
+ memirq_set_enable(memirq, true);
+}
+
+static bool memirq_received(struct xe_memirq *memirq, struct iosys_map *vector,
+ u16 offset, const char *name)
+{
+ u8 value;
+
+ value = iosys_map_rd(vector, offset, u8);
+ if (value) {
+ if (value != 0xff)
+ xe_sriov_err_ratelimited(memirq_to_xe(memirq),
+ "Unexpected memirq value %#x from %s at %u\n",
+ value, name, offset);
+ iosys_map_wr(vector, offset, u8, 0x00);
+ }
+
+ return value;
+}
+
+static void memirq_dispatch_engine(struct xe_memirq *memirq, struct iosys_map *status,
+ struct xe_hw_engine *hwe)
+{
+ memirq_debug(memirq, "STATUS %s %*ph\n", hwe->name, 16, status->vaddr);
+
+ if (memirq_received(memirq, status, ilog2(GT_RENDER_USER_INTERRUPT), hwe->name))
+ xe_hw_engine_handle_irq(hwe, GT_RENDER_USER_INTERRUPT);
+}
+
+static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *status,
+ struct xe_guc *guc)
+{
+ const char *name = guc_name(guc);
+
+ memirq_debug(memirq, "STATUS %s %*ph\n", name, 16, status->vaddr);
+
+ if (memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name))
+ xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST);
+}
+
+/**
+ * xe_memirq_handler - The `Memory Based Interrupts`_ Handler.
+ * @memirq: the &xe_memirq
+ *
+ * This function reads and dispatches `Memory Based Interrupts`_.
+ */
+void xe_memirq_handler(struct xe_memirq *memirq)
+{
+ struct xe_device *xe = memirq_to_xe(memirq);
+ struct xe_tile *tile = memirq_to_tile(memirq);
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ struct iosys_map map;
+ unsigned int gtid;
+ struct xe_gt *gt;
+
+ if (!memirq->bo)
+ return;
+
+ memirq_assert(memirq, !memirq->source.is_iomem);
+ memirq_debug(memirq, "SOURCE %*ph\n", 32, memirq->source.vaddr);
+ memirq_debug(memirq, "SOURCE %*ph\n", 32, memirq->source.vaddr + 32);
+
+ for_each_gt(gt, xe, gtid) {
+ if (gt->tile != tile)
+ continue;
+
+ for_each_hw_engine(hwe, gt, id) {
+ if (memirq_received(memirq, &memirq->source, hwe->irq_offset, "SRC")) {
+ map = IOSYS_MAP_INIT_OFFSET(&memirq->status,
+ hwe->irq_offset * SZ_16);
+ memirq_dispatch_engine(memirq, &map, hwe);
+ }
+ }
+ }
+
+ /* GuC and media GuC (if present) must be checked separately */
+
+ if (memirq_received(memirq, &memirq->source, ilog2(INTR_GUC), "SRC")) {
+ map = IOSYS_MAP_INIT_OFFSET(&memirq->status, ilog2(INTR_GUC) * SZ_16);
+ memirq_dispatch_guc(memirq, &map, &tile->primary_gt->uc.guc);
+ }
+
+ if (!tile->media_gt)
+ return;
+
+ if (memirq_received(memirq, &memirq->source, ilog2(INTR_MGUC), "SRC")) {
+ map = IOSYS_MAP_INIT_OFFSET(&memirq->status, ilog2(INTR_MGUC) * SZ_16);
+ memirq_dispatch_guc(memirq, &map, &tile->media_gt->uc.guc);
+ }
+}
diff --git a/drivers/gpu/drm/xe/xe_memirq.h b/drivers/gpu/drm/xe/xe_memirq.h
new file mode 100644
index 0000000000..2d40d03c30
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_memirq.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_MEMIRQ_H_
+#define _XE_MEMIRQ_H_
+
+#include <linux/types.h>
+
+struct xe_guc;
+struct xe_memirq;
+
+int xe_memirq_init(struct xe_memirq *memirq);
+
+u32 xe_memirq_source_ptr(struct xe_memirq *memirq);
+u32 xe_memirq_status_ptr(struct xe_memirq *memirq);
+u32 xe_memirq_enable_ptr(struct xe_memirq *memirq);
+
+void xe_memirq_reset(struct xe_memirq *memirq);
+void xe_memirq_postinstall(struct xe_memirq *memirq);
+void xe_memirq_handler(struct xe_memirq *memirq);
+
+int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc);
+
+#endif
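Read together with the xe_irq.c hunks earlier in this patch, the VF-only call flow for this API is roughly the following. This is a condensed sketch: the wrapper name is made up, and error handling and locking are elided.

/* how xe_irq.c drives the memirq entry points on a VF with memory based IRQs */
static void vf_memirq_flow_sketch(struct xe_device *xe)
{
	struct xe_tile *tile;
	unsigned int id;

	for_each_tile(tile, xe, id) {
		struct xe_memirq *memirq = &tile->sriov.vf.memirq;

		xe_memirq_reset(memirq);	/* xe_irq_reset() -> vf_irq_reset() */
		xe_memirq_postinstall(memirq);	/* xe_irq_postinstall() -> vf_irq_postinstall() */
		xe_memirq_handler(memirq);	/* vf_mem_irq_handler(), on each interrupt */
	}
}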
diff --git a/drivers/gpu/drm/xe/xe_memirq_types.h b/drivers/gpu/drm/xe/xe_memirq_types.h
new file mode 100644
index 0000000000..625b6b8736
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_memirq_types.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_MEMIRQ_TYPES_H_
+#define _XE_MEMIRQ_TYPES_H_
+
+#include <linux/iosys-map.h>
+
+struct xe_bo;
+
+/* ISR */
+#define XE_MEMIRQ_STATUS_OFFSET 0x0
+/* IIR */
+#define XE_MEMIRQ_SOURCE_OFFSET 0x400
+/* IMR */
+#define XE_MEMIRQ_ENABLE_OFFSET 0x440
+
+/**
+ * struct xe_memirq - Data used by the `Memory Based Interrupts`_.
+ *
+ * @bo: buffer object with `Memory Based Interrupts Page Layout`_.
+ * @source: iosys pointer to `Interrupt Source Report Page`_.
+ * @status: iosys pointer to `Interrupt Status Report Page`_.
+ * @mask: iosys pointer to Interrupt Enable Mask.
+ * @enabled: internal flag used to control processing of the interrupts.
+ */
+struct xe_memirq {
+ struct xe_bo *bo;
+ struct iosys_map source;
+ struct iosys_map status;
+ struct iosys_map mask;
+ bool enabled;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 7a6ad3469d..aca519f5b8 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -12,7 +12,8 @@
#include <drm/ttm/ttm_tt.h>
#include <drm/xe_drm.h>
-#include "generated/xe_wa_oob.h"
+#include <generated/xe_wa_oob.h>
+
#include "instructions/xe_mi_commands.h"
#include "regs/xe_gpu_commands.h"
#include "tests/xe_test.h"
@@ -32,7 +33,6 @@
#include "xe_sync.h"
#include "xe_trace.h"
#include "xe_vm.h"
-#include "xe_wa.h"
/**
* struct xe_migrate - migrate context.
@@ -71,6 +71,16 @@ struct xe_migrate {
#define NUM_KERNEL_PDE 17
#define NUM_PT_SLOTS 32
#define LEVEL0_PAGE_TABLE_ENCODE_SIZE SZ_2M
+#define MAX_NUM_PTE 512
+
+/*
+ * Although MI_STORE_DATA_IMM's "length" field is 10 bits, 0x3FE is the largest
+ * legal value accepted. Since that instruction field is always stored in
+ * (val-2) format, this translates to 0x400 dwords for the true maximum length
+ * of the instruction. Subtracting the instruction header (1 dword) and
+ * address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values.
+ */
+#define MAX_PTE_PER_SDI 0x1FE
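Spelling out the arithmetic from the comment above as compile-time checks; the macro names here are illustrative, not part of the patch.

/* requires <linux/build_bug.h> for static_assert */
/* 10-bit length field, 0x3FE max legal value, encoded as (val - 2) */
#define SDI_MAX_DWORDS		(0x3FE + 2)	/* 0x400 dwords total */
#define SDI_HEADER_DWORDS	1		/* MI_STORE_DATA_IMM header */
#define SDI_ADDR_DWORDS		2		/* 64-bit destination address */

/* 0x400 - 1 - 2 = 0x3FD dwords for PTE values, i.e. 0x1FE full qwords */
static_assert((SDI_MAX_DWORDS - SDI_HEADER_DWORDS - SDI_ADDR_DWORDS) / 2 == 0x1FE);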
/**
* xe_tile_migrate_engine() - Get this tile's migrate engine.
@@ -288,10 +298,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
}
/*
- * Due to workaround 16017236439, odd instance hardware copy engines are
- * faster than even instance ones.
- * This function returns the mask involving all fast copy engines and the
- * reserved copy engine to be used as logical mask for migrate engine.
* Including the reserved copy engine is required to avoid deadlocks due to
* migrate jobs servicing the faults gets stuck behind the job that faulted.
*/
@@ -305,8 +311,7 @@ static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt)
if (hwe->class != XE_ENGINE_CLASS_COPY)
continue;
- if (!XE_WA(gt, 16017236439) ||
- xe_gt_is_usm_hwe(gt, hwe) || hwe->instance & 1)
+ if (xe_gt_is_usm_hwe(gt, hwe))
logical_mask |= BIT(hwe->logical_instance);
}
@@ -357,10 +362,14 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
if (!hwe || !logical_mask)
return ERR_PTR(-EINVAL);
+ /*
+ * XXX: Currently only reserving 1 (likely slow) BCS instance on
+ * PVC, may want to revisit if performance is needed.
+ */
m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe,
EXEC_QUEUE_FLAG_KERNEL |
EXEC_QUEUE_FLAG_PERMANENT |
- EXEC_QUEUE_FLAG_HIGH_PRIORITY);
+ EXEC_QUEUE_FLAG_HIGH_PRIORITY, 0);
} else {
m->q = xe_exec_queue_create_class(xe, primary_gt, vm,
XE_ENGINE_CLASS_COPY,
@@ -451,13 +460,13 @@ static u32 pte_update_size(struct xe_migrate *m,
} else {
/* Clip L0 to available size */
u64 size = min(*L0, (u64)avail_pts * SZ_2M);
- u64 num_4k_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+ u32 num_4k_pages = (size + XE_PAGE_SIZE - 1) >> XE_PTE_SHIFT;
*L0 = size;
*L0_ofs = xe_migrate_vm_addr(pt_ofs, 0);
/* MI_STORE_DATA_IMM */
- cmds += 3 * DIV_ROUND_UP(num_4k_pages, 0x1ff);
+ cmds += 3 * DIV_ROUND_UP(num_4k_pages, MAX_PTE_PER_SDI);
/* PDE qwords */
cmds += num_4k_pages * 2;
@@ -492,7 +501,7 @@ static void emit_pte(struct xe_migrate *m,
ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE);
while (ptes) {
- u32 chunk = min(0x1ffU, ptes);
+ u32 chunk = min(MAX_PTE_PER_SDI, ptes);
bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
bb->cs[bb->len++] = ofs;
@@ -1111,7 +1120,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
* This shouldn't be possible in practice.. might change when 16K
* pages are used. Hence the assert.
*/
- xe_tile_assert(tile, update->qwords <= 0x1ff);
+ xe_tile_assert(tile, update->qwords < MAX_NUM_PTE);
if (!ppgtt_ofs)
ppgtt_ofs = xe_migrate_vram_ofs(tile_to_xe(tile),
xe_bo_addr(update->pt_bo, 0,
@@ -1120,7 +1129,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
do {
u64 addr = ppgtt_ofs + ofs * 8;
- chunk = min(update->qwords, 0x1ffU);
+ chunk = min(size, MAX_PTE_PER_SDI);
/* Ensure populatefn can do memset64 by aligning bb->cs */
if (!(bb->len & 1))
@@ -1299,7 +1308,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
batch_size = 6 + num_updates * 2;
for (i = 0; i < num_updates; i++) {
- u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, 0x1ff);
+ u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, MAX_PTE_PER_SDI);
/* align noop + MI_STORE_DATA_IMM cmd prefix */
batch_size += 4 * num_cmds + updates[i].qwords * 2;
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 02f7808f28..7ba2477452 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -20,6 +20,7 @@
#include "xe_gt_mcr.h"
#include "xe_macros.h"
#include "xe_module.h"
+#include "xe_sriov.h"
#include "xe_tile.h"
#define XEHP_MTCFG_ADDR XE_REG(0x101800)
@@ -363,13 +364,19 @@ static int xe_verify_lmem_ready(struct xe_device *xe)
{
struct xe_gt *gt = xe_root_mmio_gt(xe);
+ if (!IS_DGFX(xe))
+ return 0;
+
+ if (IS_SRIOV_VF(xe))
+ return 0;
+
/*
* The boot firmware initializes local memory and assesses its health.
* If memory training fails, the punit will have been instructed to
* keep the GT powered down; we won't be able to communicate with it
* and we should not continue with driver initialization.
*/
- if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) {
+ if (!(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) {
drm_err(&xe->drm, "VRAM not initialized by firmware\n");
return -ENODEV;
}
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index ef79552e4f..609d997b3e 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -13,6 +13,7 @@
#include "xe_gt_mcr.h"
#include "xe_mmio.h"
#include "xe_platform_types.h"
+#include "xe_sriov.h"
#include "xe_step_types.h"
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
@@ -290,18 +291,6 @@ static const struct xe_mocs_entry dg2_mocs_desc[] = {
MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
};
-static const struct xe_mocs_entry dg2_mocs_desc_g10_ax[] = {
- /* Wa_14011441408: Set Go to Memory for MOCS#0 */
- MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
- /* UC - Coherent; GO:Memory */
- MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
- /* UC - Non-Coherent; GO:Memory */
- MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
-
- /* WB - LC */
- MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
-};
-
static const struct xe_mocs_entry pvc_mocs_desc[] = {
/* Error */
MOCS_ENTRY(0, 0, L3_3_WB),
@@ -409,15 +398,8 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
info->unused_entries_index = 1;
break;
case XE_DG2:
- if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10 &&
- xe->info.step.graphics >= STEP_A0 &&
- xe->info.step.graphics <= STEP_B0) {
- info->size = ARRAY_SIZE(dg2_mocs_desc_g10_ax);
- info->table = dg2_mocs_desc_g10_ax;
- } else {
- info->size = ARRAY_SIZE(dg2_mocs_desc);
- info->table = dg2_mocs_desc;
- }
+ info->size = ARRAY_SIZE(dg2_mocs_desc);
+ info->table = dg2_mocs_desc;
info->uc_index = 1;
info->n_entries = XELP_NUM_MOCS_ENTRIES;
info->unused_entries_index = 3;
@@ -558,6 +540,9 @@ void xe_mocs_init(struct xe_gt *gt)
struct xe_mocs_info table;
unsigned int flags;
+ if (IS_SRIOV_VF(gt_to_xe(gt)))
+ return;
+
/*
* MOCS settings are split between "GLOB_MOCS" and/or "LNCFCMOCS"
* registers depending on platform.
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index 1ff6bc79e7..e148934d55 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -13,6 +13,7 @@
#include "xe_gt.h"
#include "xe_gt_mcr.h"
#include "xe_mmio.h"
+#include "xe_sriov.h"
#define _PAT_ATS 0x47fc
#define _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \
@@ -433,6 +434,10 @@ void xe_pat_init_early(struct xe_device *xe)
drm_err(&xe->drm, "Missing PAT table for platform with graphics version %d.%02d!\n",
GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
}
+
+ /* VFs can't program nor dump PAT settings */
+ if (IS_SRIOV_VF(xe))
+ xe->pat.ops = NULL;
}
void xe_pat_init(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index dcc5ded155..557f2d88a8 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -15,9 +15,9 @@
#include <drm/drm_drv.h>
#include <drm/xe_pciids.h>
+#include "display/xe_display.h"
#include "regs/xe_gt_regs.h"
#include "xe_device.h"
-#include "xe_display.h"
#include "xe_drv.h"
#include "xe_gt.h"
#include "xe_macros.h"
@@ -165,7 +165,7 @@ static const struct xe_graphics_desc graphics_xelpg = {
.has_asid = 1, \
.has_flat_ccs = 1, \
.has_range_tlb_invalidation = 1, \
- .has_usm = 0 /* FIXME: implementation missing */, \
+ .has_usm = 1, \
.va_bits = 48, \
.vm_max_level = 4, \
.hw_engine_mask = \
@@ -340,14 +340,14 @@ static const struct xe_device_desc lnl_desc = {
__diag_pop();
/* Map of GMD_ID values to graphics IP */
-static struct gmdid_map graphics_ip_map[] = {
+static const struct gmdid_map graphics_ip_map[] = {
{ 1270, &graphics_xelpg },
{ 1271, &graphics_xelpg },
{ 2004, &graphics_xe2 },
};
/* Map of GMD_ID values to media IP */
-static struct gmdid_map media_ip_map[] = {
+static const struct gmdid_map media_ip_map[] = {
{ 1300, &media_xelpmp },
{ 2000, &media_xe2 },
};
@@ -774,6 +774,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
str_yes_no(xe_device_has_sriov(xe)),
xe_sriov_mode_to_string(xe_device_sriov_mode(xe)));
+ xe_pm_init_early(xe);
+
err = xe_device_probe(xe);
if (err)
return err;
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index b324dc2a5d..81f4ae2ea0 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -10,6 +10,7 @@
#include <drm/drm_managed.h>
+#include "xe_device.h"
#include "xe_gt.h"
#include "xe_mmio.h"
#include "xe_pcode_api.h"
@@ -43,8 +44,6 @@ static int pcode_mailbox_status(struct xe_gt *gt)
[PCODE_ERROR_MASK] = {-EPROTO, "Unknown"},
};
- lockdep_assert_held(&gt->pcode.lock);
-
err = xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_ERROR_MASK;
if (err) {
drm_err(&gt_to_xe(gt)->drm, "PCODE Mailbox failed: %d %s", err,
@@ -55,17 +54,15 @@ static int pcode_mailbox_status(struct xe_gt *gt)
return 0;
}
-static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
- unsigned int timeout_ms, bool return_data,
- bool atomic)
+static int __pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
+ unsigned int timeout_ms, bool return_data,
+ bool atomic)
{
int err;
if (gt_to_xe(gt)->info.skip_pcode)
return 0;
- lockdep_assert_held(&gt->pcode.lock);
-
if ((xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_READY) != 0)
return -EAGAIN;
@@ -87,6 +84,18 @@ static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
return pcode_mailbox_status(gt);
}
+static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1,
+ unsigned int timeout_ms, bool return_data,
+ bool atomic)
+{
+ if (gt_to_xe(gt)->info.skip_pcode)
+ return 0;
+
+ lockdep_assert_held(&gt->pcode.lock);
+
+ return __pcode_mailbox_rw(gt, mbox, data0, data1, timeout_ms, return_data, atomic);
+}
+
int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 data, int timeout)
{
int err;
@@ -109,15 +118,19 @@ int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1)
return err;
}
-static int xe_pcode_try_request(struct xe_gt *gt, u32 mbox,
- u32 request, u32 reply_mask, u32 reply,
- u32 *status, bool atomic, int timeout_us)
+static int pcode_try_request(struct xe_gt *gt, u32 mbox,
+ u32 request, u32 reply_mask, u32 reply,
+ u32 *status, bool atomic, int timeout_us, bool locked)
{
int slept, wait = 10;
for (slept = 0; slept < timeout_us; slept += wait) {
- *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true,
- atomic);
+ if (locked)
+ *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true,
+ atomic);
+ else
+ *status = __pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true,
+ atomic);
if ((*status == 0) && ((request & reply_mask) == reply))
return 0;
@@ -158,8 +171,8 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
mutex_lock(&gt->pcode.lock);
- ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
- false, timeout_base_ms * 1000);
+ ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
+ false, timeout_base_ms * 1000, true);
if (!ret)
goto out;
@@ -177,8 +190,8 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
"PCODE timeout, retrying with preemption disabled\n");
drm_WARN_ON_ONCE(&gt_to_xe(gt)->drm, timeout_base_ms > 1);
preempt_disable();
- ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
- true, timeout_base_ms * 1000);
+ ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
+ true, 50 * 1000, true);
preempt_enable();
out:
@@ -238,59 +251,71 @@ unlock:
}
/**
- * xe_pcode_init - Ensure PCODE is initialized
- * @gt: gt instance
+ * xe_pcode_ready - Ensure PCODE is initialized
+ * @xe: xe instance
+ * @locked: true if lock held, false otherwise
*
- * This function ensures that PCODE is properly initialized. To be called during
- * probe and resume paths.
+ * The PCODE init mailbox is polled only on the root GT of the root tile,
+ * since the root tile reports initialization complete only after all
+ * tiles have finished their initialization.
+ * Called without locks during early probe, and with locks held in the
+ * resume path.
*
- * It returns 0 on success, and -error number on failure.
+ * Returns 0 on success, and -error number on failure.
*/
-int xe_pcode_init(struct xe_gt *gt)
+int xe_pcode_ready(struct xe_device *xe, bool locked)
{
u32 status, request = DGFX_GET_INIT_STATUS;
+ struct xe_gt *gt = xe_root_mmio_gt(xe);
int timeout_us = 180000000; /* 3 min */
int ret;
- if (gt_to_xe(gt)->info.skip_pcode)
+ if (xe->info.skip_pcode)
return 0;
- if (!IS_DGFX(gt_to_xe(gt)))
+ if (!IS_DGFX(xe))
return 0;
- mutex_lock(&gt->pcode.lock);
- ret = xe_pcode_try_request(gt, DGFX_PCODE_STATUS, request,
- DGFX_INIT_STATUS_COMPLETE,
- DGFX_INIT_STATUS_COMPLETE,
- &status, false, timeout_us);
- mutex_unlock(&gt->pcode.lock);
+ if (locked)
+ mutex_lock(&gt->pcode.lock);
+
+ ret = pcode_try_request(gt, DGFX_PCODE_STATUS, request,
+ DGFX_INIT_STATUS_COMPLETE,
+ DGFX_INIT_STATUS_COMPLETE,
+ &status, false, timeout_us, locked);
+
+ if (locked)
+ mutex_unlock(&gt->pcode.lock);
if (ret)
- drm_err(&gt_to_xe(gt)->drm,
+ drm_err(&xe->drm,
"PCODE initialization timedout after: 3 min\n");
return ret;
}
/**
- * xe_pcode_probe - Prepare xe_pcode and also ensure PCODE is initialized.
+ * xe_pcode_init - initialize components of PCODE
* @gt: gt instance
*
- * This function initializes the xe_pcode component, and when needed, it ensures
- * that PCODE has properly performed its initialization and it is really ready
- * to go. To be called once only during probe.
- *
- * It returns 0 on success, and -error number on failure.
+ * This function initializes the xe_pcode component.
+ * To be called once only during probe.
*/
-int xe_pcode_probe(struct xe_gt *gt)
+void xe_pcode_init(struct xe_gt *gt)
{
drmm_mutex_init(&gt_to_xe(gt)->drm, &gt->pcode.lock);
+}
- if (gt_to_xe(gt)->info.skip_pcode)
- return 0;
-
- if (!IS_DGFX(gt_to_xe(gt)))
- return 0;
-
- return xe_pcode_init(gt);
+/**
+ * xe_pcode_probe_early - Check PCODE initialization during early probe
+ * @xe: xe instance
+ *
+ * This function checks the initialization status of PCODE.
+ * To be called only once, during early probe, without locks held.
+ *
+ * Returns 0 on success, error code otherwise
+ */
+int xe_pcode_probe_early(struct xe_device *xe)
+{
+ return xe_pcode_ready(xe, false);
}
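The __pcode_mailbox_rw()/pcode_mailbox_rw() split above follows the common raw-helper-plus-locked-wrapper idiom, so the early-probe path can poll the mailbox before any caller holds gt->pcode.lock while the normal path keeps its lockdep assertion. Below is a minimal sketch of that idiom only; the demo_* names are hypothetical and not part of this patch.

#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/types.h>

struct demo_mbox {
	struct mutex lock;	/* serializes normal-path callers */
	u32 last_cmd;
};

/* Raw helper: no locking assumptions, usable from early probe. */
static int __demo_mbox_rw(struct demo_mbox *mbox, u32 cmd)
{
	mbox->last_cmd = cmd;
	return 0;
}

/* Locked wrapper: normal path, caller must already hold the lock. */
static int demo_mbox_rw(struct demo_mbox *mbox, u32 cmd)
{
	lockdep_assert_held(&mbox->lock);

	return __demo_mbox_rw(mbox, cmd);
}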
diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h
index 08cb1d047c..3f54c6d2a5 100644
--- a/drivers/gpu/drm/xe/xe_pcode.h
+++ b/drivers/gpu/drm/xe/xe_pcode.h
@@ -8,9 +8,11 @@
#include <linux/types.h>
struct xe_gt;
+struct xe_device;
-int xe_pcode_probe(struct xe_gt *gt);
-int xe_pcode_init(struct xe_gt *gt);
+void xe_pcode_init(struct xe_gt *gt);
+int xe_pcode_probe_early(struct xe_device *xe);
+int xe_pcode_ready(struct xe_device *xe, bool locked);
int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq,
u32 max_gt_freq);
int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1);
diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h
index 5935cfe302..f153ce96f6 100644
--- a/drivers/gpu/drm/xe/xe_pcode_api.h
+++ b/drivers/gpu/drm/xe/xe_pcode_api.h
@@ -42,6 +42,13 @@
#define POWER_SETUP_I1_SHIFT 6 /* 10.6 fixed point format */
#define POWER_SETUP_I1_DATA_MASK REG_GENMASK(15, 0)
+#define PCODE_FREQUENCY_CONFIG 0x6e
+/* Frequency Config Sub Commands (param1) */
+#define PCODE_MBOX_FC_SC_READ_FUSED_P0 0x0
+#define PCODE_MBOX_FC_SC_READ_FUSED_PN 0x1
+/* Domain IDs (param2) */
+#define PCODE_MBOX_DOMAIN_HBM 0x2
+
struct pcode_err_decode {
int errno;
const char *str;
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index b429c2876a..944cf4d760 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -10,11 +10,11 @@
#include <drm/drm_managed.h>
#include <drm/ttm/ttm_placement.h>
+#include "display/xe_display.h"
#include "xe_bo.h"
#include "xe_bo_evict.h"
#include "xe_device.h"
#include "xe_device_sysfs.h"
-#include "xe_display.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_guc.h"
@@ -54,13 +54,15 @@ int xe_pm_suspend(struct xe_device *xe)
u8 id;
int err;
+ drm_dbg(&xe->drm, "Suspending device\n");
+
for_each_gt(gt, xe, id)
xe_gt_suspend_prepare(gt);
/* FIXME: Super racey... */
err = xe_bo_evict_all(xe);
if (err)
- return err;
+ goto err;
xe_display_pm_suspend(xe);
@@ -68,7 +70,7 @@ int xe_pm_suspend(struct xe_device *xe)
err = xe_gt_suspend(gt);
if (err) {
xe_display_pm_resume(xe);
- return err;
+ goto err;
}
}
@@ -76,7 +78,11 @@ int xe_pm_suspend(struct xe_device *xe)
xe_display_pm_suspend_late(xe);
+ drm_dbg(&xe->drm, "Device suspended\n");
return 0;
+err:
+ drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
+ return err;
}
/**
@@ -92,14 +98,14 @@ int xe_pm_resume(struct xe_device *xe)
u8 id;
int err;
+ drm_dbg(&xe->drm, "Resuming device\n");
+
for_each_tile(tile, xe, id)
xe_wa_apply_tile_workarounds(tile);
- for_each_gt(gt, xe, id) {
- err = xe_pcode_init(gt);
- if (err)
- return err;
- }
+ err = xe_pcode_ready(xe, true);
+ if (err)
+ return err;
xe_display_pm_resume_early(xe);
@@ -109,7 +115,7 @@ int xe_pm_resume(struct xe_device *xe)
*/
err = xe_bo_restore_kernel(xe);
if (err)
- return err;
+ goto err;
xe_irq_resume(xe);
@@ -120,22 +126,35 @@ int xe_pm_resume(struct xe_device *xe)
err = xe_bo_restore_user(xe);
if (err)
- return err;
+ goto err;
+ drm_dbg(&xe->drm, "Device resumed\n");
return 0;
+err:
+ drm_dbg(&xe->drm, "Device resume failed %d\n", err);
+ return err;
}
-static bool xe_pm_pci_d3cold_capable(struct pci_dev *pdev)
+static bool xe_pm_pci_d3cold_capable(struct xe_device *xe)
{
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
struct pci_dev *root_pdev;
root_pdev = pcie_find_root_port(pdev);
if (!root_pdev)
return false;
- /* D3Cold requires PME capability and _PR3 power resource */
- if (!pci_pme_capable(root_pdev, PCI_D3cold) || !pci_pr3_present(root_pdev))
+ /* D3Cold requires PME capability */
+ if (!pci_pme_capable(root_pdev, PCI_D3cold)) {
+ drm_dbg(&xe->drm, "d3cold: PME# not supported\n");
+ return false;
+ }
+
+ /* D3Cold requires _PR3 power resource */
+ if (!pci_pr3_present(root_pdev)) {
+ drm_dbg(&xe->drm, "d3cold: ACPI _PR3 not present\n");
return false;
+ }
return true;
}
@@ -163,17 +182,21 @@ static void xe_pm_runtime_init(struct xe_device *xe)
pm_runtime_put(dev);
}
-void xe_pm_init(struct xe_device *xe)
+void xe_pm_init_early(struct xe_device *xe)
{
- struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list);
+ drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock);
+}
+void xe_pm_init(struct xe_device *xe)
+{
/* For now suspend/resume is only allowed with GuC */
if (!xe_device_uc_enabled(xe))
return;
drmm_mutex_init(&xe->drm, &xe->d3cold.lock);
- xe->d3cold.capable = xe_pm_pci_d3cold_capable(pdev);
+ xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe);
if (xe->d3cold.capable) {
xe_device_sysfs_init(xe);
@@ -214,6 +237,7 @@ struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
int xe_pm_runtime_suspend(struct xe_device *xe)
{
+ struct xe_bo *bo, *on;
struct xe_gt *gt;
u8 id;
int err = 0;
@@ -247,6 +271,16 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
*/
lock_map_acquire(&xe_device_mem_access_lockdep_map);
+ /*
+ * Take the lock for the entire list walk, as xe_ttm_bo_destroy and
+ * xe_bo_move_notify also check for and delete bo entries from the user
+ * fault list.
+ */
+ mutex_lock(&xe->mem_access.vram_userfault.lock);
+ list_for_each_entry_safe(bo, on,
+ &xe->mem_access.vram_userfault.list, vram_userfault_link)
+ xe_bo_runtime_pm_release_mmap_offset(bo);
+ mutex_unlock(&xe->mem_access.vram_userfault.lock);
+
if (xe->d3cold.allowed) {
err = xe_bo_evict_all(xe);
if (err)
@@ -286,11 +320,9 @@ int xe_pm_runtime_resume(struct xe_device *xe)
xe->d3cold.power_lost = xe_guc_in_reset(&gt->uc.guc);
if (xe->d3cold.allowed && xe->d3cold.power_lost) {
- for_each_gt(gt, xe, id) {
- err = xe_pcode_init(gt);
- if (err)
- goto out;
- }
+ err = xe_pcode_ready(xe, true);
+ if (err)
+ goto out;
/*
* This only restores pinned memory which is the memory
@@ -330,7 +362,7 @@ int xe_pm_runtime_put(struct xe_device *xe)
int xe_pm_runtime_get_if_active(struct xe_device *xe)
{
- return pm_runtime_get_if_active(xe->drm.dev, true);
+ return pm_runtime_get_if_active(xe->drm.dev);
}
void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
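The runtime-suspend hunk above only consumes xe->mem_access.vram_userfault.list under its dedicated lock; the producer side (the CPU fault handler) is not part of this hunk. The sketch below is a hedged illustration of that assumed producer side, reusing the lock and the vram_userfault_link member referenced above.

/*
 * Illustrative sketch only (producer side assumed, not in this hunk):
 * track a BO whose mmap was faulted in while VRAM was powered, so the
 * runtime-suspend path above can revoke its mmap offset later.
 */
static void demo_track_vram_userfault(struct xe_device *xe, struct xe_bo *bo)
{
	mutex_lock(&xe->mem_access.vram_userfault.lock);
	/* vram_userfault_link is assumed to be INIT_LIST_HEAD()ed at BO init */
	if (list_empty(&bo->vram_userfault_link))
		list_add(&bo->vram_userfault_link,
			 &xe->mem_access.vram_userfault.list);
	mutex_unlock(&xe->mem_access.vram_userfault.lock);
}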
diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
index 6b9031f7af..64a97c6726 100644
--- a/drivers/gpu/drm/xe/xe_pm.h
+++ b/drivers/gpu/drm/xe/xe_pm.h
@@ -20,6 +20,7 @@ struct xe_device;
int xe_pm_suspend(struct xe_device *xe);
int xe_pm_resume(struct xe_device *xe);
+void xe_pm_init_early(struct xe_device *xe);
void xe_pm_init(struct xe_device *xe);
void xe_pm_runtime_fini(struct xe_device *xe);
int xe_pm_runtime_suspend(struct xe_device *xe);
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c
index 7bce2a3326..7d50c6e89d 100644
--- a/drivers/gpu/drm/xe/xe_preempt_fence.c
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.c
@@ -49,7 +49,7 @@ static bool preempt_fence_enable_signaling(struct dma_fence *fence)
struct xe_exec_queue *q = pfence->q;
pfence->error = q->ops->suspend(q);
- queue_work(system_unbound_wq, &pfence->preempt_work);
+ queue_work(q->vm->xe->preempt_fence_wq, &pfence->preempt_work);
return true;
}
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index c1a833b1cb..4efc8c1a3d 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -877,8 +877,7 @@ static void xe_pt_commit_bind(struct xe_vma *vma,
static int
xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma,
- struct xe_vm_pgtable_update *entries, u32 *num_entries,
- bool rebind)
+ struct xe_vm_pgtable_update *entries, u32 *num_entries)
{
int err;
@@ -1136,8 +1135,7 @@ static int invalidation_fence_init(struct xe_gt *gt,
spin_lock_irq(&gt->tlb_invalidation.lock);
dma_fence_init(&ifence->base.base, &invalidation_fence_ops,
&gt->tlb_invalidation.lock,
- gt->tlb_invalidation.fence_context,
- ++gt->tlb_invalidation.fence_seqno);
+ dma_fence_context_alloc(1), 1);
spin_unlock_irq(&gt->tlb_invalidation.lock);
INIT_LIST_HEAD(&ifence->base.link);
@@ -1234,9 +1232,16 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
"Preparing bind, with range [%llx...%llx) engine %p.\n",
xe_vma_start(vma), xe_vma_end(vma), q);
- err = xe_pt_prepare_bind(tile, vma, entries, &num_entries, rebind);
+ err = xe_pt_prepare_bind(tile, vma, entries, &num_entries);
if (err)
goto err;
+
+ err = dma_resv_reserve_fences(xe_vm_resv(vm), 1);
+ if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+ err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1);
+ if (err)
+ goto err;
+
xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
@@ -1579,6 +1584,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
struct dma_fence *fence = NULL;
struct invalidation_fence *ifence;
struct xe_range_fence *rfence;
+ int err;
LLIST_HEAD(deferred);
@@ -1596,6 +1602,12 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries,
num_entries);
+ err = dma_resv_reserve_fences(xe_vm_resv(vm), 1);
+ if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+ err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1);
+ if (err)
+ return ERR_PTR(err);
+
ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
if (!ifence)
return ERR_PTR(-ENOMEM);
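Both the bind and unbind paths now reserve their dma-resv fence slots up front, while an error can still unwind cleanly, because the later fence publication cannot fail. A small generic sketch of that reserve-then-add ordering follows; the demo_* name is hypothetical.

#include <linux/dma-fence.h>
#include <linux/dma-resv.h>

/* Reserve the slot while failure is still recoverable, then publish. */
static int demo_publish_fence(struct dma_resv *resv, struct dma_fence *fence)
{
	int err;

	dma_resv_assert_held(resv);

	err = dma_resv_reserve_fences(resv, 1);
	if (err)
		return err;	/* e.g. -ENOMEM, before anything is visible */

	dma_resv_add_fence(resv, fence, DMA_RESV_USAGE_KERNEL);
	return 0;
}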
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 1a6f2f51ba..075f9eaef0 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -198,7 +198,7 @@ static int query_engines(struct xe_device *xe,
return -EINVAL;
}
- engines = kmalloc(size, GFP_KERNEL);
+ engines = kzalloc(size, GFP_KERNEL);
if (!engines)
return -ENOMEM;
@@ -212,14 +212,10 @@ static int query_engines(struct xe_device *xe,
engines->engines[i].instance.engine_instance =
hwe->logical_instance;
engines->engines[i].instance.gt_id = gt->info.id;
- engines->engines[i].instance.pad = 0;
- memset(engines->engines[i].reserved, 0,
- sizeof(engines->engines[i].reserved));
i++;
}
- engines->pad = 0;
engines->num_engines = i;
if (copy_to_user(query_ptr, engines, size)) {
@@ -520,6 +516,49 @@ static int query_gt_topology(struct xe_device *xe,
return 0;
}
+static int
+query_uc_fw_version(struct xe_device *xe, struct drm_xe_device_query *query)
+{
+ struct drm_xe_query_uc_fw_version __user *query_ptr = u64_to_user_ptr(query->data);
+ size_t size = sizeof(struct drm_xe_query_uc_fw_version);
+ struct drm_xe_query_uc_fw_version resp;
+ struct xe_uc_fw_version *version = NULL;
+
+ if (query->size == 0) {
+ query->size = size;
+ return 0;
+ } else if (XE_IOCTL_DBG(xe, query->size != size)) {
+ return -EINVAL;
+ }
+
+ if (copy_from_user(&resp, query_ptr, size))
+ return -EFAULT;
+
+ if (XE_IOCTL_DBG(xe, resp.pad || resp.pad2 || resp.reserved))
+ return -EINVAL;
+
+ switch (resp.uc_type) {
+ case XE_QUERY_UC_TYPE_GUC_SUBMISSION: {
+ struct xe_guc *guc = &xe->tiles[0].primary_gt->uc.guc;
+
+ version = &guc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY];
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+
+ resp.branch_ver = 0;
+ resp.major_ver = version->major;
+ resp.minor_ver = version->minor;
+ resp.patch_ver = version->patch;
+
+ if (copy_to_user(query_ptr, &resp, size))
+ return -EFAULT;
+
+ return 0;
+}
+
static int (* const xe_query_funcs[])(struct xe_device *xe,
struct drm_xe_device_query *query) = {
query_engines,
@@ -529,6 +568,7 @@ static int (* const xe_query_funcs[])(struct xe_device *xe,
query_hwconfig,
query_gt_topology,
query_engine_cycles,
+ query_uc_fw_version,
};
int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
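query_uc_fw_version() follows the usual two-call query convention: a first call with size == 0 returns the required size, and a second call with the exact size and zeroed pad/reserved fields fills in the version. Below is a hedged userspace sketch using libdrm's drmIoctl; only the structs and XE_QUERY_UC_TYPE_GUC_SUBMISSION come from this patch, and the query index macro name is an assumption.

#include <stdint.h>
#include <xf86drm.h>
#include <drm/xe_drm.h>

static int demo_query_guc_version(int fd)
{
	struct drm_xe_query_uc_fw_version resp = {
		.uc_type = XE_QUERY_UC_TYPE_GUC_SUBMISSION,
		/* pad, pad2 and reserved must stay zero or the kernel returns -EINVAL */
	};
	struct drm_xe_device_query query = {
		.query = DRM_XE_DEVICE_QUERY_UC_FW_VERSION,	/* assumed macro name */
		.size = 0,	/* first call: kernel writes back the required size */
	};

	if (drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query))
		return -1;

	query.size = sizeof(resp);	/* second call: size must match exactly */
	query.data = (uintptr_t)&resp;
	if (drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query))
		return -1;

	/* resp.major_ver/minor_ver/patch_ver now hold the GuC compat version */
	return 0;
}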
diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c
index 87adefb560..440ac572f6 100644
--- a/drivers/gpu/drm/xe/xe_reg_sr.c
+++ b/drivers/gpu/drm/xe/xe_reg_sr.c
@@ -231,7 +231,7 @@ void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe)
if (err)
goto err_force_wake;
- p = drm_debug_printer(KBUILD_MODNAME);
+ p = drm_dbg_printer(&xe->drm, DRM_UT_DRIVER, NULL);
xa_for_each(&sr->xa, reg, entry) {
if (slot == RING_MAX_NONPRIV_SLOTS) {
xe_gt_err(gt,
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index e66ae1bdaf..3fa2ece7d2 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -7,9 +7,11 @@
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
#include "xe_gt_types.h"
#include "xe_platform_types.h"
#include "xe_rtp.h"
+#include "xe_step.h"
#undef XE_REG_MCR
#define XE_REG_MCR(...) XE_REG(__VA_ARGS__, .mcr = 1)
@@ -56,6 +58,12 @@ static const struct xe_rtp_entry_sr register_whitelist[] = {
RING_FORCE_TO_NONPRIV_DENY,
XE_RTP_ACTION_FLAG(ENGINE_BASE)))
},
+ { XE_RTP_NAME("16020183090"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+ ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(WHITELIST(CSBE_DEBUG_STATUS(RENDER_RING_BASE), 0))
+ },
+
{}
};
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 68495f8858..820c8d73c4 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -5,7 +5,8 @@
#include "xe_ring_ops.h"
-#include "generated/xe_wa_oob.h"
+#include <generated/xe_wa_oob.h>
+
#include "instructions/xe_mi_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gpu_commands.h"
@@ -78,6 +79,16 @@ static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
return i;
}
+static int emit_flush_dw(u32 *dw, int i)
+{
+ dw[i++] = MI_FLUSH_DW | MI_FLUSH_IMM_DW;
+ dw[i++] = 0;
+ dw[i++] = 0;
+ dw[i++] = 0;
+
+ return i;
+}
+
static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb,
u32 *dw, int i)
{
@@ -113,6 +124,19 @@ static int emit_flush_invalidate(u32 flag, u32 *dw, int i)
return i;
}
+static int
+emit_pipe_control(u32 *dw, int i, u32 bit_group_0, u32 bit_group_1, u32 offset, u32 value)
+{
+ dw[i++] = GFX_OP_PIPE_CONTROL(6) | bit_group_0;
+ dw[i++] = bit_group_1;
+ dw[i++] = offset;
+ dw[i++] = 0;
+ dw[i++] = value;
+ dw[i++] = 0;
+
+ return i;
+}
+
static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
int i)
{
@@ -131,14 +155,7 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
flags &= ~mask_flags;
- dw[i++] = GFX_OP_PIPE_CONTROL(6);
- dw[i++] = flags;
- dw[i++] = LRC_PPHWSP_SCRATCH_ADDR;
- dw[i++] = 0;
- dw[i++] = 0;
- dw[i++] = 0;
-
- return i;
+ return emit_pipe_control(dw, i, 0, flags, LRC_PPHWSP_SCRATCH_ADDR, 0);
}
static int emit_store_imm_ppgtt_posted(u64 addr, u64 value,
@@ -174,14 +191,7 @@ static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i)
else if (job->q->class == XE_ENGINE_CLASS_COMPUTE)
flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
- dw[i++] = GFX_OP_PIPE_CONTROL(6) | PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
- dw[i++] = flags;
- dw[i++] = 0;
- dw[i++] = 0;
- dw[i++] = 0;
- dw[i++] = 0;
-
- return i;
+ return emit_pipe_control(dw, i, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0, 0);
}
static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int i)
@@ -189,14 +199,9 @@ static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int
if (hwe->class != XE_ENGINE_CLASS_RENDER)
return i;
- if (XE_WA(hwe->gt, 16020292621)) {
- dw[i++] = GFX_OP_PIPE_CONTROL(6);
- dw[i++] = PIPE_CONTROL_LRI_POST_SYNC;
- dw[i++] = RING_NOPID(hwe->mmio_base).addr;
- dw[i++] = 0;
- dw[i++] = 0;
- dw[i++] = 0;
- }
+ if (XE_WA(hwe->gt, 16020292621))
+ i = emit_pipe_control(dw, i, 0, PIPE_CONTROL_LRI_POST_SYNC,
+ RING_NOPID(hwe->mmio_base).addr, 0);
return i;
}
@@ -204,16 +209,13 @@ static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int
static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
int i)
{
- dw[i++] = GFX_OP_PIPE_CONTROL(6);
- dw[i++] = (stall_only ? PIPE_CONTROL_CS_STALL :
- PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL) |
- PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE;
- dw[i++] = addr;
- dw[i++] = 0;
- dw[i++] = value;
- dw[i++] = 0; /* We're thrashing one extra dword. */
+ u32 flags = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_GLOBAL_GTT_IVB |
+ PIPE_CONTROL_QW_WRITE;
- return i;
+ if (!stall_only)
+ flags |= PIPE_CONTROL_FLUSH_ENABLE;
+
+ return emit_pipe_control(dw, i, 0, flags, addr, value);
}
static u32 get_ppgtt_flag(struct xe_sched_job *job)
@@ -241,10 +243,12 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc
i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
- if (job->user_fence.used)
+ if (job->user_fence.used) {
+ i = emit_flush_dw(dw, i);
i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
job->user_fence.value,
dw, i);
+ }
i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
@@ -300,10 +304,12 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
- if (job->user_fence.used)
+ if (job->user_fence.used) {
+ i = emit_flush_dw(dw, i);
i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
job->user_fence.value,
dw, i);
+ }
i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index d07b9ee0ea..b0c7fa4693 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -287,3 +287,41 @@ int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm)
return drm_sched_job_add_dependency(&job->drm, fence);
}
+
+struct xe_sched_job_snapshot *
+xe_sched_job_snapshot_capture(struct xe_sched_job *job)
+{
+ struct xe_exec_queue *q = job->q;
+ struct xe_device *xe = q->gt->tile->xe;
+ struct xe_sched_job_snapshot *snapshot;
+ size_t len = sizeof(*snapshot) + (sizeof(u64) * q->width);
+ u16 i;
+
+ snapshot = kzalloc(len, GFP_ATOMIC);
+ if (!snapshot)
+ return NULL;
+
+ snapshot->batch_addr_len = q->width;
+ for (i = 0; i < q->width; i++)
+ snapshot->batch_addr[i] = xe_device_uncanonicalize_addr(xe, job->batch_addr[i]);
+
+ return snapshot;
+}
+
+void xe_sched_job_snapshot_free(struct xe_sched_job_snapshot *snapshot)
+{
+ kfree(snapshot);
+}
+
+void
+xe_sched_job_snapshot_print(struct xe_sched_job_snapshot *snapshot,
+ struct drm_printer *p)
+{
+ u16 i;
+
+ if (!snapshot)
+ return;
+
+ for (i = 0; i < snapshot->batch_addr_len; i++)
+ drm_printf(p, "batch_addr[%u]: 0x%016llx\n", i, snapshot->batch_addr[i]);
+}
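The capture helper allocates with GFP_ATOMIC so it can run from a reset/hang path, while printing happens later from the devcoredump read side. A minimal sketch of a consumer wiring the three new helpers together; only the xe_sched_job_snapshot_* functions are from this patch, the wrapper itself is illustrative.

static void demo_dump_job(struct xe_sched_job *job, struct drm_printer *p)
{
	struct xe_sched_job_snapshot *snap;

	/* Atomic-safe capture; may return NULL on allocation failure. */
	snap = xe_sched_job_snapshot_capture(job);
	if (!snap)
		return;

	xe_sched_job_snapshot_print(snap, p);	/* "batch_addr[i]: ..." lines */
	xe_sched_job_snapshot_free(snap);
}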
diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h
index 34f475ba7f..f1a660648c 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.h
+++ b/drivers/gpu/drm/xe/xe_sched_job.h
@@ -8,6 +8,7 @@
#include "xe_sched_job_types.h"
+struct drm_printer;
struct xe_vm;
#define XE_SCHED_HANG_LIMIT 1
@@ -77,4 +78,8 @@ xe_sched_job_add_migrate_flush(struct xe_sched_job *job, u32 flags)
bool xe_sched_job_is_migration(struct xe_exec_queue *q);
+struct xe_sched_job_snapshot *xe_sched_job_snapshot_capture(struct xe_sched_job *job);
+void xe_sched_job_snapshot_free(struct xe_sched_job_snapshot *snapshot);
+void xe_sched_job_snapshot_print(struct xe_sched_job_snapshot *snapshot, struct drm_printer *p);
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
index 46ead63459..5e12724219 100644
--- a/drivers/gpu/drm/xe/xe_sched_job_types.h
+++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
@@ -30,11 +30,11 @@ struct xe_sched_job {
struct dma_fence *fence;
/** @user_fence: write back value when BB is complete */
struct {
- /** @used: user fence is used */
+ /** @user_fence.used: user fence is used */
bool used;
- /** @addr: address to write to */
+ /** @user_fence.addr: address to write to */
u64 addr;
- /** @value: write back value */
+ /** @user_fence.value: write back value */
u64 value;
} user_fence;
/** @migrate_flush_flags: Additional flush flags for migration jobs */
@@ -45,4 +45,9 @@ struct xe_sched_job {
u64 batch_addr[];
};
+struct xe_sched_job_snapshot {
+ u16 batch_addr_len;
+ u64 batch_addr[];
+};
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c
index 42a0e0c917..f295d91886 100644
--- a/drivers/gpu/drm/xe/xe_sriov.c
+++ b/drivers/gpu/drm/xe/xe_sriov.c
@@ -3,6 +3,8 @@
* Copyright © 2023 Intel Corporation
*/
+#include <drm/drm_managed.h>
+
#include "xe_assert.h"
#include "xe_sriov.h"
@@ -53,3 +55,33 @@ void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov)
drm_info(&xe->drm, "Running in %s mode\n",
xe_sriov_mode_to_string(xe_device_sriov_mode(xe)));
}
+
+static void fini_sriov(struct drm_device *drm, void *arg)
+{
+ struct xe_device *xe = arg;
+
+ destroy_workqueue(xe->sriov.wq);
+ xe->sriov.wq = NULL;
+}
+
+/**
+ * xe_sriov_init - Initialize SR-IOV specific data.
+ * @xe: the &xe_device to initialize
+ *
+ * In this function we create a dedicated workqueue that will be used
+ * by the SR-IOV specific workers.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_init(struct xe_device *xe)
+{
+ if (!IS_SRIOV(xe))
+ return 0;
+
+ xe_assert(xe, !xe->sriov.wq);
+ xe->sriov.wq = alloc_workqueue("xe-sriov-wq", 0, 0);
+ if (!xe->sriov.wq)
+ return -ENOMEM;
+
+ return drmm_add_action_or_reset(&xe->drm, fini_sriov, xe);
+}
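SR-IOV workers are meant to be queued on the dedicated xe->sriov.wq created above rather than on a system workqueue. A hedged sketch of queueing such a worker; the demo_* worker struct and functions are hypothetical.

#include <drm/drm_print.h>
#include <linux/workqueue.h>

struct demo_sriov_worker {
	struct xe_device *xe;
	struct work_struct work;
};

static void demo_sriov_work_fn(struct work_struct *work)
{
	struct demo_sriov_worker *w = container_of(work, typeof(*w), work);

	drm_dbg(&w->xe->drm, "SR-IOV worker ran\n");
}

static void demo_sriov_queue(struct xe_device *xe, struct demo_sriov_worker *w)
{
	w->xe = xe;
	INIT_WORK(&w->work, demo_sriov_work_fn);
	queue_work(xe->sriov.wq, &w->work);	/* dedicated SR-IOV workqueue */
}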
diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h
index 5af73a3172..1545552162 100644
--- a/drivers/gpu/drm/xe/xe_sriov.h
+++ b/drivers/gpu/drm/xe/xe_sriov.h
@@ -13,6 +13,7 @@
const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode);
void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov);
+int xe_sriov_init(struct xe_device *xe);
static inline enum xe_sriov_mode xe_device_sriov_mode(struct xe_device *xe)
{
diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h
index 999a4311b9..1a138108d1 100644
--- a/drivers/gpu/drm/xe/xe_sriov_types.h
+++ b/drivers/gpu/drm/xe/xe_sriov_types.h
@@ -9,6 +9,18 @@
#include <linux/build_bug.h>
/**
+ * VFID - Virtual Function Identifier
+ * @n: VF number
+ *
+ * Helper macro to represent Virtual Function (VF) Identifier.
+ * VFID(0) is used as an alias for PFID, which represents the Physical Function.
+ *
+ * Note: According to the PCI spec, SR-IOV VF numbers are 1-based (VF1, VF2, ...).
+ */
+#define VFID(n) (n)
+#define PFID VFID(0)
+
+/**
* enum xe_sriov_mode - SR-IOV mode
* @XE_SRIOV_MODE_NONE: bare-metal mode (non-virtualized)
* @XE_SRIOV_MODE_PF: SR-IOV Physical Function (PF) mode
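VFID()/PFID keep per-function bookkeeping zero-based, with the PF in slot 0, while VF numbering itself stays 1-based. A tiny hypothetical sketch of indexing a per-function config array that way:

#include <linux/types.h>

#define DEMO_MAX_VFS	63

struct demo_function_config {
	u64 ggtt_size;	/* hypothetical per-function resource */
};

static struct demo_function_config demo_configs[DEMO_MAX_VFS + 1];

static struct demo_function_config *demo_config(unsigned int vfid)
{
	/* VFID(0) == PFID selects the PF entry; VF1..VFn follow. */
	return &demo_configs[vfid];
}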
diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c
index 0f8d3e7fce..0662968d7b 100644
--- a/drivers/gpu/drm/xe/xe_tile_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c
@@ -9,6 +9,7 @@
#include "xe_tile.h"
#include "xe_tile_sysfs.h"
+#include "xe_vram_freq.h"
static void xe_tile_sysfs_kobj_release(struct kobject *kobj)
{
@@ -50,6 +51,8 @@ void xe_tile_sysfs_init(struct xe_tile *tile)
tile->sysfs = &kt->base;
+ xe_vram_freq_sysfs_init(tile);
+
err = drmm_add_action_or_reset(&xe->drm, tile_sysfs_fini, tile);
if (err)
drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index e5d7d5e2be..3107d2a124 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -11,7 +11,8 @@
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_range_manager.h>
-#include "generated/xe_wa_oob.h"
+#include <generated/xe_wa_oob.h>
+
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
@@ -19,6 +20,7 @@
#include "xe_gt.h"
#include "xe_mmio.h"
#include "xe_res_cursor.h"
+#include "xe_sriov.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_wa.h"
@@ -205,7 +207,9 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe)
u64 stolen_size, io_size, pgsize;
int err;
- if (IS_DGFX(xe))
+ if (IS_SRIOV_VF(xe))
+ stolen_size = 0;
+ else if (IS_DGFX(xe))
stolen_size = detect_bar2_dgfx(xe, mgr);
else if (GRAPHICS_VERx100(xe) >= 1270)
stolen_size = detect_bar2_integrated(xe, mgr);
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index 53ccd338fd..5c83c75bc4 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -37,7 +37,14 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
},
-
+ { XE_RTP_NAME("Tuning: Compression Overfetch"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2004, XE_RTP_END_VERSION_UNDEFINED)),
+ XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX)),
+ },
+ { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2004, XE_RTP_END_VERSION_UNDEFINED)),
+ XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN))
+ },
{}
};
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index 25e1ddfd2f..7033f8c1b4 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -7,8 +7,10 @@
#include "xe_device.h"
#include "xe_gsc.h"
+#include "xe_gsc_proxy.h"
#include "xe_gt.h"
#include "xe_guc.h"
+#include "xe_guc_db_mgr.h"
#include "xe_guc_pc.h"
#include "xe_guc_submit.h"
#include "xe_huc.h"
@@ -30,13 +32,15 @@ uc_to_xe(struct xe_uc *uc)
/* Should be called once at driver load only */
int xe_uc_init(struct xe_uc *uc)
{
+ struct xe_device *xe = uc_to_xe(uc);
int ret;
+ xe_device_mem_access_get(xe);
+
/*
* We call the GuC/HuC/GSC init functions even if GuC submission is off
* to correctly move our tracking of the FW state to "disabled".
*/
-
ret = xe_guc_init(&uc->guc);
if (ret)
goto err;
@@ -50,7 +54,7 @@ int xe_uc_init(struct xe_uc *uc)
goto err;
if (!xe_device_uc_enabled(uc_to_xe(uc)))
- return 0;
+ goto err;
ret = xe_wopcm_init(&uc->wopcm);
if (ret)
@@ -60,9 +64,17 @@ int xe_uc_init(struct xe_uc *uc)
if (ret)
goto err;
+ ret = xe_guc_db_mgr_init(&uc->guc.dbm, ~0);
+ if (ret)
+ goto err;
+
+ xe_device_mem_access_put(xe);
+
return 0;
err:
+ xe_device_mem_access_put(xe);
+
return ret;
}
@@ -88,6 +100,10 @@ int xe_uc_init_post_hwconfig(struct xe_uc *uc)
if (err)
return err;
+ err = xe_huc_init_post_hwconfig(&uc->huc);
+ if (err)
+ return err;
+
return xe_gsc_init_post_hwconfig(&uc->gsc);
}
@@ -256,3 +272,16 @@ int xe_uc_suspend(struct xe_uc *uc)
return xe_guc_suspend(&uc->guc);
}
+
+/**
+ * xe_uc_remove() - Clean up the UC structures before driver removal
+ * @uc: the UC object
+ *
+ * This function should only act on objects/structures that must be cleaned
+ * before the driver removal callback is complete and therefore can't be
+ * deferred to a drmm action.
+ */
+void xe_uc_remove(struct xe_uc *uc)
+{
+ xe_gsc_remove(&uc->gsc);
+}
diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h
index 5d5110c0c8..e4d4e3c99f 100644
--- a/drivers/gpu/drm/xe/xe_uc.h
+++ b/drivers/gpu/drm/xe/xe_uc.h
@@ -20,5 +20,6 @@ int xe_uc_stop(struct xe_uc *uc);
int xe_uc_start(struct xe_uc *uc);
int xe_uc_suspend(struct xe_uc *uc);
int xe_uc_sanitize_reset(struct xe_uc *uc);
+void xe_uc_remove(struct xe_uc *uc);
#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 9dff96dfe4..a9d25b3fa6 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -92,6 +92,7 @@ struct uc_fw_entry {
const char *path;
u16 major;
u16 minor;
+ u16 patch;
bool full_ver_required;
};
};
@@ -102,14 +103,15 @@ struct fw_blobs_by_type {
};
#define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \
- fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 7)) \
- fw_def(DG2, major_ver(i915, guc, dg2, 70, 5)) \
- fw_def(DG1, major_ver(i915, guc, dg1, 70, 5)) \
- fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 5)) \
- fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 5)) \
- fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 5)) \
- fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 5)) \
- fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 5))
+ fw_def(LUNARLAKE, major_ver(xe, guc, lnl, 70, 19, 2)) \
+ fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 19, 2)) \
+ fw_def(DG2, major_ver(i915, guc, dg2, 70, 19, 2)) \
+ fw_def(DG1, major_ver(i915, guc, dg1, 70, 19, 2)) \
+ fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 19, 2)) \
+ fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 19, 2)) \
+ fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 19, 2)) \
+ fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 19, 2)) \
+ fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 19, 2))
#define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \
fw_def(METEORLAKE, no_ver(i915, huc_gsc, mtl)) \
@@ -121,24 +123,24 @@ struct fw_blobs_by_type {
/* for the GSC FW we match the compatibility version and not the release one */
#define XE_GSC_FIRMWARE_DEFS(fw_def, major_ver) \
- fw_def(METEORLAKE, major_ver(i915, gsc, mtl, 1, 0))
+ fw_def(METEORLAKE, major_ver(i915, gsc, mtl, 1, 0, 0))
#define MAKE_FW_PATH(dir__, uc__, shortname__, version__) \
__stringify(dir__) "/" __stringify(shortname__) "_" __stringify(uc__) version__ ".bin"
#define fw_filename_mmp_ver(dir_, uc_, shortname_, a, b, c) \
MAKE_FW_PATH(dir_, uc_, shortname_, "_" __stringify(a ## . ## b ## . ## c))
-#define fw_filename_major_ver(dir_, uc_, shortname_, a, b) \
+#define fw_filename_major_ver(dir_, uc_, shortname_, a, b, c) \
MAKE_FW_PATH(dir_, uc_, shortname_, "_" __stringify(a))
#define fw_filename_no_ver(dir_, uc_, shortname_) \
MAKE_FW_PATH(dir_, uc_, shortname_, "")
#define uc_fw_entry_mmp_ver(dir_, uc_, shortname_, a, b, c) \
{ fw_filename_mmp_ver(dir_, uc_, shortname_, a, b, c), \
- a, b, true }
-#define uc_fw_entry_major_ver(dir_, uc_, shortname_, a, b) \
- { fw_filename_major_ver(dir_, uc_, shortname_, a, b), \
- a, b }
+ a, b, c, true }
+#define uc_fw_entry_major_ver(dir_, uc_, shortname_, a, b, c) \
+ { fw_filename_major_ver(dir_, uc_, shortname_, a, b, c), \
+ a, b, c }
#define uc_fw_entry_no_ver(dir_, uc_, shortname_) \
{ fw_filename_no_ver(dir_, uc_, shortname_), \
0, 0 }
@@ -221,6 +223,7 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw)
uc_fw->path = entries[i].path;
uc_fw->versions.wanted.major = entries[i].major;
uc_fw->versions.wanted.minor = entries[i].minor;
+ uc_fw->versions.wanted.patch = entries[i].patch;
uc_fw->full_ver_required = entries[i].full_ver_required;
if (uc_fw->type == XE_UC_FW_TYPE_GSC)
@@ -340,19 +343,22 @@ int xe_uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw)
* Otherwise, at least the major version.
*/
if (wanted->major != found->major ||
- (uc_fw->full_ver_required && wanted->minor != found->minor)) {
- drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
+ (uc_fw->full_ver_required &&
+ ((wanted->minor != found->minor) ||
+ (wanted->patch != found->patch)))) {
+ drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u.%u != %u.%u.%u\n",
xe_uc_fw_type_repr(uc_fw->type), uc_fw->path,
- found->major, found->minor,
- wanted->major, wanted->minor);
+ found->major, found->minor, found->patch,
+ wanted->major, wanted->minor, wanted->patch);
goto fail;
}
- if (wanted->minor > found->minor) {
- drm_notice(&xe->drm, "%s firmware (%u.%u) is recommended, but only (%u.%u) was found in %s\n",
+ if (wanted->minor > found->minor ||
+ (wanted->minor == found->minor && wanted->patch > found->patch)) {
+ drm_notice(&xe->drm, "%s firmware (%u.%u.%u) is recommended, but only (%u.%u.%u) was found in %s\n",
xe_uc_fw_type_repr(uc_fw->type),
- wanted->major, wanted->minor,
- found->major, found->minor,
+ wanted->major, wanted->minor, wanted->patch,
+ found->major, found->minor, found->patch,
uc_fw->path);
drm_info(&xe->drm, "Consider updating your linux-firmware pkg or downloading from %s\n",
XE_UC_FIRMWARE_URL);
@@ -652,14 +658,18 @@ static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmwar
xe_assert(xe, !uc_fw->path);
uc_fw_auto_select(xe, uc_fw);
+ uc_fw_override(uc_fw);
xe_uc_fw_change_status(uc_fw, uc_fw->path ?
XE_UC_FIRMWARE_SELECTED :
XE_UC_FIRMWARE_NOT_SUPPORTED);
- if (!xe_uc_fw_is_supported(uc_fw))
+ if (!xe_uc_fw_is_supported(uc_fw)) {
+ if (uc_fw->type == XE_UC_FW_TYPE_GUC) {
+ drm_err(&xe->drm, "No GuC firmware defined for platform\n");
+ return -ENOENT;
+ }
return 0;
-
- uc_fw_override(uc_fw);
+ }
/* an empty path means the firmware is disabled */
if (!xe_device_uc_enabled(xe) || !(*uc_fw->path)) {
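With the added patch field, the requirement check above keeps a strict major match (and the full triple when full_ver_required), and only warns when the found (minor, patch) pair is lexicographically older than the wanted one. A standalone sketch of those two comparisons; the demo_* names are hypothetical.

#include <linux/types.h>

struct demo_ver {
	u16 major, minor, patch;
};

/* Hard failure: wrong major, or any mismatch when the full version is required. */
static bool demo_ver_mismatch(const struct demo_ver *wanted,
			      const struct demo_ver *found,
			      bool full_ver_required)
{
	if (wanted->major != found->major)
		return true;

	return full_ver_required &&
	       (wanted->minor != found->minor || wanted->patch != found->patch);
}

/* Soft warning: found version is older than the recommended (minor, patch). */
static bool demo_ver_older_than_wanted(const struct demo_ver *wanted,
				       const struct demo_ver *found)
{
	return wanted->minor > found->minor ||
	       (wanted->minor == found->minor && wanted->patch > found->patch);
}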
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h
index ee914a5d85..bc800b6968 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_types.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
@@ -124,11 +124,14 @@ struct xe_uc_fw {
/** @versions: FW versions wanted and found */
struct {
- /** @wanted: firmware version wanted by platform */
+ /** @versions.wanted: firmware version wanted by platform */
struct xe_uc_fw_version wanted;
- /** @wanted_type: type of firmware version wanted (release vs compatibility) */
+ /**
+ * @versions.wanted_type: type of firmware version wanted
+ * (release vs compatibility)
+ */
enum xe_uc_fw_version_types wanted_type;
- /** @found: fw versions found in firmware blob */
+ /** @versions.found: fw versions found in firmware blob */
struct xe_uc_fw_version found[XE_UC_FW_VER_TYPE_COUNT];
} versions;
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index a2024247be..32cd0c978a 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -13,11 +13,14 @@
#include <drm/ttm/ttm_execbuf_util.h>
#include <drm/ttm/ttm_tt.h>
#include <drm/xe_drm.h>
+#include <linux/ascii85.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/swap.h>
+#include <generated/xe_wa_oob.h>
+
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
@@ -34,7 +37,6 @@
#include "xe_res_cursor.h"
#include "xe_sync.h"
#include "xe_trace.h"
-#include "generated/xe_wa_oob.h"
#include "xe_wa.h"
static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
@@ -480,17 +482,53 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
return 0;
}
+/**
+ * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
+ * @vm: The vm for which we are rebinding.
+ * @exec: The struct drm_exec with the locked GEM objects.
+ * @num_fences: The number of fences to reserve for the operation, not
+ * including rebinds and validations.
+ *
+ * Validates all evicted gem objects and rebinds their vmas. Note that
+ * rebindings may cause evictions and hence the validation-rebind
+ * sequence is rerun until there are no more objects to validate.
+ *
+ * Return: 0 on success, negative error code on error. In particular,
+ * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
+ * the drm_exec transaction needs to be restarted.
+ */
+int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences)
+{
+ struct drm_gem_object *obj;
+ unsigned long index;
+ int ret;
+
+ do {
+ ret = drm_gpuvm_validate(&vm->gpuvm, exec);
+ if (ret)
+ return ret;
+
+ ret = xe_vm_rebind(vm, false);
+ if (ret)
+ return ret;
+ } while (!list_empty(&vm->gpuvm.evict.list));
+
+ drm_exec_for_each_locked_object(exec, index, obj) {
+ ret = dma_resv_reserve_fences(obj->resv, num_fences);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
bool *done)
{
int err;
- /*
- * 1 fence for each preempt fence plus a fence for each tile from a
- * possible rebind
- */
- err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues +
- vm->xe->info.tile_count);
+ err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
if (err)
return err;
@@ -505,7 +543,7 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
return 0;
}
- err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues);
+ err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
if (err)
return err;
@@ -513,7 +551,13 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
if (err)
return err;
- return drm_gpuvm_validate(&vm->gpuvm, exec);
+ /*
+ * Add validation and rebinding to the locking loop since both can
+ * cause evictions which may require blocking dma_resv locks.
+ * The fence reservation here is intended for the new preempt fences
+ * we attach at the end of the rebind work.
+ */
+ return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
}
static void preempt_rebind_work_func(struct work_struct *w)
@@ -801,6 +845,7 @@ static void xe_vma_free(struct xe_vma *vma)
#define VMA_CREATE_FLAG_READ_ONLY BIT(0)
#define VMA_CREATE_FLAG_IS_NULL BIT(1)
+#define VMA_CREATE_FLAG_DUMPABLE BIT(2)
static struct xe_vma *xe_vma_create(struct xe_vm *vm,
struct xe_bo *bo,
@@ -813,6 +858,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
u8 id;
bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
+ bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
xe_assert(vm->xe, start < end);
xe_assert(vm->xe, end < vm->size);
@@ -847,6 +893,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
vma->gpuva.va.range = end - start + 1;
if (read_only)
vma->gpuva.flags |= XE_VMA_READ_ONLY;
+ if (dumpable)
+ vma->gpuva.flags |= XE_VMA_DUMPABLE;
for_each_tile(tile, vm->xe, id)
vma->tile_mask |= 0x1 << id;
@@ -990,35 +1038,26 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
}
/**
- * xe_vm_prepare_vma() - drm_exec utility to lock a vma
+ * xe_vm_lock_vma() - drm_exec utility to lock a vma
* @exec: The drm_exec object we're currently locking for.
 * @vma: The vma for which we want to lock the vm resv and any attached
* object's resv.
- * @num_shared: The number of dma-fence slots to pre-allocate in the
- * objects' reservation objects.
*
* Return: 0 on success, negative error code on error. In particular
* may return -EDEADLK on WW transaction contention and -EINTR if
* an interruptible wait is terminated by a signal.
*/
-int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
- unsigned int num_shared)
+int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
{
struct xe_vm *vm = xe_vma_vm(vma);
struct xe_bo *bo = xe_vma_bo(vma);
int err;
XE_WARN_ON(!vm);
- if (num_shared)
- err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
- else
- err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
- if (!err && bo && !bo->vm) {
- if (num_shared)
- err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);
- else
- err = drm_exec_lock_obj(exec, &bo->ttm.base);
- }
+
+ err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
+ if (!err && bo && !bo->vm)
+ err = drm_exec_lock_obj(exec, &bo->ttm.base);
return err;
}
@@ -1030,7 +1069,7 @@ static void xe_vma_destroy_unlocked(struct xe_vma *vma)
drm_exec_init(&exec, 0, 0);
drm_exec_until_all_locked(&exec) {
- err = xe_vm_prepare_vma(&exec, vma, 0);
+ err = xe_vm_lock_vma(&exec, vma);
drm_exec_retry_on_contention(&exec);
if (XE_WARN_ON(err))
break;
@@ -1065,7 +1104,9 @@ static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
xe_assert(vm->xe, xe_vma_vm(vma) == vm);
lockdep_assert_held(&vm->lock);
+ mutex_lock(&vm->snap_mutex);
err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
+ mutex_unlock(&vm->snap_mutex);
XE_WARN_ON(err); /* Shouldn't be possible */
return err;
@@ -1076,7 +1117,9 @@ static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
xe_assert(vm->xe, xe_vma_vm(vma) == vm);
lockdep_assert_held(&vm->lock);
+ mutex_lock(&vm->snap_mutex);
drm_gpuva_remove(&vma->gpuva);
+ mutex_unlock(&vm->snap_mutex);
if (vm->usm.last_fault_vma == vma)
vm->usm.last_fault_vma = NULL;
}
@@ -1095,7 +1138,7 @@ static struct drm_gpuva_op *xe_vm_op_alloc(void)
static void xe_vm_free(struct drm_gpuvm *gpuvm);
-static struct drm_gpuvm_ops gpuvm_ops = {
+static const struct drm_gpuvm_ops gpuvm_ops = {
.op_alloc = xe_vm_op_alloc,
.vm_bo_validate = xe_gpuvm_validate,
.vm_free = xe_vm_free,
@@ -1303,6 +1346,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
vm->flags = flags;
init_rwsem(&vm->lock);
+ mutex_init(&vm->snap_mutex);
INIT_LIST_HEAD(&vm->rebind_list);
@@ -1428,6 +1472,7 @@ err_close:
return ERR_PTR(err);
err_no_resv:
+ mutex_destroy(&vm->snap_mutex);
for_each_tile(tile, xe, id)
xe_range_fence_tree_fini(&vm->rftree[id]);
kfree(vm);
@@ -1532,6 +1577,16 @@ void xe_vm_close_and_put(struct xe_vm *vm)
xe->usm.num_vm_in_fault_mode--;
else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
xe->usm.num_vm_in_non_fault_mode--;
+
+ if (vm->usm.asid) {
+ void *lookup;
+
+ xe_assert(xe, xe->info.has_asid);
+ xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
+
+ lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
+ xe_assert(xe, lookup == vm);
+ }
mutex_unlock(&xe->usm.lock);
for_each_tile(tile, xe, id)
@@ -1547,21 +1602,17 @@ static void vm_destroy_work_func(struct work_struct *w)
struct xe_device *xe = vm->xe;
struct xe_tile *tile;
u8 id;
- void *lookup;
/* xe_vm_close_and_put was not called? */
xe_assert(xe, !vm->size);
- if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
- xe_device_mem_access_put(xe);
+ if (xe_vm_in_preempt_fence_mode(vm))
+ flush_work(&vm->preempt.rebind_work);
- if (xe->info.has_asid && vm->usm.asid) {
- mutex_lock(&xe->usm.lock);
- lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
- xe_assert(xe, lookup == vm);
- mutex_unlock(&xe->usm.lock);
- }
- }
+ mutex_destroy(&vm->snap_mutex);
+
+ if (!(vm->flags & XE_VM_FLAG_MIGRATION))
+ xe_device_mem_access_put(xe);
for_each_tile(tile, xe, id)
XE_WARN_ON(vm->pt_root[id]);
@@ -2163,6 +2214,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
if (__op->op == DRM_GPUVA_OP_MAP) {
op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
+ op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
op->map.pat_index = pat_index;
} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
op->prefetch.region = prefetch_region;
@@ -2328,6 +2380,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
struct xe_sync_entry *syncs, u32 num_syncs,
struct list_head *ops_list, bool last)
{
+ struct xe_device *xe = vm->xe;
struct xe_vma_op *last_op = NULL;
struct drm_gpuva_op *__op;
int err = 0;
@@ -2356,6 +2409,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
{
flags |= op->map.is_null ?
VMA_CREATE_FLAG_IS_NULL : 0;
+ flags |= op->map.dumpable ?
+ VMA_CREATE_FLAG_DUMPABLE : 0;
vma = new_vma(vm, &op->base.map, op->map.pat_index,
flags);
@@ -2380,6 +2435,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
flags |= op->base.remap.unmap->va->flags &
DRM_GPUVA_SPARSE ?
VMA_CREATE_FLAG_IS_NULL : 0;
+ flags |= op->base.remap.unmap->va->flags &
+ XE_VMA_DUMPABLE ?
+ VMA_CREATE_FLAG_DUMPABLE : 0;
vma = new_vma(vm, op->base.remap.prev,
old->pat_index, flags);
@@ -2401,6 +2459,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
xe_vma_end(vma) -
xe_vma_start(old);
op->remap.start = xe_vma_end(vma);
+ vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
+ (ULL)op->remap.start,
+ (ULL)op->remap.range);
}
}
@@ -2411,6 +2472,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
flags |= op->base.remap.unmap->va->flags &
DRM_GPUVA_SPARSE ?
VMA_CREATE_FLAG_IS_NULL : 0;
+ flags |= op->base.remap.unmap->va->flags &
+ XE_VMA_DUMPABLE ?
+ VMA_CREATE_FLAG_DUMPABLE : 0;
vma = new_vma(vm, op->base.remap.next,
old->pat_index, flags);
@@ -2431,6 +2495,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
op->remap.range -=
xe_vma_end(old) -
xe_vma_start(vma);
+ vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
+ (ULL)op->remap.start,
+ (ULL)op->remap.range);
}
}
break;
@@ -2473,7 +2540,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
lockdep_assert_held_write(&vm->lock);
- err = xe_vm_prepare_vma(exec, vma, 1);
+ err = xe_vm_lock_vma(exec, vma);
if (err)
return err;
@@ -3284,3 +3351,168 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
return 0;
}
+
+struct xe_vm_snapshot {
+ unsigned long num_snaps;
+ struct {
+ u64 ofs, bo_ofs;
+ unsigned long len;
+ struct xe_bo *bo;
+ void *data;
+ struct mm_struct *mm;
+ } snap[];
+};
+
+struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
+{
+ unsigned long num_snaps = 0, i;
+ struct xe_vm_snapshot *snap = NULL;
+ struct drm_gpuva *gpuva;
+
+ if (!vm)
+ return NULL;
+
+ mutex_lock(&vm->snap_mutex);
+ drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
+ if (gpuva->flags & XE_VMA_DUMPABLE)
+ num_snaps++;
+ }
+
+ if (num_snaps)
+ snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
+ if (!snap)
+ goto out_unlock;
+
+ snap->num_snaps = num_snaps;
+ i = 0;
+ drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
+ struct xe_vma *vma = gpuva_to_vma(gpuva);
+ struct xe_bo *bo = vma->gpuva.gem.obj ?
+ gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
+
+ if (!(gpuva->flags & XE_VMA_DUMPABLE))
+ continue;
+
+ snap->snap[i].ofs = xe_vma_start(vma);
+ snap->snap[i].len = xe_vma_size(vma);
+ if (bo) {
+ snap->snap[i].bo = xe_bo_get(bo);
+ snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
+ } else if (xe_vma_is_userptr(vma)) {
+ struct mm_struct *mm =
+ to_userptr_vma(vma)->userptr.notifier.mm;
+
+ if (mmget_not_zero(mm))
+ snap->snap[i].mm = mm;
+ else
+ snap->snap[i].data = ERR_PTR(-EFAULT);
+
+ snap->snap[i].bo_ofs = xe_vma_userptr(vma);
+ } else {
+ snap->snap[i].data = ERR_PTR(-ENOENT);
+ }
+ i++;
+ }
+
+out_unlock:
+ mutex_unlock(&vm->snap_mutex);
+ return snap;
+}
+
+void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
+{
+ for (int i = 0; i < snap->num_snaps; i++) {
+ struct xe_bo *bo = snap->snap[i].bo;
+ struct iosys_map src;
+ int err;
+
+ if (IS_ERR(snap->snap[i].data))
+ continue;
+
+ snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
+ if (!snap->snap[i].data) {
+ snap->snap[i].data = ERR_PTR(-ENOMEM);
+ goto cleanup_bo;
+ }
+
+ if (bo) {
+ dma_resv_lock(bo->ttm.base.resv, NULL);
+ err = ttm_bo_vmap(&bo->ttm, &src);
+ if (!err) {
+ xe_map_memcpy_from(xe_bo_device(bo),
+ snap->snap[i].data,
+ &src, snap->snap[i].bo_ofs,
+ snap->snap[i].len);
+ ttm_bo_vunmap(&bo->ttm, &src);
+ }
+ dma_resv_unlock(bo->ttm.base.resv);
+ } else {
+ void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
+
+ kthread_use_mm(snap->snap[i].mm);
+ if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
+ err = 0;
+ else
+ err = -EFAULT;
+ kthread_unuse_mm(snap->snap[i].mm);
+
+ mmput(snap->snap[i].mm);
+ snap->snap[i].mm = NULL;
+ }
+
+ if (err) {
+ kvfree(snap->snap[i].data);
+ snap->snap[i].data = ERR_PTR(err);
+ }
+
+cleanup_bo:
+ xe_bo_put(bo);
+ snap->snap[i].bo = NULL;
+ }
+}
+
+void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
+{
+ unsigned long i, j;
+
+ for (i = 0; i < snap->num_snaps; i++) {
+ if (IS_ERR(snap->snap[i].data))
+ goto uncaptured;
+
+ drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
+ drm_printf(p, "[%llx].data: ",
+ snap->snap[i].ofs);
+
+ for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
+ u32 *val = snap->snap[i].data + j;
+ char dumped[ASCII85_BUFSZ];
+
+ drm_puts(p, ascii85_encode(*val, dumped));
+ }
+
+ drm_puts(p, "\n");
+ continue;
+
+uncaptured:
+ drm_printf(p, "Unable to capture range [%llx-%llx]: %li\n",
+ snap->snap[i].ofs, snap->snap[i].ofs + snap->snap[i].len - 1,
+ PTR_ERR(snap->snap[i].data));
+ }
+}
+
+void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
+{
+ unsigned long i;
+
+ if (!snap)
+ return;
+
+ for (i = 0; i < snap->num_snaps; i++) {
+ if (!IS_ERR(snap->snap[i].data))
+ kvfree(snap->snap[i].data);
+ xe_bo_put(snap->snap[i].bo);
+ if (snap->snap[i].mm)
+ mmput(snap->snap[i].mm);
+ }
+ kvfree(snap);
+}
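xe_vm_validate_rebind() is intended to be called from inside a drm_exec locking loop, since validation and rebinding can evict objects and take additional dma-resv locks. A hedged sketch of a caller; only xe_vm_validate_rebind() and the gpuvm prepare helper come from this patch, the wrapper itself is illustrative.

#include <drm/drm_exec.h>
#include <drm/drm_gpuvm.h>

static int demo_lock_and_validate(struct xe_vm *vm, unsigned int num_fences)
{
	struct drm_exec exec;
	int err = 0;

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
	drm_exec_until_all_locked(&exec) {
		err = drm_gpuvm_prepare_vm(&vm->gpuvm, &exec, 0);
		drm_exec_retry_on_contention(&exec);
		if (err)
			break;

		/* May evict and relock; -EDEADLK restarts the transaction. */
		err = xe_vm_validate_rebind(vm, &exec, num_fences);
		drm_exec_retry_on_contention(&exec);
		if (err)
			break;
	}
	drm_exec_fini(&exec);

	return err;
}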
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 40188a28c5..306cd0934a 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -211,8 +211,6 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
int xe_vm_invalidate_vma(struct xe_vma *vma);
-extern struct ttm_device_funcs xe_ttm_funcs;
-
static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm)
{
xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
@@ -244,8 +242,10 @@ bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end);
int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id);
-int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
- unsigned int num_shared);
+int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma);
+
+int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
+ unsigned int num_fences);
/**
* xe_vm_resv() - Return's the vm's reservation object
@@ -273,3 +273,8 @@ static inline void vm_dbg(const struct drm_device *dev,
{ /* noop */ }
#endif
#endif
+
+struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm);
+void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap);
+void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p);
+void xe_vm_snapshot_free(struct xe_vm_snapshot *snap);
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 4027da3a10..badf394508 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -32,6 +32,7 @@ struct xe_vm;
#define XE_VMA_PTE_1G (DRM_GPUVA_USERBITS << 7)
#define XE_VMA_PTE_64K (DRM_GPUVA_USERBITS << 8)
#define XE_VMA_PTE_COMPACT (DRM_GPUVA_USERBITS << 9)
+#define XE_VMA_DUMPABLE (DRM_GPUVA_USERBITS << 10)
/** struct xe_userptr - User pointer */
struct xe_userptr {
@@ -163,6 +164,11 @@ struct xe_vm {
* VM
*/
struct rw_semaphore lock;
+ /**
+ * @snap_mutex: Mutex used to guard insertions and removals from gpuva,
+ * so we can take a snapshot safely from devcoredump.
+ */
+ struct mutex snap_mutex;
/**
* @rebind_list: list of VMAs that need rebinding. Protected by the
@@ -184,30 +190,6 @@ struct xe_vm {
*/
struct xe_range_fence_tree rftree[XE_MAX_TILES_PER_DEVICE];
- /** @async_ops: async VM operations (bind / unbinds) */
- struct {
- /** @list: list of pending async VM ops */
- struct list_head pending;
- /** @work: worker to execute async VM ops */
- struct work_struct work;
- /** @lock: protects list of pending async VM ops and fences */
- spinlock_t lock;
- /** @fence: fence state */
- struct {
- /** @context: context of async fence */
- u64 context;
- /** @seqno: seqno of async fence */
- u32 seqno;
- } fence;
- /** @error: error state for async VM ops */
- int error;
- /**
- * @munmap_rebind_inflight: an munmap style VM bind is in the
- * middle of a set of ops which requires a rebind at the end.
- */
- bool munmap_rebind_inflight;
- } async_ops;
-
const struct xe_pt_ops *pt_ops;
/** @userptr: user pointer state */
@@ -296,6 +278,8 @@ struct xe_vma_op_map {
struct xe_vma *vma;
/** @is_null: is NULL binding */
bool is_null;
+ /** @dumpable: whether BO is dumped on GPU hang */
+ bool dumpable;
/** @pat_index: The pat index to use for this operation. */
u16 pat_index;
};
diff --git a/drivers/gpu/drm/xe/xe_vram_freq.c b/drivers/gpu/drm/xe/xe_vram_freq.c
new file mode 100644
index 0000000000..c5f6b5a5d1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vram_freq.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+#include <linux/sysfs.h>
+#include <drm/drm_managed.h>
+
+#include "xe_gt_types.h"
+#include "xe_pcode.h"
+#include "xe_pcode_api.h"
+#include "xe_tile.h"
+#include "xe_tile_sysfs.h"
+#include "xe_vram_freq.h"
+
+/**
+ * DOC: Xe VRAM freq
+ *
+ * Provides sysfs entries for the VRAM frequency of a tile:
+ *
+ * device/tile#/memory/freq0/max_freq - This is the maximum frequency. This value is read-only
+ * as it is the fixed fuse point P0, not the current
+ * system configuration.
+ * device/tile#/memory/freq0/min_freq - This is the minimum frequency. This value is read-only
+ * as it is the fixed fuse point PN, not the current
+ * system configuration.
+ */
+
+static struct xe_tile *dev_to_tile(struct device *dev)
+{
+ return kobj_to_tile(dev->kobj.parent);
+}
+
+static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct xe_tile *tile = dev_to_tile(dev);
+ struct xe_gt *gt = tile->primary_gt;
+ u32 val, mbox;
+ int err;
+
+ mbox = REG_FIELD_PREP(PCODE_MB_COMMAND, PCODE_FREQUENCY_CONFIG)
+ | REG_FIELD_PREP(PCODE_MB_PARAM1, PCODE_MBOX_FC_SC_READ_FUSED_P0)
+ | REG_FIELD_PREP(PCODE_MB_PARAM2, PCODE_MBOX_DOMAIN_HBM);
+
+ err = xe_pcode_read(gt, mbox, &val, NULL);
+ if (err)
+ return err;
+
+ /* data_out - Fused P0 for domain ID in units of 50 MHz */
+ val *= 50;
+
+ return sysfs_emit(buf, "%u\n", val);
+}
+static DEVICE_ATTR_RO(max_freq);
+
+static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct xe_tile *tile = dev_to_tile(dev);
+ struct xe_gt *gt = tile->primary_gt;
+ u32 val, mbox;
+ int err;
+
+ mbox = REG_FIELD_PREP(PCODE_MB_COMMAND, PCODE_FREQUENCY_CONFIG)
+ | REG_FIELD_PREP(PCODE_MB_PARAM1, PCODE_MBOX_FC_SC_READ_FUSED_PN)
+ | REG_FIELD_PREP(PCODE_MB_PARAM2, PCODE_MBOX_DOMAIN_HBM);
+
+ err = xe_pcode_read(gt, mbox, &val, NULL);
+ if (err)
+ return err;
+
+ /* data_out - Fused Pn for domain ID in units of 50 MHz */
+ val *= 50;
+
+ return sysfs_emit(buf, "%u\n", val);
+}
+static DEVICE_ATTR_RO(min_freq);
+
+static struct attribute *freq_attrs[] = {
+ &dev_attr_max_freq.attr,
+ &dev_attr_min_freq.attr,
+ NULL
+};
+
+static const struct attribute_group freq_group_attrs = {
+ .name = "freq0",
+ .attrs = freq_attrs,
+};
+
+static void vram_freq_sysfs_fini(struct drm_device *drm, void *arg)
+{
+ struct kobject *kobj = arg;
+
+ sysfs_remove_group(kobj, &freq_group_attrs);
+ kobject_put(kobj);
+}
+
+/**
+ * xe_vram_freq_sysfs_init - Initialize vram frequency sysfs component
+ * @tile: Xe Tile object
+ *
+ * This must be called only after the main per-tile sysfs component is initialized.
+ */
+void xe_vram_freq_sysfs_init(struct xe_tile *tile)
+{
+ struct xe_device *xe = tile_to_xe(tile);
+ struct kobject *kobj;
+ int err;
+
+ if (xe->info.platform != XE_PVC)
+ return;
+
+ kobj = kobject_create_and_add("memory", tile->sysfs);
+ if (!kobj) {
+ drm_warn(&xe->drm, "failed to add memory directory, err: %d\n", -ENOMEM);
+ return;
+ }
+
+ err = sysfs_create_group(kobj, &freq_group_attrs);
+ if (err) {
+ kobject_put(kobj);
+ drm_warn(&xe->drm, "failed to register vram freq sysfs, err: %d\n", err);
+ return;
+ }
+
+ err = drmm_add_action_or_reset(&xe->drm, vram_freq_sysfs_fini, kobj);
+ if (err)
+ drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n",
+ __func__, err);
+}
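From userspace the new attributes are plain read-only sysfs files reporting MHz (the pcode value is fused in units of 50 MHz and already scaled by the show functions above). A small standalone C consumer sketch follows; the /sys path is only an example, as the actual card name and tile index depend on the system.

/* Userspace sketch: read the per-tile VRAM frequency attributes.
 * The sysfs path below is an example; adjust card/tile to the system. */
#include <stdio.h>

int main(void)
{
	const char *paths[] = {
		"/sys/class/drm/card0/device/tile0/memory/freq0/min_freq",
		"/sys/class/drm/card0/device/tile0/memory/freq0/max_freq",
	};

	for (int i = 0; i < 2; i++) {
		FILE *f = fopen(paths[i], "r");
		unsigned int mhz;

		if (!f)
			continue;	/* attributes exist only on supported platforms */
		if (fscanf(f, "%u", &mhz) == 1)
			printf("%s: %u MHz\n", paths[i], mhz);
		fclose(f);
	}

	return 0;
}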
diff --git a/drivers/gpu/drm/xe/xe_vram_freq.h b/drivers/gpu/drm/xe/xe_vram_freq.h
new file mode 100644
index 0000000000..cbe8c12fbd
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vram_freq.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_VRAM_FREQ_H_
+#define _XE_VRAM_FREQ_H_
+
+struct xe_tile;
+
+void xe_vram_freq_sysfs_init(struct xe_tile *tile);
+
+#endif /* _XE_VRAM_FREQ_H_ */
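The header exposes a single entry point which, as the kernel-doc above notes, may only run once the tile's own sysfs kobject exists, since the "memory" directory is created underneath tile->sysfs. The caller-ordering sketch below is hedged: register_tile_kobject() is a hypothetical placeholder for whatever populates tile->sysfs in the real tile-sysfs init path.

/* Caller-ordering sketch; register_tile_kobject() is hypothetical. */
#include "xe_vram_freq.h"

int register_tile_kobject(struct xe_tile *tile);	/* hypothetical: populates tile->sysfs */

static void tile_sysfs_init_sketch(struct xe_tile *tile)
{
	/* 1) Register the per-tile kobject first so tile->sysfs is valid. */
	if (register_tile_kobject(tile))
		return;

	/* 2) Only then hang memory/freq0 (min_freq, max_freq) off it. */
	xe_vram_freq_sysfs_init(tile);
}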
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 5f61dd87c5..a0264eedd4 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -9,7 +9,8 @@
#include <kunit/visibility.h>
#include <linux/compiler_types.h>
-#include "generated/xe_wa_oob.h"
+#include <generated/xe_wa_oob.h>
+
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
@@ -125,13 +126,6 @@ static const struct xe_rtp_entry_sr gt_was[] = {
/* DG2 */
- { XE_RTP_NAME("16010515920"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G10),
- GRAPHICS_STEP(A0, B0),
- ENGINE_CLASS(VIDEO_DECODE)),
- XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS)),
- XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
- },
{ XE_RTP_NAME("22010523718"),
XE_RTP_RULES(SUBPLATFORM(DG2, G10)),
XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE, CG3DDISCFEG_CLKGATE_DIS))
@@ -140,61 +134,6 @@ static const struct xe_rtp_entry_sr gt_was[] = {
XE_RTP_RULES(SUBPLATFORM(DG2, G10)),
XE_RTP_ACTIONS(SET(SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS))
},
- { XE_RTP_NAME("14012362059"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
- XE_RTP_ACTIONS(SET(XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB))
- },
- { XE_RTP_NAME("14012362059"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)),
- XE_RTP_ACTIONS(SET(XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB))
- },
- { XE_RTP_NAME("14010948348"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
- XE_RTP_ACTIONS(SET(UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS))
- },
- { XE_RTP_NAME("14011037102"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
- XE_RTP_ACTIONS(SET(UNSLCGCTL9444, LTCDD_CLKGATE_DIS))
- },
- { XE_RTP_NAME("14011371254"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
- XE_RTP_ACTIONS(SET(XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS))
- },
- { XE_RTP_NAME("14011431319"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
- XE_RTP_ACTIONS(SET(UNSLCGCTL9440,
- GAMTLBOACS_CLKGATE_DIS |
- GAMTLBVDBOX7_CLKGATE_DIS | GAMTLBVDBOX6_CLKGATE_DIS |
- GAMTLBVDBOX5_CLKGATE_DIS | GAMTLBVDBOX4_CLKGATE_DIS |
- GAMTLBVDBOX3_CLKGATE_DIS | GAMTLBVDBOX2_CLKGATE_DIS |
- GAMTLBVDBOX1_CLKGATE_DIS | GAMTLBVDBOX0_CLKGATE_DIS |
- GAMTLBKCR_CLKGATE_DIS | GAMTLBGUC_CLKGATE_DIS |
- GAMTLBBLT_CLKGATE_DIS),
- SET(UNSLCGCTL9444,
- GAMTLBGFXA0_CLKGATE_DIS | GAMTLBGFXA1_CLKGATE_DIS |
- GAMTLBCOMPA0_CLKGATE_DIS | GAMTLBCOMPA1_CLKGATE_DIS |
- GAMTLBCOMPB0_CLKGATE_DIS | GAMTLBCOMPB1_CLKGATE_DIS |
- GAMTLBCOMPC0_CLKGATE_DIS | GAMTLBCOMPC1_CLKGATE_DIS |
- GAMTLBCOMPD0_CLKGATE_DIS | GAMTLBCOMPD1_CLKGATE_DIS |
- GAMTLBMERT_CLKGATE_DIS |
- GAMTLBVEBOX3_CLKGATE_DIS | GAMTLBVEBOX2_CLKGATE_DIS |
- GAMTLBVEBOX1_CLKGATE_DIS | GAMTLBVEBOX0_CLKGATE_DIS))
- },
- { XE_RTP_NAME("14010569222"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
- XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE, GAMEDIA_CLKGATE_DIS))
- },
- { XE_RTP_NAME("14011028019"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
- XE_RTP_ACTIONS(SET(SSMCGCTL9530, RTFUNIT_CLKGATE_DIS))
- },
- { XE_RTP_NAME("14010680813"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
- XE_RTP_ACTIONS(SET(XEHP_GAMSTLB_CTRL,
- CONTROL_BLOCK_CLKGATE_DIS |
- EGRESS_BLOCK_CLKGATE_DIS |
- TAG_BLOCK_CLKGATE_DIS))
- },
{ XE_RTP_NAME("14014830051"),
XE_RTP_RULES(PLATFORM(DG2)),
XE_RTP_ACTIONS(CLR(SARB_CHICKEN1, COMP_CKN_IN))
@@ -212,10 +151,6 @@ static const struct xe_rtp_entry_sr gt_was[] = {
INVALIDATION_BROADCAST_MODE_DIS |
GLOBAL_INVALIDATION_MODE))
},
- { XE_RTP_NAME("14010648519"),
- XE_RTP_RULES(PLATFORM(DG2)),
- XE_RTP_ACTIONS(SET(XEHP_L3NODEARBCFG, XEHP_LNESPARE))
- },
/* PVC */
@@ -377,13 +312,6 @@ static const struct xe_rtp_entry_sr engine_was[] = {
POLYGON_TRIFAN_LINELOOP_DISABLE))
},
{ XE_RTP_NAME("22012826095, 22013059131"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0),
- FUNC(xe_rtp_match_first_render_or_compute)),
- XE_RTP_ACTIONS(FIELD_SET(LSC_CHICKEN_BIT_0_UDW,
- MAXREQS_PER_BANK,
- REG_FIELD_PREP(MAXREQS_PER_BANK, 2)))
- },
- { XE_RTP_NAME("22012826095, 22013059131"),
XE_RTP_RULES(SUBPLATFORM(DG2, G11),
FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(FIELD_SET(LSC_CHICKEN_BIT_0_UDW,
@@ -391,27 +319,10 @@ static const struct xe_rtp_entry_sr engine_was[] = {
REG_FIELD_PREP(MAXREQS_PER_BANK, 2)))
},
{ XE_RTP_NAME("22013059131"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0),
- FUNC(xe_rtp_match_first_render_or_compute)),
- XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT))
- },
- { XE_RTP_NAME("22013059131"),
XE_RTP_RULES(SUBPLATFORM(DG2, G11),
FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT))
},
- { XE_RTP_NAME("14010918519"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0),
- FUNC(xe_rtp_match_first_render_or_compute)),
- XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW,
- FORCE_SLM_FENCE_SCOPE_TO_TILE |
- FORCE_UGM_FENCE_SCOPE_TO_TILE,
- /*
- * Ignore read back as it always returns 0 in these
- * steps
- */
- .read_mask = 0))
- },
{ XE_RTP_NAME("14015227452"),
XE_RTP_RULES(PLATFORM(DG2),
FUNC(xe_rtp_match_first_render_or_compute)),
@@ -428,22 +339,12 @@ static const struct xe_rtp_entry_sr engine_was[] = {
FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3))
},
- { XE_RTP_NAME("16011620976, 22015475538"),
+ { XE_RTP_NAME("22015475538"),
XE_RTP_RULES(PLATFORM(DG2),
FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8))
},
{ XE_RTP_NAME("22012654132"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0),
- FUNC(xe_rtp_match_first_render_or_compute)),
- XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC,
- /*
- * Register can't be read back for verification on
- * DG2 due to Wa_14012342262
- */
- .read_mask = 0))
- },
- { XE_RTP_NAME("22012654132"),
XE_RTP_RULES(SUBPLATFORM(DG2, G11),
FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC,
@@ -461,68 +362,11 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_READ_SUPPRESSION))
},
- { XE_RTP_NAME("14013392000"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0),
- ENGINE_CLASS(RENDER)),
- XE_RTP_ACTIONS(SET(ROW_CHICKEN2, ENABLE_LARGE_GRF_MODE))
- },
- { XE_RTP_NAME("14012419201"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0),
- ENGINE_CLASS(RENDER)),
- XE_RTP_ACTIONS(SET(ROW_CHICKEN4,
- DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX))
- },
- { XE_RTP_NAME("14012419201"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0),
- ENGINE_CLASS(RENDER)),
- XE_RTP_ACTIONS(SET(ROW_CHICKEN4,
- DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX))
- },
- { XE_RTP_NAME("1308578152"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0),
- ENGINE_CLASS(RENDER),
- FUNC(xe_rtp_match_first_gslice_fused_off)),
- XE_RTP_ACTIONS(CLR(CS_DEBUG_MODE1(RENDER_RING_BASE),
- REPLAY_MODE_GRANULARITY))
- },
{ XE_RTP_NAME("22010960976, 14013347512"),
XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(CLR(XEHP_HDC_CHICKEN0,
LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK))
},
- { XE_RTP_NAME("1608949956, 14010198302"),
- XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)),
- XE_RTP_ACTIONS(SET(ROW_CHICKEN,
- MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE))
- },
- { XE_RTP_NAME("22010430635"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0),
- ENGINE_CLASS(RENDER)),
- XE_RTP_ACTIONS(SET(ROW_CHICKEN4,
- DISABLE_GRF_CLEAR))
- },
- { XE_RTP_NAME("14013202645"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0),
- ENGINE_CLASS(RENDER)),
- XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY))
- },
- { XE_RTP_NAME("14013202645"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0),
- ENGINE_CLASS(RENDER)),
- XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY))
- },
- { XE_RTP_NAME("22012532006"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0),
- ENGINE_CLASS(RENDER)),
- XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7,
- DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA))
- },
- { XE_RTP_NAME("22012532006"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0),
- ENGINE_CLASS(RENDER)),
- XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7,
- DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA))
- },
{ XE_RTP_NAME("14015150844"),
XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(XEHP_HDC_CHICKEN0, DIS_ATOMIC_CHAINING_TYPED_WRITES,
@@ -612,7 +456,10 @@ static const struct xe_rtp_entry_sr engine_was[] = {
PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS,
XE_RTP_ACTION_FLAG(ENGINE_BASE)))
},
-
+ { XE_RTP_NAME("16018610683"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, SLM_WMTP_RESTORE))
+ },
{}
};
@@ -652,21 +499,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
/* DG2 */
- { XE_RTP_NAME("16011186671"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)),
- XE_RTP_ACTIONS(CLR(VFLSKPD, DIS_MULT_MISS_RD_SQUASH),
- SET(VFLSKPD, DIS_OVER_FETCH_CACHE))
- },
- { XE_RTP_NAME("14010469329"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
- XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3,
- XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE))
- },
- { XE_RTP_NAME("14010698770, 22010613112, 22010465075"),
- XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)),
- XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3,
- DISABLE_CPS_AWARE_COLOR_PIPE))
- },
{ XE_RTP_NAME("16013271637"),
XE_RTP_RULES(PLATFORM(DG2)),
XE_RTP_ACTIONS(SET(XEHP_SLICE_COMMON_ECO_CHICKEN1,
@@ -708,6 +540,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)),
XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE))
},
+ { XE_RTP_NAME("14019877138"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
+ },
/* Xe2_LPG */
@@ -739,6 +575,11 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD))
},
+ { XE_RTP_NAME("16020183090"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0),
+ ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(INSTPM(RENDER_RING_BASE), ENABLE_SEMAPHORE_POLL_BIT))
+ },
{}
};
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 727bdc4292..b138cbd51b 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -1,13 +1,8 @@
22012773006 GRAPHICS_VERSION_RANGE(1200, 1250)
-16011759253 SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)
14014475959 GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)
PLATFORM(DG2)
22011391025 PLATFORM(DG2)
-14012197797 PLATFORM(DG2), GRAPHICS_STEP(A0, B0)
-16011777198 SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0)
- SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)
-22012727170 SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0)
- SUBPLATFORM(DG2, G11)
+22012727170 SUBPLATFORM(DG2, G11)
22012727685 SUBPLATFORM(DG2, G11)
16015675438 PLATFORM(PVC)
SUBPLATFORM(DG2, G10)
@@ -22,3 +17,8 @@
14019821291 MEDIA_VERSION_RANGE(1300, 2000)
14015076503 MEDIA_VERSION(1300)
16020292621 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)
+14018913170 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)
+ MEDIA_VERSION(2000), GRAPHICS_STEP(A0, A1)
+ GRAPHICS_VERSION_RANGE(1270, 1274)
+ MEDIA_VERSION(1300)
+ PLATFORM(DG2)
diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c
index a75eeba7bf..f697213392 100644
--- a/drivers/gpu/drm/xe/xe_wait_user_fence.c
+++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c
@@ -148,7 +148,7 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data,
if (q) {
if (q->ops->reset_status(q)) {
- drm_info(&xe->drm, "exec gueue reset detected\n");
+ drm_info(&xe->drm, "exec queue reset detected\n");
err = -EIO;
break;
}
diff --git a/drivers/gpu/drm/xe/xe_wopcm_types.h b/drivers/gpu/drm/xe/xe_wopcm_types.h
index 486d850c40..99d34837c4 100644
--- a/drivers/gpu/drm/xe/xe_wopcm_types.h
+++ b/drivers/gpu/drm/xe/xe_wopcm_types.h
@@ -16,9 +16,9 @@ struct xe_wopcm {
u32 size;
/** @guc: GuC WOPCM Region info */
struct {
- /** @base: GuC WOPCM base which is offset from WOPCM base */
+ /** @guc.base: GuC WOPCM base which is offset from WOPCM base */
u32 base;
- /** @size: Size of the GuC WOPCM region */
+ /** @guc.size: Size of the GuC WOPCM region */
u32 size;
} guc;
};