Diffstat (limited to 'drivers/gpu/drm/xe')
142 files changed, 6605 insertions, 1232 deletions
diff --git a/drivers/gpu/drm/xe/.kunitconfig b/drivers/gpu/drm/xe/.kunitconfig
index 9590eac91a..ad4b9b4a9f 100644
--- a/drivers/gpu/drm/xe/.kunitconfig
+++ b/drivers/gpu/drm/xe/.kunitconfig
@@ -11,3 +11,8 @@ CONFIG_DRM_XE_DISPLAY=n
 CONFIG_EXPERT=y
 CONFIG_FB=y
 CONFIG_DRM_XE_KUNIT_TEST=y
+CONFIG_LOCK_DEBUGGING_SUPPORT=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_LOCKDEP=y
+CONFIG_DEBUG_LOCKDEP=y
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index e36ae1f0d8..1a556d087e 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config DRM_XE
 	tristate "Intel Xe Graphics"
-	depends on DRM && PCI && MMU && (m || (y && KUNIT=y)) && 64BIT
+	depends on DRM && PCI && MMU && (m || (y && KUNIT=y))
 	select INTERVAL_TREE
 	# we need shmfs for the swappable backing store, and in particular
 	# the shmem_readpage() which depends upon tmpfs
@@ -10,6 +10,7 @@ config DRM_XE
 	select DRM_BUDDY
 	select DRM_EXEC
 	select DRM_KMS_HELPER
+	select DRM_KUNIT_TEST_HELPERS if DRM_XE_KUNIT_TEST != n
 	select DRM_PANEL
 	select DRM_SUBALLOC_HELPER
 	select DRM_DISPLAY_DP_HELPER
diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
index 549065f57a..df02e5d17d 100644
--- a/drivers/gpu/drm/xe/Kconfig.debug
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -76,7 +76,6 @@ config DRM_XE_KUNIT_TEST
 	depends on DRM_XE && KUNIT && DEBUG_FS
 	default KUNIT_ALL_TESTS
 	select DRM_EXPORT_FOR_TESTS if m
-	select DRM_KUNIT_TEST_HELPERS
 	help
 	  Choose this option to allow the driver to perform selftests under
 	  the kunit framework
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 1c9e6906b0..c29a850859 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -77,6 +77,7 @@ xe-y += xe_bb.o \
 	xe_ggtt.o \
 	xe_gpu_scheduler.o \
 	xe_gsc.o \
+	xe_gsc_proxy.o \
 	xe_gsc_submit.o \
 	xe_gt.o \
 	xe_gt_ccs_mode.o \
@@ -93,6 +94,7 @@ xe-y += xe_bb.o \
 	xe_guc.o \
 	xe_guc_ads.o \
 	xe_guc_ct.o \
+	xe_guc_db_mgr.o \
 	xe_guc_debugfs.o \
 	xe_guc_hwconfig.o \
 	xe_guc_log.o \
@@ -138,6 +140,7 @@ xe-y += xe_bb.o \
 	xe_uc_debugfs.o \
 	xe_uc_fw.o \
 	xe_vm.o \
+	xe_vram_freq.o \
 	xe_wait_user_fence.o \
 	xe_wa.o \
 	xe_wopcm.o
@@ -146,18 +149,25 @@ xe-y += xe_bb.o \
 xe-$(CONFIG_HWMON) += xe_hwmon.o
 
 # graphics virtualization (SR-IOV) support
-xe-y += xe_sriov.o
+xe-y += \
+	xe_guc_relay.o \
+	xe_memirq.o \
+	xe_sriov.o
 
 xe-$(CONFIG_PCI_IOV) += \
 	xe_lmtt.o \
 	xe_lmtt_2l.o \
 	xe_lmtt_ml.o
 
+# include helpers for tests even when XE is built-in
+ifdef CONFIG_DRM_XE_KUNIT_TEST
+xe-y += tests/xe_kunit_helpers.o
+endif
+
 # i915 Display compat #defines and #includes
 subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \
 	-I$(srctree)/$(src)/display/ext \
 	-I$(srctree)/$(src)/compat-i915-headers \
-	-I$(srctree)/drivers/gpu/drm/xe/display/ \
 	-I$(srctree)/drivers/gpu/drm/i915/display/ \
 	-Ddrm_i915_gem_object=xe_bo \
 	-Ddrm_i915_private=xe_device
@@ -177,17 +187,17 @@ $(obj)/i915-display/%.o: $(srctree)/drivers/gpu/drm/i915/display/%.c FORCE
 
 # Display code specific to xe
 xe-$(CONFIG_DRM_XE_DISPLAY) += \
-	xe_display.o \
-	display/xe_fb_pin.o \
-	display/xe_hdcp_gsc.o \
-	display/xe_plane_initial.o \
-	display/xe_display_rps.o \
+	display/ext/i915_irq.o \
+	display/ext/i915_utils.o \
+	display/intel_fb_bo.o \
+	display/intel_fbdev_fb.o \
+	display/xe_display.o \
 	display/xe_display_misc.o \
+	display/xe_display_rps.o \
 	display/xe_dsb_buffer.o \
-	display/intel_fbdev_fb.o \
-	display/intel_fb_bo.o \
-	display/ext/i915_irq.o \
-	display/ext/i915_utils.o
+	display/xe_fb_pin.o \
+	display/xe_hdcp_gsc.o \
+	display/xe_plane_initial.o
 
 # SOC code shared with i915
 xe-$(CONFIG_DRM_XE_DISPLAY) += \
@@ -214,8 +224,6 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
 	i915-display/intel_ddi.o \
 	i915-display/intel_ddi_buf_trans.o \
 	i915-display/intel_display.o \
-	i915-display/intel_display_debugfs.o \
-	i915-display/intel_display_debugfs_params.o \
 	i915-display/intel_display_device.o \
 	i915-display/intel_display_driver.o \
 	i915-display/intel_display_irq.o \
@@ -259,7 +267,6 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
 	i915-display/intel_modeset_setup.o \
 	i915-display/intel_modeset_verify.o \
 	i915-display/intel_panel.o \
-	i915-display/intel_pipe_crc.o \
 	i915-display/intel_pmdemand.o \
 	i915-display/intel_pps.o \
 	i915-display/intel_psr.o \
@@ -286,6 +293,13 @@ ifeq ($(CONFIG_DRM_FBDEV_EMULATION),y)
 	xe-$(CONFIG_DRM_XE_DISPLAY) += i915-display/intel_fbdev.o
 endif
 
+ifeq ($(CONFIG_DEBUG_FS),y)
+	xe-$(CONFIG_DRM_XE_DISPLAY) += \
+		i915-display/intel_display_debugfs.o \
+		i915-display/intel_display_debugfs_params.o \
+		i915-display/intel_pipe_crc.o
+endif
+
 obj-$(CONFIG_DRM_XE) += xe.o
 obj-$(CONFIG_DRM_XE_KUNIT_TEST) += tests/
diff --git a/drivers/gpu/drm/xe/abi/gsc_proxy_commands_abi.h b/drivers/gpu/drm/xe/abi/gsc_proxy_commands_abi.h
new file mode 100644
index 0000000000..80bbf06a3e
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/gsc_proxy_commands_abi.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _ABI_GSC_PROXY_COMMANDS_ABI_H
+#define _ABI_GSC_PROXY_COMMANDS_ABI_H
+
+#include <linux/types.h>
+
+/* Heci client ID for proxy commands */
+#define HECI_MEADDRESS_PROXY 10
+
+/* FW-defined proxy header */
+struct xe_gsc_proxy_header {
+	/*
+	 * hdr:
+	 * Bits 0-7: type of the proxy message (see enum xe_gsc_proxy_type)
+	 * Bits 8-15: rsvd
+	 * Bits 16-31: length in bytes of the payload following the proxy header
+	 */
+	u32 hdr;
+#define GSC_PROXY_TYPE			GENMASK(7, 0)
+#define GSC_PROXY_PAYLOAD_LENGTH	GENMASK(31, 16)
+
+	u32 source;		/* Source of the Proxy message */
+	u32 destination;	/* Destination of the Proxy message */
+#define GSC_PROXY_ADDRESSING_KMD	0x10000
+#define GSC_PROXY_ADDRESSING_GSC	0x20000
+#define GSC_PROXY_ADDRESSING_CSME	0x30000
+
+	u32 status;		/* Command status */
+} __packed;
+
+/* FW-defined proxy types */
+enum xe_gsc_proxy_type {
+	GSC_PROXY_MSG_TYPE_PROXY_INVALID = 0,
+	GSC_PROXY_MSG_TYPE_PROXY_QUERY = 1,
+	GSC_PROXY_MSG_TYPE_PROXY_PAYLOAD = 2,
+	GSC_PROXY_MSG_TYPE_PROXY_END = 3,
+	GSC_PROXY_MSG_TYPE_PROXY_NOTIFICATION = 4,
+};
+
+#endif
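Editor's note: the hdr bitfields above are plain GENMASK() masks, so they pair with the FIELD_PREP()/FIELD_GET() helpers from <linux/bitfield.h>. A minimal sketch of packing and unpacking them; the helper names are illustrative, not part of the patch:

	#include <linux/bitfield.h>

	/* Illustrative only: compose the first dword of a proxy header. */
	static u32 gsc_proxy_hdr(enum xe_gsc_proxy_type type, u32 payload_len)
	{
		return FIELD_PREP(GSC_PROXY_TYPE, type) |
		       FIELD_PREP(GSC_PROXY_PAYLOAD_LENGTH, payload_len);
	}

	/* Illustrative only: recover the payload length from a received header. */
	static u32 gsc_proxy_payload_len(const struct xe_gsc_proxy_header *hdr)
	{
		return FIELD_GET(GSC_PROXY_PAYLOAD_LENGTH, hdr->hdr);
	}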
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
new file mode 100644
index 0000000000..5496a58908
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _GUC_ACTIONS_PF_ABI_H
+#define _GUC_ACTIONS_PF_ABI_H
+
+#include "guc_communication_ctb_abi.h"
+
+/**
+ * DOC: GUC2PF_RELAY_FROM_VF
+ *
+ * This message is used by the GuC firmware to forward a VF2PF `Relay Message`_
+ * received from the Virtual Function (VF) driver to this Physical Function (PF)
+ * driver.
+ *
+ * This message is always sent as `CTB HXG Message`_.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_GUC_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_                                   |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | MBZ                                                          |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF` = 0x5100      |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **VFID** - source VF identifier                              |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 2 |  31:0 | **RELAY_ID** - VF/PF message ID                              |
+ *  +---+-------+-----------------+--------------------------------------------+
+ *  | 3 |  31:0 | **RELAY_DATA1** |                                            |
+ *  +---+-------+-----------------+                                            |
+ *  |...|       |                 |       [Embedded `Relay Message`_]          |
+ *  +---+-------+-----------------+                                            |
+ *  | n |  31:0 | **RELAY_DATAx** |                                            |
+ *  +---+-------+-----------------+--------------------------------------------+
+ */
+#define XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF		0x5100
+
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN		(GUC_HXG_EVENT_MSG_MIN_LEN + 2u)
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_MAX_LEN \
+	(GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN + GUC_RELAY_MSG_MAX_LEN)
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_0_MBZ		GUC_HXG_EVENT_MSG_0_DATA0
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_1_VFID		GUC_HXG_EVENT_MSG_n_DATAn
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_2_RELAY_ID	GUC_HXG_EVENT_MSG_n_DATAn
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_3_RELAY_DATA1	GUC_HXG_EVENT_MSG_n_DATAn
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_n_RELAY_DATAx	GUC_HXG_EVENT_MSG_n_DATAn
+#define GUC2PF_RELAY_FROM_VF_EVENT_MSG_NUM_RELAY_DATA	GUC_RELAY_MSG_MAX_LEN
+
+/**
+ * DOC: PF2GUC_RELAY_TO_VF
+ *
+ * This H2G message is used by the Physical Function (PF) driver to send embedded
+ * VF2PF `Relay Message`_ to the VF.
+ *
+ * This action message must be sent over CTB as `CTB HXG Message`_.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = `GUC_HXG_TYPE_FAST_REQUEST`_                          |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | MBZ                                                          |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`XE_GUC_ACTION_PF2GUC_RELAY_TO_VF` = 0x5101        |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **VFID** - target VF identifier                              |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 2 |  31:0 | **RELAY_ID** - VF/PF message ID                              |
+ *  +---+-------+-----------------+--------------------------------------------+
+ *  | 3 |  31:0 | **RELAY_DATA1** |                                            |
+ *  +---+-------+-----------------+                                            |
+ *  |...|       |                 |       [Embedded `Relay Message`_]          |
+ *  +---+-------+-----------------+                                            |
+ *  | n |  31:0 | **RELAY_DATAx** |                                            |
+ *  +---+-------+-----------------+--------------------------------------------+
+ */
+#define XE_GUC_ACTION_PF2GUC_RELAY_TO_VF		0x5101
+
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN		(GUC_HXG_REQUEST_MSG_MIN_LEN + 2u)
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_MAX_LEN \
+	(PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN + GUC_RELAY_MSG_MAX_LEN)
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_1_VFID		GUC_HXG_REQUEST_MSG_n_DATAn
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_2_RELAY_ID	GUC_HXG_REQUEST_MSG_n_DATAn
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_3_RELAY_DATA1	GUC_HXG_REQUEST_MSG_n_DATAn
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_n_RELAY_DATAx	GUC_HXG_REQUEST_MSG_n_DATAn
+#define PF2GUC_RELAY_TO_VF_REQUEST_MSG_NUM_RELAY_DATA	GUC_RELAY_MSG_MAX_LEN
+
+/**
+ * DOC: GUC2VF_RELAY_FROM_PF
+ *
+ * This message is used by the GuC firmware to deliver `Relay Message`_ from the
+ * Physical Function (PF) driver to this Virtual Function (VF) driver.
+ * See `GuC Relay Communication`_ for details.
+ *
+ * This message is always sent over CTB.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_GUC_                                 |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_                                   |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | MBZ                                                          |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF` = 0x5102      |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **RELAY_ID** - VF/PF message ID                              |
+ *  +---+-------+-----------------+--------------------------------------------+
+ *  | 2 |  31:0 | **RELAY_DATA1** |                                            |
+ *  +---+-------+-----------------+                                            |
+ *  |...|       |                 |       [Embedded `Relay Message`_]          |
+ *  +---+-------+-----------------+                                            |
+ *  | n |  31:0 | **RELAY_DATAx** |                                            |
+ *  +---+-------+-----------------+--------------------------------------------+
+ */
+#define XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF		0x5102
+
+#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN		(GUC_HXG_EVENT_MSG_MIN_LEN + 1u)
+#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_MAX_LEN \
+	(GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN + GUC_RELAY_MSG_MAX_LEN)
+#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_0_MBZ		GUC_HXG_EVENT_MSG_0_DATA0
+#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_1_RELAY_ID	GUC_HXG_EVENT_MSG_n_DATAn
+#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_n_RELAY_DATAx	GUC_HXG_EVENT_MSG_n_DATAn
+#define GUC2VF_RELAY_FROM_PF_EVENT_MSG_NUM_RELAY_DATA	GUC_RELAY_MSG_MAX_LEN
+
+/**
+ * DOC: VF2GUC_RELAY_TO_PF
+ *
+ * This message is used by the Virtual Function (VF) drivers to communicate with
+ * the Physical Function (PF) driver and send `Relay Message`_ to the PF driver.
+ * See `GuC Relay Communication`_ for details.
+ *
+ * This message must be sent over CTB.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ or GUC_HXG_TYPE_FAST_REQUEST_   |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | MBZ                                                          |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`XE_GUC_ACTION_VF2GUC_RELAY_TO_PF` = 0x5103        |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **RELAY_ID** - VF/PF message ID                              |
+ *  +---+-------+-----------------+--------------------------------------------+
+ *  | 2 |  31:0 | **RELAY_DATA1** |                                            |
+ *  +---+-------+-----------------+                                            |
+ *  |...|       |                 |       [Embedded `Relay Message`_]          |
+ *  +---+-------+-----------------+                                            |
+ *  | n |  31:0 | **RELAY_DATAx** |                                            |
+ *  +---+-------+-----------------+--------------------------------------------+
+ */
+#define XE_GUC_ACTION_VF2GUC_RELAY_TO_PF		0x5103
+
+#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN		(GUC_HXG_REQUEST_MSG_MIN_LEN + 1u)
+#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_MAX_LEN \
+	(VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN + GUC_RELAY_MSG_MAX_LEN)
+#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
+#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_1_RELAY_ID	GUC_HXG_REQUEST_MSG_n_DATAn
+#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_n_RELAY_DATAx	GUC_HXG_REQUEST_MSG_n_DATAn
+#define VF2GUC_RELAY_TO_PF_REQUEST_MSG_NUM_RELAY_DATA	GUC_RELAY_MSG_MAX_LEN
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
index 0b1146d0c9..8f86a16dc5 100644
--- a/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_communication_ctb_abi.h
@@ -81,12 +81,13 @@ static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
 
 #define GUC_CTB_HDR_LEN			1u
 #define GUC_CTB_MSG_MIN_LEN		GUC_CTB_HDR_LEN
-#define GUC_CTB_MSG_MAX_LEN		256u
+#define GUC_CTB_MSG_MAX_LEN		(GUC_CTB_MSG_MIN_LEN + GUC_CTB_MAX_DWORDS)
 #define GUC_CTB_MSG_0_FENCE		(0xffffu << 16)
 #define GUC_CTB_MSG_0_FORMAT		(0xfu << 12)
 #define   GUC_CTB_FORMAT_HXG		0u
 #define GUC_CTB_MSG_0_RESERVED		(0xfu << 8)
 #define GUC_CTB_MSG_0_NUM_DWORDS	(0xffu << 0)
+#define GUC_CTB_MAX_DWORDS		255
 
 /**
  * DOC: CTB HXG Message
diff --git a/drivers/gpu/drm/xe/abi/guc_messages_abi.h b/drivers/gpu/drm/xe/abi/guc_messages_abi.h
index 29e414c82d..534a39db77 100644
--- a/drivers/gpu/drm/xe/abi/guc_messages_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_messages_abi.h
@@ -24,6 +24,7 @@
  *  |   | 30:28 | **TYPE** - message type                                      |
  *  |   |       |   - _`GUC_HXG_TYPE_REQUEST` = 0                              |
  *  |   |       |   - _`GUC_HXG_TYPE_EVENT` = 1                                |
+ *  |   |       |   - _`GUC_HXG_TYPE_FAST_REQUEST` = 2                         |
  *  |   |       |   - _`GUC_HXG_TYPE_NO_RESPONSE_BUSY` = 3                     |
  *  |   |       |   - _`GUC_HXG_TYPE_NO_RESPONSE_RETRY` = 5                    |
  *  |   |       |   - _`GUC_HXG_TYPE_RESPONSE_FAILURE` = 6                     |
@@ -46,6 +47,7 @@
 #define GUC_HXG_MSG_0_TYPE			(0x7u << 28)
 #define   GUC_HXG_TYPE_REQUEST			0u
 #define   GUC_HXG_TYPE_EVENT			1u
+#define   GUC_HXG_TYPE_FAST_REQUEST		2u
 #define   GUC_HXG_TYPE_NO_RESPONSE_BUSY		3u
 #define   GUC_HXG_TYPE_NO_RESPONSE_RETRY	5u
 #define   GUC_HXG_TYPE_RESPONSE_FAILURE		6u
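Editor's note: the fixed dwords of the relay actions above are assembled with FIELD_PREP() against the GUC_HXG_* masks from guc_messages_abi.h; GUC_HXG_TYPE_FAST_REQUEST differs from GUC_HXG_TYPE_REQUEST in that no response is expected. A hedged sketch of building msg[0] of a VF2GUC_RELAY_TO_PF request (the helper is illustrative, not from the patch; msg[1] would carry RELAY_ID and msg[2..] the embedded Relay Message):

	#include <linux/bitfield.h>

	/* Illustrative only: the header dword of a VF2GUC_RELAY_TO_PF request. */
	static u32 vf2guc_relay_to_pf_hdr(bool no_reply)
	{
		return FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
		       FIELD_PREP(GUC_HXG_MSG_0_TYPE, no_reply ?
				  GUC_HXG_TYPE_FAST_REQUEST : GUC_HXG_TYPE_REQUEST) |
		       FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
				  XE_GUC_ACTION_VF2GUC_RELAY_TO_PF);
	}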
diff --git a/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h
new file mode 100644
index 0000000000..747e428de4
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_relay_actions_abi.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_RELAY_ACTIONS_ABI_H_
+#define _ABI_GUC_RELAY_ACTIONS_ABI_H_
+
+/**
+ * DOC: GuC Relay Debug Actions
+ *
+ * This range of action codes is reserved for debugging purposes only and should
+ * be used only on debug builds. These actions may not be supported by the
+ * production drivers. Their definitions could be changed in the future.
+ *
+ * _`GUC_RELAY_ACTION_DEBUG_ONLY_START` = 0xDEB0
+ * _`GUC_RELAY_ACTION_DEBUG_ONLY_END` = 0xDEFF
+ */
+
+#define GUC_RELAY_ACTION_DEBUG_ONLY_START	0xDEB0
+#define GUC_RELAY_ACTION_DEBUG_ONLY_END		0xDEFF
+
+/**
+ * DOC: VFXPF_TESTLOOP
+ *
+ * This `Relay Message`_ is used to selftest the `GuC Relay Communication`_.
+ *
+ * The following opcodes are defined:
+ * VFXPF_TESTLOOP_OPCODE_NOP_ will return no data.
+ * VFXPF_TESTLOOP_OPCODE_BUSY_ will reply with BUSY response first.
+ * VFXPF_TESTLOOP_OPCODE_RETRY_ will reply with RETRY response instead.
+ * VFXPF_TESTLOOP_OPCODE_ECHO_ will return same data as received.
+ * VFXPF_TESTLOOP_OPCODE_FAIL_ will always fail with error.
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ or GUC_HXG_TYPE_FAST_REQUEST_   |
+ *  |   |       | or GUC_HXG_TYPE_EVENT_                                       |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 27:16 | **OPCODE**                                                   |
+ *  |   |       |   - _`VFXPF_TESTLOOP_OPCODE_NOP` = 0x0                       |
+ *  |   |       |   - _`VFXPF_TESTLOOP_OPCODE_BUSY` = 0xB                      |
+ *  |   |       |   - _`VFXPF_TESTLOOP_OPCODE_RETRY` = 0xD                     |
+ *  |   |       |   - _`VFXPF_TESTLOOP_OPCODE_ECHO` = 0xE                      |
+ *  |   |       |   - _`VFXPF_TESTLOOP_OPCODE_FAIL` = 0xF                      |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | ACTION = _`IOV_ACTION_SELFTEST_RELAY`                        |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **DATA1** = optional, depends on **OPCODE**:                 |
+ *  |   |       | for VFXPF_TESTLOOP_OPCODE_BUSY_: time in ms for reply        |
+ *  |   |       | for VFXPF_TESTLOOP_OPCODE_FAIL_: expected error              |
+ *  |   |       | for VFXPF_TESTLOOP_OPCODE_ECHO_: payload                     |
+ *  +---+-------+--------------------------------------------------------------+
+ *  |...|  31:0 | **DATAn** = only for **OPCODE** VFXPF_TESTLOOP_OPCODE_ECHO_  |
+ *  +---+-------+--------------------------------------------------------------+
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 |    31 | ORIGIN = GUC_HXG_ORIGIN_HOST_                                |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_                        |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  27:0 | DATA0 = MBZ                                                  |
+ *  +---+-------+--------------------------------------------------------------+
+ *  |...|  31:0 | DATAn = only for **OPCODE** VFXPF_TESTLOOP_OPCODE_ECHO_      |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+#define GUC_RELAY_ACTION_VFXPF_TESTLOOP		(GUC_RELAY_ACTION_DEBUG_ONLY_START + 1)
+#define   VFXPF_TESTLOOP_OPCODE_NOP		0x0
+#define   VFXPF_TESTLOOP_OPCODE_BUSY		0xB
+#define   VFXPF_TESTLOOP_OPCODE_RETRY		0xD
+#define   VFXPF_TESTLOOP_OPCODE_ECHO		0xE
+#define   VFXPF_TESTLOOP_OPCODE_FAIL		0xF
+
+#endif
diff --git a/drivers/gpu/drm/xe/abi/guc_relay_communication_abi.h b/drivers/gpu/drm/xe/abi/guc_relay_communication_abi.h
new file mode 100644
index 0000000000..f92625f047
--- /dev/null
+++ b/drivers/gpu/drm/xe/abi/guc_relay_communication_abi.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_RELAY_COMMUNICATION_ABI_H
+#define _ABI_GUC_RELAY_COMMUNICATION_ABI_H
+
+#include <linux/build_bug.h>
+
+#include "guc_actions_sriov_abi.h"
+#include "guc_communication_ctb_abi.h"
+#include "guc_messages_abi.h"
+
+/**
+ * DOC: GuC Relay Communication
+ *
+ * The communication between Virtual Function (VF) drivers and Physical Function
+ * (PF) drivers is based on the GuC firmware acting as a proxy (relay) agent.
+ *
+ * To communicate with the PF driver, VF's drivers use `VF2GUC_RELAY_TO_PF`_
+ * action that takes the `Relay Message`_ as opaque payload and requires the
+ * relay message identifier (RID) as additional parameter.
+ *
+ * This identifier is used by the drivers to match related messages.
+ *
+ * The GuC forwards this `Relay Message`_ and its identifier to the PF driver
+ * in `GUC2PF_RELAY_FROM_VF`_ action. This event message additionally contains
+ * the identifier of the origin VF (VFID).
+ *
+ * Likewise, to communicate with the VF drivers, the PF driver uses the
+ * `PF2GUC_RELAY_TO_VF`_ action, which in addition to the `Relay Message`_
+ * and the relay message identifier (RID) also takes the target VF identifier.
+ *
+ * The GuC uses this target VFID from the message to select where to send the
+ * `GUC2VF_RELAY_FROM_PF`_ with the embedded `Relay Message`_ with response::
+ *
+ *              VF                             GuC                             PF
+ *               |                              |                               |
+ *              [ ] VF2GUC_RELAY_TO_PF          |                               |
+ *              [ ]--------------------------->[ ]                              |
+ *              [ ] { rid, msg }               [ ]                              |
+ *              [ ]                            [ ] GUC2PF_RELAY_FROM_VF         |
+ *              [ ]                            [ ]--------------------------->[ ]
+ *              [ ]                             |  { VFID, rid, msg }          [ ]
+ *              [ ]                             |                              [ ]
+ *              [ ]                             |  PF2GUC_RELAY_TO_VF          [ ]
+ *              [ ]                            [ ]<---------------------------[ ]
+ *              [ ]                            [ ] { VFID, rid, reply }         |
+ *              [ ] GUC2VF_RELAY_FROM_PF       [ ]                              |
+ *              [ ]<---------------------------[ ]                              |
+ *               |  { rid, reply }              |                               |
+ *               |                              |                               |
+ *
+ * It is also possible that PF driver will initiate communication with the
+ * selected VF driver. The same GuC action messages will be used::
+ *
+ *              VF                             GuC                             PF
+ *               |                              |                               |
+ *               |                              |  PF2GUC_RELAY_TO_VF          [ ]
+ *               |                             [ ]<---------------------------[ ]
+ *               |                             [ ] { VFID, rid, msg }          [ ]
+ *               |  GUC2VF_RELAY_FROM_PF       [ ]                             [ ]
+ *              [ ]<---------------------------[ ]                             [ ]
+ *              [ ] { rid, msg }                |                              [ ]
+ *              [ ]                             |                              [ ]
+ *              [ ] VF2GUC_RELAY_TO_PF          |                              [ ]
+ *              [ ]--------------------------->[ ]                             [ ]
+ *               |  { rid, reply }             [ ]                             [ ]
+ *               |                             [ ] GUC2PF_RELAY_FROM_VF        [ ]
+ *               |                             [ ]--------------------------->[ ]
+ *               |                              |  { VFID, rid, reply }         |
+ *               |                              |                               |
+ */
+
+/**
+ * DOC: Relay Message
+ *
+ * The `Relay Message`_ is used by Physical Function (PF) driver and Virtual
+ * Function (VF) drivers to communicate using `GuC Relay Communication`_.
+ *
+ * Format of the `Relay Message`_ follows format of the generic `HXG Message`_.
+ *
+ *  +--------------------------------------------------------------------------+
+ *  |  `Relay Message`_                                                        |
+ *  +==========================================================================+
+ *  |  `HXG Message`_                                                          |
+ *  +--------------------------------------------------------------------------+
+ *
+ * Maximum length of the `Relay Message`_ is limited by the maximum length of
+ * the `CTB HXG Message`_ and format of the `GUC2PF_RELAY_FROM_VF`_ message.
+ */
+
+#define GUC_RELAY_MSG_MIN_LEN	GUC_HXG_MSG_MIN_LEN
+#define GUC_RELAY_MSG_MAX_LEN \
+	(GUC_CTB_MAX_DWORDS - GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN)
+
+static_assert(PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN >
+	      VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN);
+
+/**
+ * DOC: Relay Error Codes
+ *
+ * The `GuC Relay Communication`_ can be used to pass `Relay Message`_ between
+ * drivers that run on different Operating Systems. To help in troubleshooting,
+ * `GuC Relay Communication`_ uses error codes that mostly match errno values.
+ */
+
+#define GUC_RELAY_ERROR_UNDISCLOSED			0
+#define GUC_RELAY_ERROR_OPERATION_NOT_PERMITTED		1	/* EPERM */
+#define GUC_RELAY_ERROR_PERMISSION_DENIED		13	/* EACCES */
+#define GUC_RELAY_ERROR_INVALID_ARGUMENT		22	/* EINVAL */
+#define GUC_RELAY_ERROR_INVALID_REQUEST_CODE		56	/* EBADRQC */
+#define GUC_RELAY_ERROR_NO_DATA_AVAILABLE		61	/* ENODATA */
+#define GUC_RELAY_ERROR_PROTOCOL_ERROR			71	/* EPROTO */
+#define GUC_RELAY_ERROR_MESSAGE_SIZE			90	/* EMSGSIZE */
+
+#endif
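Editor's note: because these codes deliberately shadow errno values, decoding on the receiving side can be a simple table. A sketch under that assumption (the helper is hypothetical; the patch's actual decoder is not shown in this hunk):

	#include <linux/errno.h>

	/* Hypothetical decoder: relay error code -> negative errno. */
	static int relay_error_to_errno(u32 error)
	{
		switch (error) {
		case GUC_RELAY_ERROR_OPERATION_NOT_PERMITTED:	return -EPERM;
		case GUC_RELAY_ERROR_PERMISSION_DENIED:		return -EACCES;
		case GUC_RELAY_ERROR_INVALID_ARGUMENT:		return -EINVAL;
		case GUC_RELAY_ERROR_INVALID_REQUEST_CODE:	return -EBADRQC;
		case GUC_RELAY_ERROR_NO_DATA_AVAILABLE:		return -ENODATA;
		case GUC_RELAY_ERROR_PROTOCOL_ERROR:		return -EPROTO;
		case GUC_RELAY_ERROR_MESSAGE_SIZE:		return -EMSGSIZE;
		default:	return -EIO;	/* incl. UNDISCLOSED; assumed fallback */
		}
	}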
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
index d9a81e6a4b..854a7bb535 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
@@ -163,18 +163,18 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev)
 
 #include "intel_wakeref.h"
 
-static inline bool intel_runtime_pm_get(struct xe_runtime_pm *pm)
+static inline intel_wakeref_t intel_runtime_pm_get(struct xe_runtime_pm *pm)
 {
 	struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
 
 	if (xe_pm_runtime_get(xe) < 0) {
 		xe_pm_runtime_put(xe);
-		return false;
+		return 0;
 	}
-	return true;
+	return 1;
 }
 
-static inline bool intel_runtime_pm_get_if_in_use(struct xe_runtime_pm *pm)
+static inline intel_wakeref_t intel_runtime_pm_get_if_in_use(struct xe_runtime_pm *pm)
 {
 	struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm);
 
@@ -188,7 +188,7 @@ static inline void intel_runtime_pm_put_unchecked(struct xe_runtime_pm *pm)
 	xe_pm_runtime_put(xe);
 }
 
-static inline void intel_runtime_pm_put(struct xe_runtime_pm *pm, bool wakeref)
+static inline void intel_runtime_pm_put(struct xe_runtime_pm *pm, intel_wakeref_t wakeref)
 {
 	if (wakeref)
 		intel_runtime_pm_put_unchecked(pm);
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h
index 888e7a87a9..bd233007c1 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h
@@ -19,6 +19,9 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe,
 	int err;
 	u32 flags = XE_BO_CREATE_PINNED_BIT | XE_BO_CREATE_STOLEN_BIT;
 
+	if (align)
+		size = ALIGN(size, align);
+
 	bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe),
 				       NULL, size, start, end,
 				       ttm_bo_type_kernel, flags);
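Editor's note: the switch from bool to intel_wakeref_t keeps the shared i915 display code working unchanged: callers treat the cookie as truthy on success and hand it back on release. A usage sketch of the pattern only (the function name is illustrative; "pm" is any struct xe_runtime_pm):

	/* Illustrative only: the wakeref-cookie acquire/release pattern. */
	static void touch_hw_with_rpm(struct xe_runtime_pm *pm)
	{
		intel_wakeref_t wakeref = intel_runtime_pm_get(pm);

		if (!wakeref)
			return;		/* device could not be resumed */

		/* ... hardware access is safe here ... */

		intel_runtime_pm_put(pm, wakeref);
	}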
diff --git a/drivers/gpu/drm/xe/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c
index 6ec375c1c4..6ec375c1c4 100644
--- a/drivers/gpu/drm/xe/xe_display.c
+++ b/drivers/gpu/drm/xe/display/xe_display.c
diff --git a/drivers/gpu/drm/xe/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h
index 710e56180b..710e56180b 100644
--- a/drivers/gpu/drm/xe/xe_display.h
+++ b/drivers/gpu/drm/xe/display/xe_display.h
diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c
index ccf83c12b5..866d1dd6ee 100644
--- a/drivers/gpu/drm/xe/display/xe_plane_initial.c
+++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c
@@ -10,6 +10,7 @@
 
 #include "i915_drv.h"
 #include "intel_atomic_plane.h"
+#include "intel_crtc.h"
 #include "intel_display.h"
 #include "intel_display_types.h"
 #include "intel_fb.h"
@@ -18,19 +19,20 @@
 #include "intel_plane_initial.h"
 
 static bool
-intel_reuse_initial_plane_obj(struct drm_i915_private *i915,
-			      const struct intel_initial_plane_config *plane_config,
+intel_reuse_initial_plane_obj(struct intel_crtc *this,
+			      const struct intel_initial_plane_config plane_configs[],
 			      struct drm_framebuffer **fb)
 {
+	struct drm_i915_private *i915 = to_i915(this->base.dev);
 	struct intel_crtc *crtc;
 
 	for_each_intel_crtc(&i915->drm, crtc) {
-		struct intel_crtc_state *crtc_state =
-			to_intel_crtc_state(crtc->base.state);
 		struct intel_plane *plane =
 			to_intel_plane(crtc->base.primary);
-		struct intel_plane_state *plane_state =
+		const struct intel_plane_state *plane_state =
 			to_intel_plane_state(plane->base.state);
+		const struct intel_crtc_state *crtc_state =
+			to_intel_crtc_state(crtc->base.state);
 
 		if (!crtc_state->uapi.active)
 			continue;
@@ -38,7 +40,7 @@ intel_reuse_initial_plane_obj(struct drm_i915_private *i915,
 		if (!plane_state->ggtt_vma)
 			continue;
 
-		if (intel_plane_ggtt_offset(plane_state) == plane_config->base) {
+		if (plane_configs[this->pipe].base == plane_configs[crtc->pipe].base) {
 			*fb = plane_state->hw.fb;
 			return true;
 		}
@@ -178,10 +180,10 @@ err_bo:
 
 static void
 intel_find_initial_plane_obj(struct intel_crtc *crtc,
-			     struct intel_initial_plane_config *plane_config)
+			     struct intel_initial_plane_config plane_configs[])
 {
-	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_initial_plane_config *plane_config =
+		&plane_configs[crtc->pipe];
 	struct intel_plane *plane =
 		to_intel_plane(crtc->base.primary);
 	struct intel_plane_state *plane_state =
@@ -201,7 +203,7 @@ intel_find_initial_plane_obj(struct intel_crtc *crtc,
 	if (intel_alloc_initial_plane_obj(crtc, plane_config))
 		fb = &plane_config->fb->base;
-	else if (!intel_reuse_initial_plane_obj(dev_priv, plane_config, &fb))
+	else if (!intel_reuse_initial_plane_obj(crtc, plane_configs, &fb))
 		goto nofb;
 
 	plane_state->uapi.rotation = plane_config->rotation;
@@ -267,25 +269,36 @@ static void plane_config_fini(struct intel_initial_plane_config *plane_config)
 	}
 }
 
-void intel_crtc_initial_plane_config(struct intel_crtc *crtc)
+void intel_initial_plane_config(struct drm_i915_private *i915)
 {
-	struct xe_device *xe = to_xe_device(crtc->base.dev);
-	struct intel_initial_plane_config plane_config = {};
+	struct intel_initial_plane_config plane_configs[I915_MAX_PIPES] = {};
+	struct intel_crtc *crtc;
 
-	/*
-	 * Note that reserving the BIOS fb up front prevents us
-	 * from stuffing other stolen allocations like the ring
-	 * on top. This prevents some ugliness at boot time, and
-	 * can even allow for smooth boot transitions if the BIOS
-	 * fb is large enough for the active pipe configuration.
-	 */
-	xe->display.funcs.display->get_initial_plane_config(crtc, &plane_config);
+	for_each_intel_crtc(&i915->drm, crtc) {
+		struct intel_initial_plane_config *plane_config =
+			&plane_configs[crtc->pipe];
 
-	/*
-	 * If the fb is shared between multiple heads, we'll
-	 * just get the first one.
-	 */
-	intel_find_initial_plane_obj(crtc, &plane_config);
+		if (!to_intel_crtc_state(crtc->base.state)->uapi.active)
+			continue;
 
-	plane_config_fini(&plane_config);
+		/*
+		 * Note that reserving the BIOS fb up front prevents us
+		 * from stuffing other stolen allocations like the ring
+		 * on top. This prevents some ugliness at boot time, and
+		 * can even allow for smooth boot transitions if the BIOS
+		 * fb is large enough for the active pipe configuration.
+		 */
+		i915->display.funcs.display->get_initial_plane_config(crtc, plane_config);
+
+		/*
+		 * If the fb is shared between multiple heads, we'll
+		 * just get the first one.
+		 */
+		intel_find_initial_plane_obj(crtc, plane_configs);
+
+		if (i915->display.funcs.display->fixup_initial_plane_config(crtc, plane_config))
+			intel_crtc_wait_for_next_vblank(crtc);
+
+		plane_config_fini(plane_config);
+	}
 }
diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
index 1cfa96167f..c74ceb550d 100644
--- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
@@ -56,6 +56,9 @@
 #define   MI_FLUSH_IMM_QW		REG_FIELD_PREP(MI_FLUSH_DW_LEN_DW, 5 - 2)
 #define   MI_FLUSH_DW_USE_GTT		REG_BIT(2)
 
+#define MI_LOAD_REGISTER_MEM		(__MI_INSTR(0x29) | XE_INSTR_NUM_DW(4))
+#define   MI_LRM_USE_GGTT		REG_BIT(22)
+
 #define MI_BATCH_BUFFER_START		__MI_INSTR(0x31)
 
 #endif
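Editor's note: MI_LOAD_REGISTER_MEM as defined above is a four-dword command: the header, a register offset, and a 64-bit memory address. A hedged sketch of emitting it into a command stream (the helper name and the cs-cursor convention are illustrative, not from the patch):

	/* Illustrative only: emit MI_LRM loading a register from a GGTT address. */
	static u32 *emit_mi_lrm_ggtt(u32 *cs, u32 reg_offset, u64 ggtt_addr)
	{
		*cs++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT;
		*cs++ = reg_offset;			/* register to load */
		*cs++ = lower_32_bits(ggtt_addr);	/* source address, low dword */
		*cs++ = upper_32_bits(ggtt_addr);	/* source address, high dword */
		return cs;
	}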
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 81b8362e93..deddc8be48 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -75,12 +75,17 @@
 #define FF_THREAD_MODE(base)			XE_REG((base) + 0xa0)
 #define   FF_TESSELATION_DOP_GATE_DISABLE	BIT(19)
 
+#define RING_INT_SRC_RPT_PTR(base)		XE_REG((base) + 0xa4)
 #define RING_IMR(base)				XE_REG((base) + 0xa8)
+#define RING_INT_STATUS_RPT_PTR(base)		XE_REG((base) + 0xac)
 
 #define RING_EIR(base)				XE_REG((base) + 0xb0)
 #define RING_EMR(base)				XE_REG((base) + 0xb4)
 #define RING_ESR(base)				XE_REG((base) + 0xb8)
 
+#define INSTPM(base)				XE_REG((base) + 0xc0, XE_REG_OPTION_MASKED)
+#define   ENABLE_SEMAPHORE_POLL_BIT		REG_BIT(13)
+
 #define RING_CMD_CCTL(base)			XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED)
 /*
  * CMD_CCTL read/write fields take a MOCS value and _not_ a table index.
@@ -136,6 +141,7 @@
 #define   TAIL_ADDR				0x001FFFF8
 
 #define RING_CTX_TIMESTAMP(base)		XE_REG((base) + 0x3a8)
+#define CSBE_DEBUG_STATUS(base)			XE_REG((base) + 0x3fc)
 
 #define RING_FORCE_TO_NONPRIV(base, i)		XE_REG(((base) + 0x4d0) + (i) * 4)
 #define   RING_FORCE_TO_NONPRIV_DENY		REG_BIT(30)
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 1dd361046b..15ac2d284d 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -144,8 +144,12 @@
 
 #define GSCPSMI_BASE				XE_REG(0x880c)
 
+#define CCCHKNREG1				XE_REG_MCR(0x8828)
+#define   ENCOMPPERFFIX				REG_BIT(18)
+
 /* Fuse readout registers for GT */
 #define XEHP_FUSE4				XE_REG(0x9114)
+#define   CFEG_WMTP_DISABLE			REG_BIT(20)
 #define   CCS_EN_MASK				REG_GENMASK(19, 16)
 #define   GT_L3_EXC_MASK			REG_GENMASK(6, 4)
 
@@ -288,6 +292,9 @@
 #define XEHP_L3NODEARBCFG			XE_REG_MCR(0xb0b4)
 #define   XEHP_LNESPARE				REG_BIT(19)
 
+#define L3SQCREG3				XE_REG_MCR(0xb108)
+#define   COMPPWOVERFETCHEN			REG_BIT(28)
+
 #define XEHP_L3SQCREG5				XE_REG_MCR(0xb158)
 #define   L3_PWM_TIMER_INIT_VAL_MASK		REG_GENMASK(9, 0)
 
@@ -344,6 +351,9 @@
 #define ROW_CHICKEN3				XE_REG_MCR(0xe49c, XE_REG_OPTION_MASKED)
 #define   DIS_FIX_EOT1_FLUSH			REG_BIT(9)
 
+#define TDL_TSL_CHICKEN				XE_REG_MCR(0xe4c4, XE_REG_OPTION_MASKED)
+#define   SLM_WMTP_RESTORE			REG_BIT(11)
+
 #define ROW_CHICKEN				XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED)
 #define   UGM_BACKUP_MODE			REG_BIT(13)
 #define   MDQ_ARBITRATION_MODE			REG_BIT(12)
@@ -430,6 +440,15 @@
 #define   VOLTAGE_MASK				REG_GENMASK(10, 0)
 
 #define GT_INTR_DW(x)				XE_REG(0x190018 + ((x) * 4))
+#define   INTR_GSC				REG_BIT(31)
+#define   INTR_GUC				REG_BIT(25)
+#define   INTR_MGUC				REG_BIT(24)
+#define   INTR_BCS8				REG_BIT(23)
+#define   INTR_BCS(x)				REG_BIT(15 - (x))
+#define   INTR_CCS(x)				REG_BIT(4 + (x))
+#define   INTR_RCS0				REG_BIT(0)
+#define   INTR_VECS(x)				REG_BIT(31 - (x))
+#define   INTR_VCS(x)				REG_BIT(x)
 
 #define RENDER_COPY_INTR_ENABLE			XE_REG(0x190030)
 #define VCS_VECS_INTR_ENABLE			XE_REG(0x190034)
@@ -446,6 +465,7 @@
 #define   INTR_ENGINE_CLASS(x)			REG_FIELD_GET(GENMASK(18, 16), x)
 #define   INTR_ENGINE_INTR(x)			REG_FIELD_GET(GENMASK(15, 0), x)
 #define   OTHER_GUC_INSTANCE			0
+#define   OTHER_GSC_HECI2_INSTANCE		3
 #define   OTHER_GSC_INSTANCE			6
 
 #define IIR_REG_SELECTOR(x)			XE_REG(0x190070 + ((x) * 4))
@@ -454,6 +474,7 @@
 #define VCS0_VCS1_INTR_MASK			XE_REG(0x1900a8)
 #define VCS2_VCS3_INTR_MASK			XE_REG(0x1900ac)
 #define VECS0_VECS1_INTR_MASK			XE_REG(0x1900d0)
+#define HECI2_RSVD_INTR_MASK			XE_REG(0x1900e4)
 #define GUC_SG_INTR_MASK			XE_REG(0x1900e8)
 #define GPM_WGBOXPERF_INTR_MASK			XE_REG(0x1900ec)
 #define GUNIT_GSC_INTR_MASK			XE_REG(0x1900f4)
@@ -469,10 +490,4 @@
 #define   GT_CS_MASTER_ERROR_INTERRUPT		REG_BIT(3)
 #define   GT_RENDER_USER_INTERRUPT		REG_BIT(0)
 
-#define PVC_GT0_PACKAGE_ENERGY_STATUS		XE_REG(0x281004)
-#define PVC_GT0_PACKAGE_RAPL_LIMIT		XE_REG(0x281008)
-#define PVC_GT0_PACKAGE_POWER_SKU_UNIT		XE_REG(0x281068)
-#define PVC_GT0_PLATFORM_ENERGY_STATUS		XE_REG(0x28106c)
-#define PVC_GT0_PACKAGE_POWER_SKU		XE_REG(0x281080)
-
 #endif
diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
index 4be81abc86..1825d8f79d 100644
--- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
+++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
@@ -14,4 +14,13 @@
 #define CTX_PDP0_UDW		(0x30 + 1)
 #define CTX_PDP0_LDW		(0x32 + 1)
 
+#define CTX_LRM_INT_MASK_ENABLE		0x50
+#define CTX_INT_MASK_ENABLE_REG		(CTX_LRM_INT_MASK_ENABLE + 1)
+#define CTX_INT_MASK_ENABLE_PTR		(CTX_LRM_INT_MASK_ENABLE + 2)
+#define CTX_LRI_INT_REPORT_PTR		0x55
+#define CTX_INT_STATUS_REPORT_REG	(CTX_LRI_INT_REPORT_PTR + 1)
+#define CTX_INT_STATUS_REPORT_PTR	(CTX_LRI_INT_REPORT_PTR + 2)
+#define CTX_INT_SRC_REPORT_REG		(CTX_LRI_INT_REPORT_PTR + 3)
+#define CTX_INT_SRC_REPORT_PTR		(CTX_LRI_INT_REPORT_PTR + 4)
+
 #endif
diff --git a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h
new file mode 100644
index 0000000000..3dae858508
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef _XE_PCODE_REGS_H_
+#define _XE_PCODE_REGS_H_
+
+#include "regs/xe_reg_defs.h"
+
+/*
+ * This file contains addresses of PCODE registers visible through GT MMIO space.
+ */
+
+#define PVC_GT0_PACKAGE_ENERGY_STATUS		XE_REG(0x281004)
+#define PVC_GT0_PACKAGE_RAPL_LIMIT		XE_REG(0x281008)
+#define PVC_GT0_PACKAGE_POWER_SKU_UNIT		XE_REG(0x281068)
+#define PVC_GT0_PLATFORM_ENERGY_STATUS		XE_REG(0x28106c)
+#define PVC_GT0_PACKAGE_POWER_SKU		XE_REG(0x281080)
+
+#endif /* _XE_PCODE_REGS_H_ */
diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile
index 39d8a08922..9d1d88af8b 100644
--- a/drivers/gpu/drm/xe/tests/Makefile
+++ b/drivers/gpu/drm/xe/tests/Makefile
@@ -1,10 +1,15 @@
 # SPDX-License-Identifier: GPL-2.0
+# "live" kunit tests
 obj-$(CONFIG_DRM_XE_KUNIT_TEST) += \
 	xe_bo_test.o \
 	xe_dma_buf_test.o \
 	xe_migrate_test.o \
-	xe_mocs_test.o \
+	xe_mocs_test.o
+
+# Normal kunit tests
+obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_test.o
+xe_test-y = xe_test_mod.o \
 	xe_pci_test.o \
 	xe_rtp_test.o \
 	xe_wa_test.o
diff --git a/drivers/gpu/drm/xe/tests/xe_guc_db_mgr_test.c b/drivers/gpu/drm/xe/tests/xe_guc_db_mgr_test.c
new file mode 100644
index 0000000000..a87a7b4b04
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_guc_db_mgr_test.c
@@ -0,0 +1,201 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+#include "xe_device.h"
+#include "xe_kunit_helpers.h"
+
+static int guc_dbm_test_init(struct kunit *test)
+{
+	struct xe_guc_db_mgr *dbm;
+
+	xe_kunit_helper_xe_device_test_init(test);
+	dbm = &xe_device_get_gt(test->priv, 0)->uc.guc.dbm;
+
+	mutex_init(dbm_mutex(dbm));
+	test->priv = dbm;
+	return 0;
+}
+
+static void test_empty(struct kunit *test)
+{
+	struct xe_guc_db_mgr *dbm = test->priv;
+
+	KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, 0), 0);
+	KUNIT_ASSERT_EQ(test, dbm->count, 0);
+
+	mutex_lock(dbm_mutex(dbm));
+	KUNIT_EXPECT_LT(test, xe_guc_db_mgr_reserve_id_locked(dbm), 0);
+	mutex_unlock(dbm_mutex(dbm));
+
+	KUNIT_EXPECT_LT(test, xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0);
+}
+
+static void test_default(struct kunit *test)
+{
+	struct xe_guc_db_mgr *dbm = test->priv;
+
+	KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, ~0), 0);
+	KUNIT_ASSERT_EQ(test, dbm->count, GUC_NUM_DOORBELLS);
+}
+
+static const unsigned int guc_dbm_params[] = {
+	GUC_NUM_DOORBELLS / 64,
+	GUC_NUM_DOORBELLS / 32,
+	GUC_NUM_DOORBELLS / 8,
+	GUC_NUM_DOORBELLS,
+};
+
+static void uint_param_get_desc(const unsigned int *p, char *desc)
+{
+	snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%u", *p);
+}
+
+KUNIT_ARRAY_PARAM(guc_dbm, guc_dbm_params, uint_param_get_desc);
+
+static void test_size(struct kunit *test)
+{
+	const unsigned int *p = test->param_value;
+	struct xe_guc_db_mgr *dbm = test->priv;
+	unsigned int n;
+	int id;
+
+	KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, *p), 0);
+	KUNIT_ASSERT_EQ(test, dbm->count, *p);
+
+	mutex_lock(dbm_mutex(dbm));
+	for (n = 0; n < *p; n++) {
+		KUNIT_EXPECT_GE(test, id = xe_guc_db_mgr_reserve_id_locked(dbm), 0);
+		KUNIT_EXPECT_LT(test, id, dbm->count);
+	}
+	KUNIT_EXPECT_LT(test, xe_guc_db_mgr_reserve_id_locked(dbm), 0);
+	mutex_unlock(dbm_mutex(dbm));
+
+	mutex_lock(dbm_mutex(dbm));
+	for (n = 0; n < *p; n++)
+		xe_guc_db_mgr_release_id_locked(dbm, n);
+	mutex_unlock(dbm_mutex(dbm));
+}
+
+static void test_reuse(struct kunit *test)
+{
+	const unsigned int *p = test->param_value;
+	struct xe_guc_db_mgr *dbm = test->priv;
+	unsigned int n;
+
+	KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, *p), 0);
+
+	mutex_lock(dbm_mutex(dbm));
+	for (n = 0; n < *p; n++)
+		KUNIT_EXPECT_GE(test, xe_guc_db_mgr_reserve_id_locked(dbm), 0);
+	KUNIT_EXPECT_LT(test, xe_guc_db_mgr_reserve_id_locked(dbm), 0);
+	mutex_unlock(dbm_mutex(dbm));
+
+	mutex_lock(dbm_mutex(dbm));
+	for (n = 0; n < *p; n++) {
+		xe_guc_db_mgr_release_id_locked(dbm, n);
+		KUNIT_EXPECT_EQ(test, xe_guc_db_mgr_reserve_id_locked(dbm), n);
+	}
+	KUNIT_EXPECT_LT(test, xe_guc_db_mgr_reserve_id_locked(dbm), 0);
+	mutex_unlock(dbm_mutex(dbm));
+
+	mutex_lock(dbm_mutex(dbm));
+	for (n = 0; n < *p; n++)
+		xe_guc_db_mgr_release_id_locked(dbm, n);
+	mutex_unlock(dbm_mutex(dbm));
+}
+
+static void test_range_overlap(struct kunit *test)
+{
+	const unsigned int *p = test->param_value;
+	struct xe_guc_db_mgr *dbm = test->priv;
+	int id1, id2, id3;
+	unsigned int n;
+
+	KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, ~0), 0);
+	KUNIT_ASSERT_LE(test, *p, dbm->count);
+
+	KUNIT_ASSERT_GE(test, id1 = xe_guc_db_mgr_reserve_range(dbm, *p, 0), 0);
+	for (n = 0; n < dbm->count - *p; n++) {
+		KUNIT_ASSERT_GE(test, id2 = xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0);
+		KUNIT_ASSERT_NE(test, id2, id1);
+		KUNIT_ASSERT_NE_MSG(test, id2 < id1, id2 > id1 + *p - 1,
+				    "id1=%d id2=%d", id1, id2);
+	}
+	KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0);
+	xe_guc_db_mgr_release_range(dbm, 0, dbm->count);
+
+	if (*p >= 1) {
+		KUNIT_ASSERT_GE(test, id1 = xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0);
+		KUNIT_ASSERT_GE(test, id2 = xe_guc_db_mgr_reserve_range(dbm, *p - 1, 0), 0);
+		KUNIT_ASSERT_NE(test, id2, id1);
+		KUNIT_ASSERT_NE_MSG(test, id1 < id2, id1 > id2 + *p - 2,
+				    "id1=%d id2=%d", id1, id2);
+		for (n = 0; n < dbm->count - *p; n++) {
+			KUNIT_ASSERT_GE(test, id3 = xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0);
+			KUNIT_ASSERT_NE(test, id3, id1);
+			KUNIT_ASSERT_NE(test, id3, id2);
+			KUNIT_ASSERT_NE_MSG(test, id3 < id2, id3 > id2 + *p - 2,
+					    "id3=%d id2=%d", id3, id2);
+		}
+		KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0);
+		xe_guc_db_mgr_release_range(dbm, 0, dbm->count);
+	}
+}
+
+static void test_range_compact(struct kunit *test)
+{
+	const unsigned int *p = test->param_value;
+	struct xe_guc_db_mgr *dbm = test->priv;
+	unsigned int n;
+
+	KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, ~0), 0);
+	KUNIT_ASSERT_NE(test, *p, 0);
+	KUNIT_ASSERT_LE(test, *p, dbm->count);
+	if (dbm->count % *p)
+		kunit_skip(test, "must be divisible");
+
+	KUNIT_ASSERT_GE(test, xe_guc_db_mgr_reserve_range(dbm, *p, 0), 0);
+	for (n = 1; n < dbm->count / *p; n++)
+		KUNIT_ASSERT_GE(test, xe_guc_db_mgr_reserve_range(dbm, *p, 0), 0);
+	KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, 1, 0), 0);
+	xe_guc_db_mgr_release_range(dbm, 0, dbm->count);
+}
+
+static void test_range_spare(struct kunit *test)
+{
+	const unsigned int *p = test->param_value;
+	struct xe_guc_db_mgr *dbm = test->priv;
+	int id;
+
+	KUNIT_ASSERT_EQ(test, xe_guc_db_mgr_init(dbm, ~0), 0);
+	KUNIT_ASSERT_LE(test, *p, dbm->count);
+
+	KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, *p, dbm->count), 0);
+	KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, *p, dbm->count - *p + 1), 0);
+	KUNIT_ASSERT_EQ(test, id = xe_guc_db_mgr_reserve_range(dbm, *p, dbm->count - *p), 0);
+	KUNIT_ASSERT_LT(test, xe_guc_db_mgr_reserve_range(dbm, 1, dbm->count - *p), 0);
+	xe_guc_db_mgr_release_range(dbm, id, *p);
+}
+
+static struct kunit_case guc_dbm_test_cases[] = {
+	KUNIT_CASE(test_empty),
+	KUNIT_CASE(test_default),
+	KUNIT_CASE_PARAM(test_size, guc_dbm_gen_params),
+	KUNIT_CASE_PARAM(test_reuse, guc_dbm_gen_params),
+	KUNIT_CASE_PARAM(test_range_overlap, guc_dbm_gen_params),
+	KUNIT_CASE_PARAM(test_range_compact, guc_dbm_gen_params),
+	KUNIT_CASE_PARAM(test_range_spare, guc_dbm_gen_params),
+	{}
+};
+
+static struct kunit_suite guc_dbm_suite = {
+	.name = "guc_dbm",
+	.test_cases = guc_dbm_test_cases,
+	.init = guc_dbm_test_init,
+};
+
+kunit_test_suites(&guc_dbm_suite);
GUC_HXG_ORIGIN_HOST, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0])); + KUNIT_EXPECT_EQ(test, GUC_HXG_TYPE_REQUEST, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])); + KUNIT_EXPECT_EQ(test, XE_GUC_ACTION_PF2GUC_RELAY_TO_VF, + FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0])); + KUNIT_EXPECT_EQ(test, TEST_VFID, + FIELD_GET(PF2GUC_RELAY_TO_VF_REQUEST_MSG_1_VFID, msg[1])); + KUNIT_EXPECT_EQ(test, TEST_RID, + FIELD_GET(PF2GUC_RELAY_TO_VF_REQUEST_MSG_2_RELAY_ID, msg[2])); + KUNIT_EXPECT_MEMEQ(test, TEST_MSG, msg + PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN, + sizeof(u32) * TEST_LEN); + return 0; +} + +static const u32 test_guc2pf[GUC2PF_RELAY_FROM_VF_EVENT_MSG_MAX_LEN] = { + /* transport */ + FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC) | + FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | + FIELD_PREP_CONST(GUC_HXG_EVENT_MSG_0_ACTION, XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF), + FIELD_PREP_CONST(GUC2PF_RELAY_FROM_VF_EVENT_MSG_1_VFID, TEST_VFID), + FIELD_PREP_CONST(GUC2PF_RELAY_FROM_VF_EVENT_MSG_2_RELAY_ID, TEST_RID), + /* payload */ + FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS), +}; + +static const u32 test_guc2vf[GUC2VF_RELAY_FROM_PF_EVENT_MSG_MAX_LEN] = { + /* transport */ + FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC) | + FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | + FIELD_PREP_CONST(GUC_HXG_EVENT_MSG_0_ACTION, XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF), + FIELD_PREP_CONST(GUC2VF_RELAY_FROM_PF_EVENT_MSG_1_RELAY_ID, TEST_RID), + /* payload */ + FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS), +}; + +static void pf_rejects_guc2pf_too_short(struct kunit *test) +{ + const u32 len = GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN - 1; + struct xe_guc_relay *relay = test->priv; + const u32 *msg = test_guc2pf; + + KUNIT_ASSERT_EQ(test, -EPROTO, xe_guc_relay_process_guc2pf(relay, msg, len)); +} + +static void pf_rejects_guc2pf_too_long(struct kunit *test) +{ + const u32 len = GUC2PF_RELAY_FROM_VF_EVENT_MSG_MAX_LEN + 1; + struct xe_guc_relay *relay = test->priv; + const u32 *msg = test_guc2pf; + + KUNIT_ASSERT_EQ(test, -EMSGSIZE, xe_guc_relay_process_guc2pf(relay, msg, len)); +} + +static void pf_rejects_guc2pf_no_payload(struct kunit *test) +{ + const u32 len = GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN; + struct xe_guc_relay *relay = test->priv; + const u32 *msg = test_guc2pf; + + KUNIT_ASSERT_EQ(test, -EPROTO, xe_guc_relay_process_guc2pf(relay, msg, len)); +} + +static void pf_fails_no_payload(struct kunit *test) +{ + struct xe_guc_relay *relay = test->priv; + const u32 msg = 0; + + KUNIT_ASSERT_EQ(test, -EPROTO, relay_process_msg(relay, TEST_VFID, TEST_RID, &msg, 0)); +} + +static void pf_fails_bad_origin(struct kunit *test) +{ + struct xe_guc_relay *relay = test->priv; + static const u32 msg[] = { + FIELD_PREP_CONST(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC) | + FIELD_PREP_CONST(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS), + }; + u32 len = ARRAY_SIZE(msg); + + KUNIT_ASSERT_EQ(test, -EPROTO, relay_process_msg(relay, TEST_VFID, TEST_RID, msg, len)); +} + +static void pf_fails_bad_type(struct kunit *test) +{ + struct xe_guc_relay *relay = test->priv; + const u32 msg[] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, 4), /* only 4 is undefined */ + }; + u32 len = ARRAY_SIZE(msg); + + KUNIT_ASSERT_EQ(test, -EBADRQC, relay_process_msg(relay, TEST_VFID, TEST_RID, msg, len)); +} 
+ +static void pf_txn_reports_error(struct kunit *test) +{ + struct xe_guc_relay *relay = test->priv; + struct relay_transaction *txn; + + txn = __relay_get_transaction(relay, false, TEST_VFID, TEST_RID, + TEST_MSG, TEST_LEN, NULL, 0); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, txn); + + kunit_activate_static_stub(test, xe_guc_ct_send_recv, + replacement_xe_guc_ct_send_recv_always_fails); + KUNIT_EXPECT_EQ(test, -ECOMM, relay_send_transaction(relay, txn)); + + relay_release_transaction(relay, txn); +} + +static void pf_txn_sends_pf2guc(struct kunit *test) +{ + struct xe_guc_relay *relay = test->priv; + struct relay_transaction *txn; + + txn = __relay_get_transaction(relay, false, TEST_VFID, TEST_RID, + TEST_MSG, TEST_LEN, NULL, 0); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, txn); + + kunit_activate_static_stub(test, xe_guc_ct_send_recv, + replacement_xe_guc_ct_send_recv_expects_pf2guc_relay); + KUNIT_ASSERT_EQ(test, 0, relay_send_transaction(relay, txn)); + + relay_release_transaction(relay, txn); +} + +static void pf_sends_pf2guc(struct kunit *test) +{ + struct xe_guc_relay *relay = test->priv; + + kunit_activate_static_stub(test, xe_guc_ct_send_recv, + replacement_xe_guc_ct_send_recv_expects_pf2guc_relay); + KUNIT_ASSERT_EQ(test, 0, + xe_guc_relay_send_to_vf(relay, TEST_VFID, + TEST_MSG, TEST_LEN, NULL, 0)); +} + +static int replacement_xe_guc_ct_send_recv_loopback_relay(struct xe_guc_ct *ct, + const u32 *msg, u32 len, + u32 *response_buffer) +{ + struct kunit *test = kunit_get_current_test(); + struct xe_guc_relay *relay = test->priv; + u32 *reply = kunit_kzalloc(test, len * sizeof(u32), GFP_KERNEL); + int (*guc2relay)(struct xe_guc_relay *, const u32 *, u32); + u32 action; + int err; + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ct); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, msg); + KUNIT_ASSERT_GE(test, len, GUC_HXG_MSG_MIN_LEN); + KUNIT_ASSERT_EQ(test, GUC_HXG_TYPE_REQUEST, + FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])); + KUNIT_ASSERT_GE(test, len, GUC_HXG_REQUEST_MSG_MIN_LEN); + KUNIT_ASSERT_NOT_NULL(test, reply); + + switch (FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0])) { + case XE_GUC_ACTION_PF2GUC_RELAY_TO_VF: + KUNIT_ASSERT_GE(test, len, PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN); + action = XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF; + guc2relay = xe_guc_relay_process_guc2pf; + break; + case XE_GUC_ACTION_VF2GUC_RELAY_TO_PF: + KUNIT_ASSERT_GE(test, len, VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN); + action = XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF; + guc2relay = xe_guc_relay_process_guc2vf; + break; + default: + KUNIT_FAIL(test, "bad RELAY action %#x", msg[0]); + return -EINVAL; + } + + reply[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | + FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION, action); + memcpy(reply + 1, msg + 1, sizeof(u32) * (len - 1)); + + err = guc2relay(relay, reply, len); + KUNIT_EXPECT_EQ(test, err, 0); + + return err; +} + +static void test_requires_relay_testloop(struct kunit *test) +{ + /* + * The debug relay action GUC_RELAY_ACTION_VFXPF_TESTLOOP is available + * only on builds with CONFIG_DRM_XE_DEBUG_SRIOV enabled. + * See "kunit.py --kconfig_add" option if it's missing. 
+ */ + if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) + kunit_skip(test, "requires %s\n", __stringify(CONFIG_DRM_XE_DEBUG_SRIOV)); +} + +static void pf_loopback_nop(struct kunit *test) +{ + struct xe_guc_relay *relay = test->priv; + u32 request[] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VFXPF_TESTLOOP) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, VFXPF_TESTLOOP_OPCODE_NOP), + }; + u32 response[GUC_HXG_RESPONSE_MSG_MIN_LEN]; + int ret; + + test_requires_relay_testloop(test); + + kunit_activate_static_stub(test, relay_kick_worker, relay_process_incoming_action); + kunit_activate_static_stub(test, xe_guc_ct_send_recv, + replacement_xe_guc_ct_send_recv_loopback_relay); + ret = xe_guc_relay_send_to_vf(relay, TEST_VFID, + request, ARRAY_SIZE(request), + response, ARRAY_SIZE(response)); + KUNIT_ASSERT_EQ(test, ret, GUC_HXG_RESPONSE_MSG_MIN_LEN); + KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, response[0]), + GUC_HXG_ORIGIN_HOST); + KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_MSG_0_TYPE, response[0]), + GUC_HXG_TYPE_RESPONSE_SUCCESS); + KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, response[0]), 0); +} + +static void pf_loopback_echo(struct kunit *test) +{ + struct xe_guc_relay *relay = test->priv; + u32 request[] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VFXPF_TESTLOOP) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, VFXPF_TESTLOOP_OPCODE_ECHO), + TEST_DATA(1), TEST_DATA(2), TEST_DATA(3), TEST_DATA(4), + }; + u32 response[ARRAY_SIZE(request)]; + unsigned int n; + int ret; + + test_requires_relay_testloop(test); + + kunit_activate_static_stub(test, relay_kick_worker, relay_process_incoming_action); + kunit_activate_static_stub(test, xe_guc_ct_send_recv, + replacement_xe_guc_ct_send_recv_loopback_relay); + ret = xe_guc_relay_send_to_vf(relay, TEST_VFID, + request, ARRAY_SIZE(request), + response, ARRAY_SIZE(response)); + KUNIT_ASSERT_EQ(test, ret, ARRAY_SIZE(response)); + KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, response[0]), + GUC_HXG_ORIGIN_HOST); + KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_MSG_0_TYPE, response[0]), + GUC_HXG_TYPE_RESPONSE_SUCCESS); + KUNIT_EXPECT_EQ(test, FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, response[0]), + ARRAY_SIZE(response)); + for (n = GUC_HXG_RESPONSE_MSG_MIN_LEN; n < ret; n++) + KUNIT_EXPECT_EQ(test, request[n], response[n]); +} + +static void pf_loopback_fail(struct kunit *test) +{ + struct xe_guc_relay *relay = test->priv; + u32 request[] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VFXPF_TESTLOOP) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, VFXPF_TESTLOOP_OPCODE_FAIL), + }; + u32 response[GUC_HXG_RESPONSE_MSG_MIN_LEN]; + int ret; + + test_requires_relay_testloop(test); + + kunit_activate_static_stub(test, relay_kick_worker, relay_process_incoming_action); + kunit_activate_static_stub(test, xe_guc_ct_send_recv, + replacement_xe_guc_ct_send_recv_loopback_relay); + ret = xe_guc_relay_send_to_vf(relay, TEST_VFID, + request, ARRAY_SIZE(request), + response, ARRAY_SIZE(response)); + KUNIT_ASSERT_EQ(test, ret, -EREMOTEIO); +} + +static void pf_loopback_busy(struct kunit *test) +{ + struct xe_guc_relay *relay = test->priv; + u32 request[] = { + 
FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VFXPF_TESTLOOP) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, VFXPF_TESTLOOP_OPCODE_BUSY),
+		TEST_DATA(0xb),
+	};
+	u32 response[GUC_HXG_RESPONSE_MSG_MIN_LEN];
+	int ret;
+
+	test_requires_relay_testloop(test);
+
+	kunit_activate_static_stub(test, relay_testonly_nop, relay_process_incoming_action);
+	kunit_activate_static_stub(test, relay_kick_worker, relay_process_incoming_action);
+	kunit_activate_static_stub(test, xe_guc_ct_send_recv,
+				   replacement_xe_guc_ct_send_recv_loopback_relay);
+	ret = xe_guc_relay_send_to_vf(relay, TEST_VFID,
+				      request, ARRAY_SIZE(request),
+				      response, ARRAY_SIZE(response));
+	KUNIT_ASSERT_EQ(test, ret, GUC_HXG_RESPONSE_MSG_MIN_LEN);
+}
+
+static void pf_loopback_retry(struct kunit *test)
+{
+	struct xe_guc_relay *relay = test->priv;
+	u32 request[] = {
+		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_RELAY_ACTION_VFXPF_TESTLOOP) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0, VFXPF_TESTLOOP_OPCODE_RETRY),
+		TEST_DATA(0xd), TEST_DATA(0xd),
+	};
+	u32 response[GUC_HXG_RESPONSE_MSG_MIN_LEN];
+	int ret;
+
+	test_requires_relay_testloop(test);
+
+	kunit_activate_static_stub(test, relay_kick_worker, relay_process_incoming_action);
+	kunit_activate_static_stub(test, xe_guc_ct_send_recv,
+				   replacement_xe_guc_ct_send_recv_loopback_relay);
+	ret = xe_guc_relay_send_to_vf(relay, TEST_VFID,
+				      request, ARRAY_SIZE(request),
+				      response, ARRAY_SIZE(response));
+	KUNIT_ASSERT_EQ(test, ret, GUC_HXG_RESPONSE_MSG_MIN_LEN);
+}
+
+static struct kunit_case pf_relay_test_cases[] = {
+	KUNIT_CASE(pf_rejects_guc2pf_too_short),
+	KUNIT_CASE(pf_rejects_guc2pf_too_long),
+	KUNIT_CASE(pf_rejects_guc2pf_no_payload),
+	KUNIT_CASE(pf_fails_no_payload),
+	KUNIT_CASE(pf_fails_bad_origin),
+	KUNIT_CASE(pf_fails_bad_type),
+	KUNIT_CASE(pf_txn_reports_error),
+	KUNIT_CASE(pf_txn_sends_pf2guc),
+	KUNIT_CASE(pf_sends_pf2guc),
+	KUNIT_CASE(pf_loopback_nop),
+	KUNIT_CASE(pf_loopback_echo),
+	KUNIT_CASE(pf_loopback_fail),
+	KUNIT_CASE_SLOW(pf_loopback_busy),
+	KUNIT_CASE_SLOW(pf_loopback_retry),
+	{}
+};
+
+static struct kunit_suite pf_relay_suite = {
+	.name = "pf_relay",
+	.test_cases = pf_relay_test_cases,
+	.init = relay_test_init,
+};
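Every relay request built by the loopback tests above packs its header the same way: origin, type, and action are inserted into dword 0 with FIELD_PREP() and pulled back out with FIELD_GET(). A minimal, self-contained sketch of that round trip follows; the DEMO_* masks are illustrative stand-ins, not the driver's real GUC_HXG definitions, which live in the GuC ABI headers.

#include <linux/bitfield.h>
#include <linux/types.h>

/* Illustrative stand-ins for the real GUC_HXG masks. */
#define DEMO_MSG_0_ORIGIN	GENMASK(31, 31)
#define DEMO_MSG_0_TYPE		GENMASK(30, 28)
#define DEMO_MSG_0_ACTION	GENMASK(15, 0)

static u32 demo_encode_header(u32 origin, u32 type, u32 action)
{
	/* Each FIELD_PREP() shifts the value into its mask's position. */
	return FIELD_PREP(DEMO_MSG_0_ORIGIN, origin) |
	       FIELD_PREP(DEMO_MSG_0_TYPE, type) |
	       FIELD_PREP(DEMO_MSG_0_ACTION, action);
}

static u32 demo_decode_action(u32 header)
{
	/* FIELD_GET() is the exact inverse of FIELD_PREP() for the same mask. */
	return FIELD_GET(DEMO_MSG_0_ACTION, header);
}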
= "vf_relay", + .test_cases = vf_relay_test_cases, + .init = relay_test_init, +}; + +static void xe_drops_guc2pf_if_not_ready(struct kunit *test) +{ + struct xe_device *xe = test->priv; + struct xe_guc_relay *relay = &xe_device_get_gt(xe, 0)->uc.guc.relay; + const u32 *msg = test_guc2pf; + u32 len = GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN + GUC_RELAY_MSG_MIN_LEN; + + KUNIT_ASSERT_EQ(test, -ENODEV, xe_guc_relay_process_guc2pf(relay, msg, len)); +} + +static void xe_drops_guc2vf_if_not_ready(struct kunit *test) +{ + struct xe_device *xe = test->priv; + struct xe_guc_relay *relay = &xe_device_get_gt(xe, 0)->uc.guc.relay; + const u32 *msg = test_guc2vf; + u32 len = GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN + GUC_RELAY_MSG_MIN_LEN; + + KUNIT_ASSERT_EQ(test, -ENODEV, xe_guc_relay_process_guc2vf(relay, msg, len)); +} + +static void xe_rejects_send_if_not_ready(struct kunit *test) +{ + struct xe_device *xe = test->priv; + struct xe_guc_relay *relay = &xe_device_get_gt(xe, 0)->uc.guc.relay; + u32 msg[GUC_RELAY_MSG_MIN_LEN]; + u32 len = ARRAY_SIZE(msg); + + KUNIT_ASSERT_EQ(test, -ENODEV, xe_guc_relay_send_to_pf(relay, msg, len, NULL, 0)); + KUNIT_ASSERT_EQ(test, -ENODEV, relay_send_to(relay, TEST_VFID, msg, len, NULL, 0)); +} + +static struct kunit_case no_relay_test_cases[] = { + KUNIT_CASE(xe_drops_guc2pf_if_not_ready), + KUNIT_CASE(xe_drops_guc2vf_if_not_ready), + KUNIT_CASE(xe_rejects_send_if_not_ready), + {} +}; + +static struct kunit_suite no_relay_suite = { + .name = "no_relay", + .test_cases = no_relay_test_cases, + .init = xe_kunit_helper_xe_device_test_init, +}; + +kunit_test_suites(&no_relay_suite, + &pf_relay_suite, + &vf_relay_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c new file mode 100644 index 0000000000..fefe79b3b7 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <kunit/test.h> +#include <kunit/static_stub.h> +#include <kunit/visibility.h> + +#include <drm/drm_drv.h> +#include <drm/drm_kunit_helpers.h> + +#include "tests/xe_kunit_helpers.h" +#include "tests/xe_pci_test.h" +#include "xe_device_types.h" + +/** + * xe_kunit_helper_alloc_xe_device - Allocate a &xe_device for a KUnit test. + * @test: the &kunit where this &xe_device will be used + * @dev: The parent device object + * + * This function allocates xe_device using drm_kunit_helper_alloc_device(). + * The xe_device allocation is managed by the test. + * + * @dev should be allocated using drm_kunit_helper_alloc_device(). + * + * This function uses KUNIT_ASSERT to detect any allocation failures. + * + * Return: A pointer to the new &xe_device. + */ +struct xe_device *xe_kunit_helper_alloc_xe_device(struct kunit *test, + struct device *dev) +{ + struct xe_device *xe; + + xe = drm_kunit_helper_alloc_drm_device(test, dev, + struct xe_device, + drm, DRIVER_GEM); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe); + return xe; +} +EXPORT_SYMBOL_IF_KUNIT(xe_kunit_helper_alloc_xe_device); + +static void kunit_action_restore_priv(void *priv) +{ + struct kunit *test = kunit_get_current_test(); + + test->priv = priv; +} + +/** + * xe_kunit_helper_xe_device_test_init - Prepare a &xe_device for a KUnit test. + * @test: the &kunit where this fake &xe_device will be used + * + * This function allocates and initializes a fake &xe_device and stores its + * pointer as &kunit.priv to allow the test code to access it. 
diff --git a/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c
new file mode 100644
index 0000000000..fefe79b3b7
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <kunit/test.h>
+#include <kunit/static_stub.h>
+#include <kunit/visibility.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_kunit_helpers.h>
+
+#include "tests/xe_kunit_helpers.h"
+#include "tests/xe_pci_test.h"
+#include "xe_device_types.h"
+
+/**
+ * xe_kunit_helper_alloc_xe_device - Allocate a &xe_device for a KUnit test.
+ * @test: the &kunit where this &xe_device will be used
+ * @dev: The parent device object
+ *
+ * This function allocates xe_device using drm_kunit_helper_alloc_drm_device().
+ * The xe_device allocation is managed by the test.
+ *
+ * @dev should be allocated using drm_kunit_helper_alloc_device().
+ *
+ * This function uses KUNIT_ASSERT to detect any allocation failures.
+ *
+ * Return: A pointer to the new &xe_device.
+ */
+struct xe_device *xe_kunit_helper_alloc_xe_device(struct kunit *test,
+						  struct device *dev)
+{
+	struct xe_device *xe;
+
+	xe = drm_kunit_helper_alloc_drm_device(test, dev,
+					       struct xe_device,
+					       drm, DRIVER_GEM);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe);
+	return xe;
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_kunit_helper_alloc_xe_device);
+
+static void kunit_action_restore_priv(void *priv)
+{
+	struct kunit *test = kunit_get_current_test();
+
+	test->priv = priv;
+}
+
+/**
+ * xe_kunit_helper_xe_device_test_init - Prepare a &xe_device for a KUnit test.
+ * @test: the &kunit where this fake &xe_device will be used
+ *
+ * This function allocates and initializes a fake &xe_device and stores its
+ * pointer as &kunit.priv to allow the test code to access it.
+ *
+ * This function can be directly used as a custom implementation of
+ * &kunit_suite.init.
+ *
+ * It is possible to prepare a specific variant of the fake &xe_device by
+ * passing in the &kunit.priv pointer to a struct xe_pci_fake_data supplemented
+ * with the desired parameters prior to calling this function.
+ *
+ * This function uses KUNIT_ASSERT to detect any failures.
+ *
+ * Return: Always 0.
+ */
+int xe_kunit_helper_xe_device_test_init(struct kunit *test)
+{
+	struct xe_device *xe;
+	struct device *dev;
+	int err;
+
+	dev = drm_kunit_helper_alloc_device(test);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev);
+
+	xe = xe_kunit_helper_alloc_xe_device(test, dev);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe);
+
+	err = xe_pci_fake_device_init(xe);
+	KUNIT_ASSERT_EQ(test, err, 0);
+
+	err = kunit_add_action_or_reset(test, kunit_action_restore_priv, test->priv);
+	KUNIT_ASSERT_EQ(test, err, 0);
+
+	test->priv = xe;
+	return 0;
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_kunit_helper_xe_device_test_init);
diff --git a/drivers/gpu/drm/xe/tests/xe_kunit_helpers.h b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.h
new file mode 100644
index 0000000000..067a1babf0
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_kunit_helpers.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 AND MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_KUNIT_HELPERS_H_
+#define _XE_KUNIT_HELPERS_H_
+
+struct device;
+struct kunit;
+struct xe_device;
+
+struct xe_device *xe_kunit_helper_alloc_xe_device(struct kunit *test,
+						  struct device *dev);
+int xe_kunit_helper_xe_device_test_init(struct kunit *test);
+
+#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c
index 7dd34f94e8..df5c36b70a 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs.c
+++ b/drivers/gpu/drm/xe/tests/xe_mocs.c
@@ -128,3 +128,39 @@ void xe_live_mocs_kernel_kunit(struct kunit *test)
 	xe_call_for_each_device(mocs_kernel_test_run_device);
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_live_mocs_kernel_kunit);
+
+static int mocs_reset_test_run_device(struct xe_device *xe)
+{
+	/* Check the mocs setup is retained over GT reset */
+
+	struct live_mocs mocs;
+	struct xe_gt *gt;
+	unsigned int flags;
+	int id;
+	struct kunit *test = xe_cur_kunit();
+
+	for_each_gt(gt, xe, id) {
+		flags = live_mocs_init(&mocs, gt);
+		kunit_info(test, "mocs_reset_test before reset\n");
+		if (flags & HAS_GLOBAL_MOCS)
+			read_mocs_table(gt, &mocs.table);
+		if (flags & HAS_LNCF_MOCS)
+			read_l3cc_table(gt, &mocs.table);
+
+		xe_gt_reset_async(gt);
+		flush_work(&gt->reset.worker);
+
+		kunit_info(test, "mocs_reset_test after reset\n");
+		if (flags & HAS_GLOBAL_MOCS)
+			read_mocs_table(gt, &mocs.table);
+		if (flags & HAS_LNCF_MOCS)
+			read_l3cc_table(gt, &mocs.table);
+	}
+	return 0;
+}
+
+void xe_live_mocs_reset_kunit(struct kunit *test)
+{
+	xe_call_for_each_device(mocs_reset_test_run_device);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_live_mocs_reset_kunit);
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.c b/drivers/gpu/drm/xe/tests/xe_mocs_test.c
index 421b819fd4..ee40f31e1e 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.c
@@ -9,6 +9,7 @@
 static struct kunit_case xe_mocs_tests[] = {
 	KUNIT_CASE(xe_live_mocs_kernel_kunit),
+	KUNIT_CASE(xe_live_mocs_reset_kunit),
 	{}
 };
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.h b/drivers/gpu/drm/xe/tests/xe_mocs_test.h
index 7faa3575e6..e7699d4954 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.h
@@ -9,5 +9,6 @@
 struct kunit;
 
 void
xe_live_mocs_kernel_kunit(struct kunit *test); +void xe_live_mocs_reset_kunit(struct kunit *test); #endif diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index 602793644f..f62809ca8b 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -156,6 +156,9 @@ int xe_pci_fake_device_init(struct xe_device *xe) return -ENODEV; done: + xe->sriov.__mode = data && data->sriov_mode ? + data->sriov_mode : XE_SRIOV_MODE_NONE; + kunit_activate_static_stub(test, read_gmdid, fake_read_gmdid); xe_info_init_early(xe, desc, subplatform_desc); diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c index 171e4180f1..a6705a5363 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.c +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c @@ -64,8 +64,3 @@ static struct kunit_suite xe_pci_test_suite = { }; kunit_test_suite(xe_pci_test_suite); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("xe_pci kunit test"); -MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h index 811ffe5bd9..f40dcec839 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.h +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h @@ -9,6 +9,7 @@ #include <linux/types.h> #include "xe_platform_types.h" +#include "xe_sriov_types.h" struct xe_device; struct xe_graphics_desc; @@ -23,6 +24,7 @@ void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn); void xe_call_for_each_media_ip(xe_media_fn xe_fn); struct xe_pci_fake_data { + enum xe_sriov_mode sriov_mode; enum xe_platform platform; enum xe_subplatform subplatform; u32 graphics_verx100; diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index 4a69728976..06759d7547 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -15,6 +15,7 @@ #include "regs/xe_reg_defs.h" #include "xe_device.h" #include "xe_device_types.h" +#include "xe_kunit_helpers.h" #include "xe_pci_test.h" #include "xe_reg_sr.h" #include "xe_rtp.h" @@ -276,9 +277,7 @@ static int xe_rtp_test_init(struct kunit *test) dev = drm_kunit_helper_alloc_device(test); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); - xe = drm_kunit_helper_alloc_drm_device(test, dev, - struct xe_device, - drm, DRIVER_GEM); + xe = xe_kunit_helper_alloc_xe_device(test, dev); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe); /* Initialize an empty device */ @@ -312,8 +311,3 @@ static struct kunit_suite xe_rtp_test_suite = { }; kunit_test_suite(xe_rtp_test_suite); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("xe_rtp kunit test"); -MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_test_mod.c b/drivers/gpu/drm/xe/tests/xe_test_mod.c new file mode 100644 index 0000000000..875f3e6f96 --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_test_mod.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2023 Intel Corporation + */ +#include <linux/module.h> + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("xe kunit tests"); +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c index b4715b78ef..44570d8883 100644 --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c @@ -9,6 +9,7 @@ #include <kunit/test.h> #include "xe_device.h" +#include "xe_kunit_helpers.h" #include 
"xe_pci_test.h" #include "xe_reg_sr.h" #include "xe_tuning.h" @@ -65,14 +66,8 @@ static const struct platform_test_case cases[] = { PLATFORM_CASE(ALDERLAKE_P, C0), SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0), SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0), - SUBPLATFORM_CASE(DG2, G10, A0), - SUBPLATFORM_CASE(DG2, G10, A1), - SUBPLATFORM_CASE(DG2, G10, B0), SUBPLATFORM_CASE(DG2, G10, C0), - SUBPLATFORM_CASE(DG2, G11, A0), - SUBPLATFORM_CASE(DG2, G11, B0), SUBPLATFORM_CASE(DG2, G11, B1), - SUBPLATFORM_CASE(DG2, G12, A0), SUBPLATFORM_CASE(DG2, G12, A1), GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0), GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0), @@ -105,9 +100,7 @@ static int xe_wa_test_init(struct kunit *test) dev = drm_kunit_helper_alloc_device(test); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); - xe = drm_kunit_helper_alloc_drm_device(test, dev, - struct xe_device, - drm, DRIVER_GEM); + xe = xe_kunit_helper_alloc_xe_device(test, dev); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe); test->priv = &data; @@ -160,8 +153,3 @@ static struct kunit_suite xe_rtp_test_suite = { }; kunit_test_suite(xe_rtp_test_suite); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("xe_wa kunit test"); -MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 7c124475c4..a35e0781b7 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -96,7 +96,8 @@ struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q, { u64 addr = xe_sa_bo_gpu_addr(bb->bo); - xe_gt_assert(q->gt, !(q->vm && q->vm->flags & XE_VM_FLAG_MIGRATION)); + xe_gt_assert(q->gt, !xe_sched_job_is_migration(q)); + xe_gt_assert(q->gt, q->width == 1); return __xe_bb_create_job(q, bb, &addr); } diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index eb2c44a328..9c0837b6fd 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -46,22 +46,26 @@ static const struct ttm_place sys_placement_flags = { static struct ttm_placement sys_placement = { .num_placement = 1, .placement = &sys_placement_flags, - .num_busy_placement = 1, - .busy_placement = &sys_placement_flags, }; -static const struct ttm_place tt_placement_flags = { - .fpfn = 0, - .lpfn = 0, - .mem_type = XE_PL_TT, - .flags = 0, +static const struct ttm_place tt_placement_flags[] = { + { + .fpfn = 0, + .lpfn = 0, + .mem_type = XE_PL_TT, + .flags = TTM_PL_FLAG_DESIRED, + }, + { + .fpfn = 0, + .lpfn = 0, + .mem_type = XE_PL_SYSTEM, + .flags = TTM_PL_FLAG_FALLBACK, + } }; static struct ttm_placement tt_placement = { - .num_placement = 1, - .placement = &tt_placement_flags, - .num_busy_placement = 1, - .busy_placement = &sys_placement_flags, + .num_placement = 2, + .placement = tt_placement_flags, }; bool mem_type_is_vram(u32 mem_type) @@ -216,8 +220,6 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, bo->placement = (struct ttm_placement) { .num_placement = c, .placement = bo->placements, - .num_busy_placement = c, - .busy_placement = bo->placements, }; return 0; @@ -237,7 +239,6 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo, /* Don't handle scatter gather BOs */ if (tbo->type == ttm_bo_type_sg) { placement->num_placement = 0; - placement->num_busy_placement = 0; return; } @@ -572,6 +573,8 @@ static int xe_bo_move_notify(struct xe_bo *bo, { struct ttm_buffer_object *ttm_bo = &bo->ttm; struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); + struct ttm_resource *old_mem = ttm_bo->resource; + u32 old_mem_type = old_mem ? 
old_mem->mem_type : XE_PL_SYSTEM;
 	int ret;
 
 	/*
@@ -591,6 +594,18 @@ static int xe_bo_move_notify(struct xe_bo *bo,
 	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
 		dma_buf_move_notify(ttm_bo->base.dma_buf);
 
+	/*
+	 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
+	 * so if we moved from VRAM make sure to unlink this from the userfault
+	 * tracking.
+	 */
+	if (mem_type_is_vram(old_mem_type)) {
+		mutex_lock(&xe->mem_access.vram_userfault.lock);
+		if (!list_empty(&bo->vram_userfault_link))
+			list_del_init(&bo->vram_userfault_link);
+		mutex_unlock(&xe->mem_access.vram_userfault.lock);
+	}
+
 	return 0;
 }
 
@@ -1012,7 +1027,7 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
 	}
 }
 
-struct ttm_device_funcs xe_ttm_funcs = {
+const struct ttm_device_funcs xe_ttm_funcs = {
 	.ttm_tt_create = xe_ttm_tt_create,
 	.ttm_tt_populate = xe_ttm_tt_populate,
 	.ttm_tt_unpopulate = xe_ttm_tt_unpopulate,
@@ -1048,6 +1063,11 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
 	if (bo->vm && xe_bo_is_user(bo))
 		xe_vm_put(bo->vm);
 
+	mutex_lock(&xe->mem_access.vram_userfault.lock);
+	if (!list_empty(&bo->vram_userfault_link))
+		list_del(&bo->vram_userfault_link);
+	mutex_unlock(&xe->mem_access.vram_userfault.lock);
+
 	kfree(bo);
 }
 
@@ -1088,16 +1108,20 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
 {
 	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
 	struct drm_device *ddev = tbo->base.dev;
+	struct xe_device *xe = to_xe_device(ddev);
+	struct xe_bo *bo = ttm_to_xe_bo(tbo);
+	bool needs_rpm = bo->flags & XE_BO_CREATE_VRAM_MASK;
 	vm_fault_t ret;
 	int idx;
 
+	if (needs_rpm)
+		xe_device_mem_access_get(xe);
+
 	ret = ttm_bo_vm_reserve(tbo, vmf);
 	if (ret)
-		return ret;
+		goto out;
 
 	if (drm_dev_enter(ddev, &idx)) {
-		struct xe_bo *bo = ttm_to_xe_bo(tbo);
-
 		trace_xe_bo_cpu_fault(bo);
 
 		ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
@@ -1106,10 +1130,24 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
 	} else {
 		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
 	}
+
 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
-		return ret;
+		goto out;
+
+	/*
+	 * ttm_bo_vm_reserve() already has dma_resv_lock.
+	 */
+	if (ret == VM_FAULT_NOPAGE && mem_type_is_vram(tbo->resource->mem_type)) {
+		mutex_lock(&xe->mem_access.vram_userfault.lock);
+		if (list_empty(&bo->vram_userfault_link))
+			list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault.list);
+		mutex_unlock(&xe->mem_access.vram_userfault.lock);
+	}
 
 	dma_resv_unlock(tbo->base.resv);
+out:
+	if (needs_rpm)
+		xe_device_mem_access_put(xe);
+
 	return ret;
 }
 
@@ -1220,6 +1258,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 #ifdef CONFIG_PROC_FS
 	INIT_LIST_HEAD(&bo->client_link);
 #endif
+	INIT_LIST_HEAD(&bo->vram_userfault_link);
 
 	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
 
@@ -1319,8 +1358,6 @@ static int __xe_bo_fixed_placement(struct xe_device *xe,
 	bo->placement = (struct ttm_placement) {
 		.num_placement = 1,
 		.placement = place,
-		.num_busy_placement = 1,
-		.busy_placement = place,
 	};
 
 	return 0;
@@ -1534,6 +1571,38 @@ struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_til
 	return bo;
 }
 
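The helper documented below targets managed BOs that must start life in system memory (before VRAM is usable) and later migrate. A minimal sketch of how a caller might wrap it, assuming a managed sysmem BO and the usual xe headers in scope; promote_to_vram() is hypothetical and not part of this patch:

/* Hypothetical caller: promote a managed sysmem BO to VRAM once VRAM is ready. */
static int promote_to_vram(struct xe_device *xe, struct xe_tile *tile,
			   struct xe_bo **bo)
{
	int err;

	/* Integrated parts have no VRAM; keep the sysmem copy. */
	if (!IS_DGFX(xe))
		return 0;

	err = xe_managed_bo_reinit_in_vram(xe, tile, bo);
	if (err)
		return err;

	/* *bo now points at the VRAM copy; refresh any cached GGTT address. */
	return 0;
}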
+/**
+ * xe_managed_bo_reinit_in_vram - Replace a managed sysmem BO with a VRAM copy
+ * @xe: xe device
+ * @tile: Tile where the new buffer will be created
+ * @src: Managed buffer object allocated in system memory
+ *
+ * Replace a managed src buffer object allocated in system memory with a new
+ * one allocated in vram, copying the data between them.
+ * The buffer object in VRAM is not going to have the same GGTT address; the
+ * caller is responsible for making sure that any old references to it are
+ * updated.
+ *
+ * Return: 0 for success, negative error code otherwise.
+ */
+int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src)
+{
+	struct xe_bo *bo;
+
+	xe_assert(xe, IS_DGFX(xe));
+	xe_assert(xe, !(*src)->vmap.is_iomem);
+
+	bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr, (*src)->size,
+					    XE_BO_CREATE_VRAM_IF_DGFX(tile) |
+					    XE_BO_CREATE_GGTT_BIT);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	drmm_release_action(&xe->drm, __xe_bo_unpin_map_no_vm, *src);
+	*src = bo;
+
+	return 0;
+}
+
 /*
  * XXX: This is in the VM bind data path, likely should calculate this once and
  * store, with a recalculation if the BO is moved.
@@ -2078,9 +2147,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
 	xe_place_from_ttm_type(mem_type, &requested);
 	placement.num_placement = 1;
-	placement.num_busy_placement = 1;
 	placement.placement = &requested;
-	placement.busy_placement = &requested;
 
 	/*
 	 * Stolen needs to be handled like below VRAM handling if we ever need
@@ -2230,6 +2297,16 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
 	return err;
 }
 
+void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
+{
+	struct ttm_buffer_object *tbo = &bo->ttm;
+	struct ttm_device *bdev = tbo->bdev;
+
+	drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
+
+	list_del_init(&bo->vram_userfault_link);
+}
+
 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
 #include "tests/xe_bo.c"
 #endif
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 8be42ac6cd..c59ad15961 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -44,6 +44,7 @@
 #define XE_BO_FIXED_PLACEMENT_BIT	BIT(11)
 #define XE_BO_PAGETABLE			BIT(12)
 #define XE_BO_NEEDS_CPU_ACCESS		BIT(13)
+#define XE_BO_NEEDS_UC			BIT(14)
 /* this one is triggered internally only */
 #define XE_BO_INTERNAL_TEST		BIT(30)
 #define XE_BO_INTERNAL_64K		BIT(31)
@@ -128,6 +129,7 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile
 					   size_t size, u32 flags);
 struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
 					     const void *data, size_t size, u32 flags);
+int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src);
 
 int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
 			      u32 bo_flags);
@@ -242,13 +244,15 @@ int xe_bo_evict(struct xe_bo *bo, bool force_alloc);
 int xe_bo_evict_pinned(struct xe_bo *bo);
 int xe_bo_restore_pinned(struct xe_bo *bo);
 
-extern struct ttm_device_funcs xe_ttm_funcs;
+extern const struct ttm_device_funcs xe_ttm_funcs;
 
 extern const char *const xe_mem_type_to_name[];
 
 int xe_gem_create_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file);
 int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
			     struct drm_file *file);
+void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo);
+
 int xe_bo_dumb_create(struct drm_file *file_priv,
		      struct drm_device *dev,
		      struct drm_mode_create_dumb *args);
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 81dca15315..86422e113d 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -69,6 +69,9 @@ struct xe_bo {
	 * objects.
*/ u16 cpu_caching; + + /** @vram_userfault_link: Link into @mem_access.vram_userfault.list */ + struct list_head vram_userfault_link; }; #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base) diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index c56fd7d59f..01db5b27be 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -55,6 +55,7 @@ static int info(struct seq_file *m, void *data) drm_printf(&p, "force_execlist %s\n", str_yes_no(xe->info.force_execlist)); drm_printf(&p, "has_flat_ccs %s\n", str_yes_no(xe->info.has_flat_ccs)); drm_printf(&p, "has_usm %s\n", str_yes_no(xe->info.has_usm)); + drm_printf(&p, "skip_guc_pc %s\n", str_yes_no(xe->info.skip_guc_pc)); for_each_gt(gt, xe, id) { drm_printf(&p, "gt%d force wake %d\n", id, xe_force_wake_ref(gt_to_fw(gt), XE_FW_GT)); diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 68abc0b195..68d3d623a0 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -16,6 +16,8 @@ #include "xe_guc_ct.h" #include "xe_guc_submit.h" #include "xe_hw_engine.h" +#include "xe_sched_job.h" +#include "xe_vm.h" /** * DOC: Xe device coredump @@ -58,11 +60,22 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q) return &q->gt->uc.guc; } +static void xe_devcoredump_deferred_snap_work(struct work_struct *work) +{ + struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); + + xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); + if (ss->vm) + xe_vm_snapshot_capture_delayed(ss->vm); + xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); +} + static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) { struct xe_devcoredump *coredump = data; - struct xe_devcoredump_snapshot *ss; + struct xe_device *xe = coredump_to_xe(coredump); + struct xe_devcoredump_snapshot *ss = &coredump->snapshot; struct drm_printer p; struct drm_print_iterator iter; struct timespec64 ts; @@ -72,12 +85,14 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, if (!data || !coredump_to_xe(coredump)) return -ENODEV; + /* Ensure delayed work is captured before continuing */ + flush_work(&ss->work); + iter.data = buffer; iter.offset = 0; iter.start = offset; iter.remain = count; - ss = &coredump->snapshot; p = drm_coredump_printer(&iter); drm_printf(&p, "**** Xe Device Coredump ****\n"); @@ -88,16 +103,24 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec); ts = ktime_to_timespec64(ss->boot_time); drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec); + xe_device_snapshot_print(xe, &p); drm_printf(&p, "\n**** GuC CT ****\n"); xe_guc_ct_snapshot_print(coredump->snapshot.ct, &p); xe_guc_exec_queue_snapshot_print(coredump->snapshot.ge, &p); + drm_printf(&p, "\n**** Job ****\n"); + xe_sched_job_snapshot_print(coredump->snapshot.job, &p); + drm_printf(&p, "\n**** HW Engines ****\n"); for (i = 0; i < XE_NUM_HW_ENGINES; i++) if (coredump->snapshot.hwe[i]) xe_hw_engine_snapshot_print(coredump->snapshot.hwe[i], &p); + if (coredump->snapshot.vm) { + drm_printf(&p, "\n**** VM state ****\n"); + xe_vm_snapshot_print(coredump->snapshot.vm, &p); + } return count - iter.remain; } @@ -111,21 +134,28 @@ static void xe_devcoredump_free(void *data) if (!data || !coredump_to_xe(coredump)) return; + cancel_work_sync(&coredump->snapshot.work); + xe_guc_ct_snapshot_free(coredump->snapshot.ct); 
xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge); + xe_sched_job_snapshot_free(coredump->snapshot.job); for (i = 0; i < XE_NUM_HW_ENGINES; i++) if (coredump->snapshot.hwe[i]) xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]); + xe_vm_snapshot_free(coredump->snapshot.vm); + /* To prevent stale data on next snapshot, clear everything */ + memset(&coredump->snapshot, 0, sizeof(coredump->snapshot)); coredump->captured = false; drm_info(&coredump_to_xe(coredump)->drm, "Xe device coredump has been deleted.\n"); } static void devcoredump_snapshot(struct xe_devcoredump *coredump, - struct xe_exec_queue *q) + struct xe_sched_job *job) { struct xe_devcoredump_snapshot *ss = &coredump->snapshot; + struct xe_exec_queue *q = job->q; struct xe_guc *guc = exec_queue_to_guc(q); struct xe_hw_engine *hwe; enum xe_hw_engine_id id; @@ -137,6 +167,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, ss->snapshot_time = ktime_get_real(); ss->boot_time = ktime_get_boottime(); + ss->gt = q->gt; + INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work); + cookie = dma_fence_begin_signalling(); for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) { if (adj_logical_mask & BIT(i)) { @@ -150,7 +183,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true); - coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(q); + coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(job); + coredump->snapshot.job = xe_sched_job_snapshot_capture(job); + coredump->snapshot.vm = xe_vm_snapshot_capture(q->vm); for_each_hw_engine(hwe, q->gt, id) { if (hwe->class != q->hwe->class || @@ -161,21 +196,24 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe); } + if (ss->vm) + queue_work(system_unbound_wq, &ss->work); + xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); dma_fence_end_signalling(cookie); } /** * xe_devcoredump - Take the required snapshots and initialize coredump device. - * @q: The faulty xe_exec_queue, where the issue was detected. + * @job: The faulty xe_sched_job, where the issue was detected. * * This function should be called at the crash time within the serialized * gt_reset. It is skipped if we still have the core dump device available * with the information of the 'first' snapshot. 
*/ -void xe_devcoredump(struct xe_exec_queue *q) +void xe_devcoredump(struct xe_sched_job *job) { - struct xe_device *xe = gt_to_xe(q->gt); + struct xe_device *xe = gt_to_xe(job->q->gt); struct xe_devcoredump *coredump = &xe->devcoredump; if (coredump->captured) { @@ -184,7 +222,7 @@ void xe_devcoredump(struct xe_exec_queue *q) } coredump->captured = true; - devcoredump_snapshot(coredump, q); + devcoredump_snapshot(coredump, job); drm_info(&xe->drm, "Xe device coredump has been created\n"); drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n", @@ -194,3 +232,4 @@ void xe_devcoredump(struct xe_exec_queue *q) xe_devcoredump_read, xe_devcoredump_free); } #endif + diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h index 6ac218a5c1..df8671f0b5 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.h +++ b/drivers/gpu/drm/xe/xe_devcoredump.h @@ -7,12 +7,12 @@ #define _XE_DEVCOREDUMP_H_ struct xe_device; -struct xe_exec_queue; +struct xe_sched_job; #ifdef CONFIG_DEV_COREDUMP -void xe_devcoredump(struct xe_exec_queue *q); +void xe_devcoredump(struct xe_sched_job *job); #else -static inline void xe_devcoredump(struct xe_exec_queue *q) +static inline void xe_devcoredump(struct xe_sched_job *job) { } #endif diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h index 7fdad9c3d3..6f654b63c7 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h @@ -12,6 +12,7 @@ #include "xe_hw_engine_types.h" struct xe_device; +struct xe_gt; /** * struct xe_devcoredump_snapshot - Crash snapshot @@ -26,13 +27,23 @@ struct xe_devcoredump_snapshot { /** @boot_time: Relative boot time so the uptime can be calculated. */ ktime_t boot_time; + /** @gt: Affected GT, used by forcewake for delayed capture */ + struct xe_gt *gt; + /** @work: Workqueue for deferred capture outside of signaling context */ + struct work_struct work; + /* GuC snapshots */ /** @ct: GuC CT snapshot */ struct xe_guc_ct_snapshot *ct; /** @ge: Guc Engine snapshot */ struct xe_guc_submit_exec_queue_snapshot *ge; + /** @hwe: HW Engine snapshot array */ struct xe_hw_engine_snapshot *hwe[XE_NUM_HW_ENGINES]; + /** @job: Snapshot of job state */ + struct xe_sched_job_snapshot *job; + /** @vm: Snapshot of VM state */ + struct xe_vm_snapshot *vm; }; /** @@ -44,8 +55,6 @@ struct xe_devcoredump_snapshot { * for reading the information. */ struct xe_devcoredump { - /** @xe: Xe device. */ - struct xe_device *xe; /** @captured: The snapshot of the first hang has already been taken. 
*/
 	bool captured;
 
 	/** @snapshot: Snapshot is captured at time of the first crash */
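The devcoredump changes in this series hang the expensive part of state capture off a worker: INIT_WORK() at crash time, flush_work() before the dump is read, cancel_work_sync() on free. A generic sketch of that idiom, with illustrative demo_* names rather than the driver's own types:

#include <linux/container_of.h>
#include <linux/types.h>
#include <linux/workqueue.h>

struct demo_snapshot {
	struct work_struct work;
	bool deferred_done;
};

static void demo_capture_deferred(struct work_struct *work)
{
	struct demo_snapshot *ss = container_of(work, struct demo_snapshot, work);

	/* Heavy capture runs here, outside the dma-fence signalling path. */
	ss->deferred_done = true;
}

static void demo_on_crash(struct demo_snapshot *ss)
{
	INIT_WORK(&ss->work, demo_capture_deferred);
	queue_work(system_unbound_wq, &ss->work);
}

static void demo_read(struct demo_snapshot *ss)
{
	/* Make sure the deferred part has landed before exposing the dump. */
	flush_work(&ss->work);
}

static void demo_free(struct demo_snapshot *ss)
{
	cancel_work_sync(&ss->work);
}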
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 5176c27e4b..b3b37ed832 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -15,32 +15,35 @@
 #include <drm/drm_print.h>
 #include <drm/xe_drm.h>
 
+#include "display/xe_display.h"
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_regs.h"
 #include "xe_bo.h"
 #include "xe_debugfs.h"
-#include "xe_display.h"
 #include "xe_dma_buf.h"
 #include "xe_drm_client.h"
 #include "xe_drv.h"
-#include "xe_exec_queue.h"
 #include "xe_exec.h"
+#include "xe_exec_queue.h"
 #include "xe_ggtt.h"
+#include "xe_gsc_proxy.h"
 #include "xe_gt.h"
 #include "xe_gt_mcr.h"
+#include "xe_hwmon.h"
 #include "xe_irq.h"
+#include "xe_memirq.h"
 #include "xe_mmio.h"
 #include "xe_module.h"
 #include "xe_pat.h"
 #include "xe_pcode.h"
 #include "xe_pm.h"
 #include "xe_query.h"
+#include "xe_sriov.h"
 #include "xe_tile.h"
 #include "xe_ttm_stolen_mgr.h"
 #include "xe_ttm_sys_mgr.h"
 #include "xe_vm.h"
 #include "xe_wait_user_fence.h"
-#include "xe_hwmon.h"
 
 #ifdef CONFIG_LOCKDEP
 struct lockdep_map xe_device_mem_access_lockdep_map = {
@@ -190,6 +193,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy)
 {
	struct xe_device *xe = to_xe_device(dev);
 
+	if (xe->preempt_fence_wq)
+		destroy_workqueue(xe->preempt_fence_wq);
+
	if (xe->ordered_wq)
		destroy_workqueue(xe->ordered_wq);
 
@@ -255,9 +261,15 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
	INIT_LIST_HEAD(&xe->pinned.external_vram);
	INIT_LIST_HEAD(&xe->pinned.evicted);
 
+	xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0);
	xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
	xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
-	if (!xe->ordered_wq || !xe->unordered_wq) {
+	if (!xe->ordered_wq || !xe->unordered_wq ||
+	    !xe->preempt_fence_wq) {
+		/*
+		 * Cleanup done in xe_device_destroy via
+		 * drmm_add_action_or_reset registered above
+		 */
		drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
		err = -ENOMEM;
		goto err;
@@ -377,8 +389,14 @@ mask_err:
	return err;
 }
 
-/*
- * Initialize MMIO resources that don't require any knowledge about tile count.
+/**
+ * xe_device_probe_early - Device early probe
+ * @xe: xe device instance
+ *
+ * Initialize MMIO resources that don't require any
+ * knowledge about tile count. Also initialize pcode.
+ *
+ * Return: 0 on success, error code on failure
 */
 int xe_device_probe_early(struct xe_device *xe)
 {
@@ -392,6 +410,10 @@ int xe_device_probe_early(struct xe_device *xe)
	if (err)
		return err;
 
+	err = xe_pcode_probe_early(xe);
+	if (err)
+		return err;
+
	return 0;
 }
 
@@ -424,10 +446,15 @@ int xe_device_probe(struct xe_device *xe)
	struct xe_tile *tile;
	struct xe_gt *gt;
	int err;
+	u8 last_gt;
	u8 id;
 
	xe_pat_init_early(xe);
 
+	err = xe_sriov_init(xe);
+	if (err)
+		return err;
+
	xe->info.mem_region_mask = 1;
	err = xe_display_init_nommio(xe);
	if (err)
@@ -448,18 +475,26 @@ int xe_device_probe(struct xe_device *xe)
		err = xe_ggtt_init_early(tile->mem.ggtt);
		if (err)
			return err;
+		if (IS_SRIOV_VF(xe)) {
+			err = xe_memirq_init(&tile->sriov.vf.memirq);
+			if (err)
+				return err;
+		}
	}
 
-	err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe);
-	if (err)
-		return err;
-
	for_each_gt(gt, xe, id) {
-		err = xe_pcode_probe(gt);
+		err = xe_gt_init_hwconfig(gt);
		if (err)
			return err;
	}
 
+	err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe);
+	if (err)
+		return err;
+
+	for_each_gt(gt, xe, id)
+		xe_pcode_init(gt);
+
	err = xe_display_init_noirq(xe);
	if (err)
		return err;
@@ -502,16 +537,18 @@ int xe_device_probe(struct xe_device *xe)
		goto err_irq_shutdown;
 
	for_each_gt(gt, xe, id) {
+		last_gt = id;
+
		err = xe_gt_init(gt);
		if (err)
-			goto err_irq_shutdown;
+			goto err_fini_gt;
	}
 
	xe_heci_gsc_init(xe);
 
	err = xe_display_init(xe);
	if (err)
-		goto err_irq_shutdown;
+		goto err_fini_gt;
 
	err = drm_dev_register(&xe->drm, 0);
	if (err)
@@ -532,6 +569,14 @@ int xe_device_probe(struct xe_device *xe)
 err_fini_display:
	xe_display_driver_remove(xe);
 
+err_fini_gt:
+	for_each_gt(gt, xe, id) {
+		if (id < last_gt)
+			xe_gt_remove(gt);
+		else
+			break;
+	}
+
 err_irq_shutdown:
	xe_irq_shutdown(xe);
 err:
@@ -549,12 +594,18 @@ static void xe_device_remove_display(struct xe_device *xe)
 
 void xe_device_remove(struct xe_device *xe)
 {
+	struct xe_gt *gt;
+	u8 id;
+
	xe_device_remove_display(xe);
 
	xe_display_fini(xe);
 
	xe_heci_gsc_fini(xe);
 
+	for_each_gt(gt, xe, id)
+		xe_gt_remove(gt);
+
	xe_irq_shutdown(xe);
 }
 
@@ -659,3 +710,33 @@ void xe_device_mem_access_put(struct xe_device *xe)
 
	xe_assert(xe, ref >= 0);
 }
+
+void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p)
+{
+	struct xe_gt *gt;
+	u8 id;
+
+	drm_printf(p, "PCI ID: 0x%04x\n", xe->info.devid);
+	drm_printf(p, "PCI revision: 0x%02x\n", xe->info.revid);
+
+	for_each_gt(gt, xe, id) {
+		drm_printf(p, "GT id: %u\n", id);
+		drm_printf(p, "\tType: %s\n",
+			   gt->info.type == XE_GT_TYPE_MAIN ?
"main" : "media"); + drm_printf(p, "\tIP ver: %u.%u.%u\n", + REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid), + REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid), + REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid)); + drm_printf(p, "\tCS reference clock: %u\n", gt->info.reference_clock); + } +} + +u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address) +{ + return sign_extend64(address, xe->info.va_bits - 1); +} + +u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address) +{ + return address & GENMASK_ULL(xe->info.va_bits - 1, 0); +} diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index bf8efb44ed..d413bc2c6b 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -164,6 +164,16 @@ static inline bool xe_device_has_sriov(struct xe_device *xe) return xe->info.has_sriov; } +static inline bool xe_device_has_memirq(struct xe_device *xe) +{ + return GRAPHICS_VERx100(xe) >= 1250; +} + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size); +void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p); + +u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address); +u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address); + #endif diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index e8491979a6..8e3a222b41 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -16,6 +16,7 @@ #include "xe_heci_gsc.h" #include "xe_gt_types.h" #include "xe_lmtt_types.h" +#include "xe_memirq_types.h" #include "xe_platform_types.h" #include "xe_pt_types.h" #include "xe_sriov_types.h" @@ -142,10 +143,10 @@ struct xe_tile { * * 8MB-16MB: global GTT */ struct { - /** @size: size of tile's MMIO space */ + /** @mmio.size: size of tile's MMIO space */ size_t size; - /** @regs: pointer to tile's MMIO space (starting with registers) */ + /** @mmio.regs: pointer to tile's MMIO space (starting with registers) */ void __iomem *regs; } mmio; @@ -155,31 +156,31 @@ struct xe_tile { * Each tile has its own additional 256MB (28-bit) MMIO-extension space. */ struct { - /** @size: size of tile's additional MMIO-extension space */ + /** @mmio_ext.size: size of tile's additional MMIO-extension space */ size_t size; - /** @regs: pointer to tile's additional MMIO-extension space */ + /** @mmio_ext.regs: pointer to tile's additional MMIO-extension space */ void __iomem *regs; } mmio_ext; /** @mem: memory management info for tile */ struct { /** - * @vram: VRAM info for tile. + * @mem.vram: VRAM info for tile. * * Although VRAM is associated with a specific tile, it can * still be accessed by all tiles' GTs. */ struct xe_mem_region vram; - /** @vram_mgr: VRAM TTM manager */ + /** @mem.vram_mgr: VRAM TTM manager */ struct xe_ttm_vram_mgr *vram_mgr; - /** @ggtt: Global graphics translation table */ + /** @mem.ggtt: Global graphics translation table */ struct xe_ggtt *ggtt; /** - * @kernel_bb_pool: Pool from which batchbuffers are allocated. + * @mem.kernel_bb_pool: Pool from which batchbuffers are allocated. * * Media GT shares a pool with its primary GT. */ @@ -192,6 +193,10 @@ struct xe_tile { /** @sriov.pf.lmtt: Local Memory Translation Table. */ struct xe_lmtt lmtt; } pf; + struct { + /** @sriov.vf.memirq: Memory Based Interrupts. 
*/ + struct xe_memirq memirq; + } vf; } sriov; /** @migrate: Migration helper for vram blits and clearing */ @@ -213,68 +218,68 @@ struct xe_device { /** @info: device info */ struct intel_device_info { - /** @graphics_name: graphics IP name */ + /** @info.graphics_name: graphics IP name */ const char *graphics_name; - /** @media_name: media IP name */ + /** @info.media_name: media IP name */ const char *media_name; - /** @tile_mmio_ext_size: size of MMIO extension space, per-tile */ + /** @info.tile_mmio_ext_size: size of MMIO extension space, per-tile */ u32 tile_mmio_ext_size; - /** @graphics_verx100: graphics IP version */ + /** @info.graphics_verx100: graphics IP version */ u32 graphics_verx100; - /** @media_verx100: media IP version */ + /** @info.media_verx100: media IP version */ u32 media_verx100; - /** @mem_region_mask: mask of valid memory regions */ + /** @info.mem_region_mask: mask of valid memory regions */ u32 mem_region_mask; - /** @platform: XE platform enum */ + /** @info.platform: XE platform enum */ enum xe_platform platform; - /** @subplatform: XE subplatform enum */ + /** @info.subplatform: XE subplatform enum */ enum xe_subplatform subplatform; - /** @devid: device ID */ + /** @info.devid: device ID */ u16 devid; - /** @revid: device revision */ + /** @info.revid: device revision */ u8 revid; - /** @step: stepping information for each IP */ + /** @info.step: stepping information for each IP */ struct xe_step_info step; - /** @dma_mask_size: DMA address bits */ + /** @info.dma_mask_size: DMA address bits */ u8 dma_mask_size; - /** @vram_flags: Vram flags */ + /** @info.vram_flags: Vram flags */ u8 vram_flags; - /** @tile_count: Number of tiles */ + /** @info.tile_count: Number of tiles */ u8 tile_count; - /** @gt_count: Total number of GTs for entire device */ + /** @info.gt_count: Total number of GTs for entire device */ u8 gt_count; - /** @vm_max_level: Max VM level */ + /** @info.vm_max_level: Max VM level */ u8 vm_max_level; - /** @va_bits: Maximum bits of a virtual address */ + /** @info.va_bits: Maximum bits of a virtual address */ u8 va_bits; - /** @is_dgfx: is discrete device */ + /** @info.is_dgfx: is discrete device */ u8 is_dgfx:1; - /** @has_asid: Has address space ID */ + /** @info.has_asid: Has address space ID */ u8 has_asid:1; - /** @force_execlist: Forced execlist submission */ + /** @info.force_execlist: Forced execlist submission */ u8 force_execlist:1; - /** @has_flat_ccs: Whether flat CCS metadata is used */ + /** @info.has_flat_ccs: Whether flat CCS metadata is used */ u8 has_flat_ccs:1; - /** @has_llc: Device has a shared CPU+GPU last level cache */ + /** @info.has_llc: Device has a shared CPU+GPU last level cache */ u8 has_llc:1; - /** @has_mmio_ext: Device has extra MMIO address range */ + /** @info.has_mmio_ext: Device has extra MMIO address range */ u8 has_mmio_ext:1; - /** @has_range_tlb_invalidation: Has range based TLB invalidations */ + /** @info.has_range_tlb_invalidation: Has range based TLB invalidations */ u8 has_range_tlb_invalidation:1; - /** @has_sriov: Supports SR-IOV */ + /** @info.has_sriov: Supports SR-IOV */ u8 has_sriov:1; - /** @has_usm: Device has unified shared memory support */ + /** @info.has_usm: Device has unified shared memory support */ u8 has_usm:1; - /** @enable_display: display enabled */ + /** @info.enable_display: display enabled */ u8 enable_display:1; - /** @skip_mtcfg: skip Multi-Tile configuration from MTCFG register */ + /** @info.skip_mtcfg: skip Multi-Tile configuration from MTCFG register */ u8 
skip_mtcfg:1; - /** @skip_pcode: skip access to PCODE uC */ + /** @info.skip_pcode: skip access to PCODE uC */ u8 skip_pcode:1; - /** @has_heci_gscfi: device has heci gscfi */ + /** @info.has_heci_gscfi: device has heci gscfi */ u8 has_heci_gscfi:1; - /** @skip_guc_pc: Skip GuC based PM feature init */ + /** @info.skip_guc_pc: Skip GuC based PM feature init */ u8 skip_guc_pc:1; #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) @@ -286,10 +291,10 @@ struct xe_device { /** @irq: device interrupt state */ struct { - /** @lock: lock for processing irq's on this device */ + /** @irq.lock: lock for processing irq's on this device */ spinlock_t lock; - /** @enabled: interrupts enabled on this device */ + /** @irq.enabled: interrupts enabled on this device */ bool enabled; } irq; @@ -298,17 +303,17 @@ struct xe_device { /** @mmio: mmio info for device */ struct { - /** @size: size of MMIO space for device */ + /** @mmio.size: size of MMIO space for device */ size_t size; - /** @regs: pointer to MMIO space for device */ + /** @mmio.regs: pointer to MMIO space for device */ void __iomem *regs; } mmio; /** @mem: memory info for device */ struct { - /** @vram: VRAM info for device */ + /** @mem.vram: VRAM info for device */ struct xe_mem_region vram; - /** @sys_mgr: system TTM manager */ + /** @mem.sys_mgr: system TTM manager */ struct ttm_resource_manager sys_mgr; } mem; @@ -316,46 +321,51 @@ struct xe_device { struct { /** @sriov.__mode: SR-IOV mode (Don't access directly!) */ enum xe_sriov_mode __mode; + /** @sriov.wq: workqueue used by the virtualization workers */ + struct workqueue_struct *wq; } sriov; /** @clients: drm clients info */ struct { - /** @lock: Protects drm clients info */ + /** @clients.lock: Protects drm clients info */ spinlock_t lock; - /** @count: number of drm clients */ + /** @clients.count: number of drm clients */ u64 count; } clients; /** @usm: unified memory state */ struct { - /** @asid: convert a ASID to VM */ + /** @usm.asid: convert a ASID to VM */ struct xarray asid_to_vm; - /** @next_asid: next ASID, used to cyclical alloc asids */ + /** @usm.next_asid: next ASID, used to cyclical alloc asids */ u32 next_asid; - /** @num_vm_in_fault_mode: number of VM in fault mode */ + /** @usm.num_vm_in_fault_mode: number of VM in fault mode */ u32 num_vm_in_fault_mode; - /** @num_vm_in_non_fault_mode: number of VM in non-fault mode */ + /** @usm.num_vm_in_non_fault_mode: number of VM in non-fault mode */ u32 num_vm_in_non_fault_mode; - /** @lock: protects UM state */ + /** @usm.lock: protects UM state */ struct mutex lock; } usm; /** @pinned: pinned BO state */ struct { - /** @lock: protected pinned BO list state */ + /** @pinned.lock: protected pinned BO list state */ spinlock_t lock; - /** @evicted: pinned kernel BO that are present */ + /** @pinned.kernel_bo_present: pinned kernel BO that are present */ struct list_head kernel_bo_present; - /** @evicted: pinned BO that have been evicted */ + /** @pinned.evicted: pinned BO that have been evicted */ struct list_head evicted; - /** @external_vram: pinned external BO in vram*/ + /** @pinned.external_vram: pinned external BO in vram*/ struct list_head external_vram; } pinned; /** @ufence_wq: user fence wait queue */ wait_queue_head_t ufence_wq; + /** @preempt_fence_wq: used to serialize preempt fences */ + struct workqueue_struct *preempt_fence_wq; + /** @ordered_wq: used to serialize compute mode resume */ struct workqueue_struct *ordered_wq; @@ -370,36 +380,57 @@ struct xe_device { * triggering additional actions when they occur. 
*/
	struct {
-		/** @ref: ref count of memory accesses */
+		/** @mem_access.ref: ref count of memory accesses */
		atomic_t ref;
+
+		/**
+		 * @mem_access.vram_userfault: Encapsulate vram_userfault
+		 * related stuff
+		 */
+		struct {
+			/**
+			 * @mem_access.vram_userfault.lock: Protects access to
+			 * @vram_userfault.list. Using a mutex instead of a
+			 * spinlock as the lock is applied to an entire list
+			 * operation, which may sleep
+			 */
+			struct mutex lock;
+
+			/**
+			 * @mem_access.vram_userfault.list: Keep list of
+			 * userfaulted vram BOs, which need their mmap mappings
+			 * released on the runtime-suspend path
+			 */
+			struct list_head list;
+		} vram_userfault;
	} mem_access;
 
	/**
	 * @pat: Encapsulate PAT related stuff
	 */
	struct {
-		/** Internal operations to abstract platforms */
+		/** @pat.ops: Internal operations to abstract platforms */
		const struct xe_pat_ops *ops;
-		/** PAT table to program in the HW */
+		/** @pat.table: PAT table to program in the HW */
		const struct xe_pat_table_entry *table;
-		/** Number of PAT entries */
+		/** @pat.n_entries: Number of PAT entries */
		int n_entries;
		u32 idx[__XE_CACHE_LEVEL_COUNT];
	} pat;
 
	/** @d3cold: Encapsulate d3cold related stuff */
	struct {
-		/** capable: Indicates if root port is d3cold capable */
+		/** @d3cold.capable: Indicates if root port is d3cold capable */
		bool capable;
-		/** @allowed: Indicates if d3cold is a valid device state */
+		/** @d3cold.allowed: Indicates if d3cold is a valid device state */
		bool allowed;
-		/** @power_lost: Indicates if card has really lost power. */
+		/** @d3cold.power_lost: Indicates if card has really lost power. */
		bool power_lost;
		/**
-		 * @vram_threshold:
+		 * @d3cold.vram_threshold:
		 *
		 * This represents the permissible threshold(in megabytes)
		 * for vram save/restore. d3cold will be disallowed,
@@ -408,7 +439,7 @@ struct xe_device {
		 * Default threshold value is 300mb.
		 */
		u32 vram_threshold;
-		/** @lock: protect vram_threshold */
+		/** @d3cold.lock: protect vram_threshold */
		struct mutex lock;
	} d3cold;
 
@@ -516,17 +547,17 @@ struct xe_file {
	/** @vm: VM state for file */
	struct {
-		/** @xe: xarray to store VMs */
+		/** @vm.xe: xarray to store VMs */
		struct xarray xa;
-		/** @lock: protects file VM state */
+		/** @vm.lock: protects file VM state */
		struct mutex lock;
	} vm;
 
	/** @exec_queue: Submission exec queue state for file */
	struct {
-		/** @xe: xarray to store engines */
+		/** @exec_queue.xe: xarray to store engines */
		struct xarray xa;
-		/** @lock: protects file engine state */
+		/** @exec_queue.lock: protects file engine state */
		struct mutex lock;
	} exec_queue;
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index 6040e4d22b..87c10bd795 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -113,7 +113,7 @@ static void bo_meminfo(struct xe_bo *bo,
	else
		mem_type = XE_PL_TT;
 
-	if (bo->ttm.base.handle_count > 1)
+	if (drm_gem_object_is_shared_for_memory_stats(&bo->ttm.base))
		stats[mem_type].shared += sz;
	else
		stats[mem_type].private += sz;
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 2cd4ad2fcf..cc5e0f75de 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -94,9 +94,16 @@
 *	Unlock all
 */
 
+/*
+ * Add validation and rebinding to the drm_exec locking loop, since both can
+ * trigger eviction which may require sleeping dma_resv locks.
+ */ static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec) { - return drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec); + struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm); + + /* The fence slot added here is intended for the exec sched job. */ + return xe_vm_validate_rebind(vm, &vm_exec->exec, 1); } int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -219,7 +226,6 @@ retry: } vm_exec.vm = &vm->gpuvm; - vm_exec.num_fences = 1 + vm->xe->info.tile_count; vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT; if (xe_vm_in_lr_mode(vm)) { drm_exec_init(exec, vm_exec.flags, 0); @@ -251,14 +257,6 @@ retry: goto err_exec; } - /* - * Rebind any invalidated userptr or evicted BOs in the VM, non-compute - * VM mode only. - */ - err = xe_vm_rebind(vm, false); - if (err) - goto err_put_job; - /* Wait behind rebinds */ if (!xe_vm_in_lr_mode(vm)) { err = drm_sched_job_add_resv_dependencies(&job->drm, diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 5093c56d60..ead25d5e72 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -30,21 +30,23 @@ enum xe_exec_queue_sched_prop { XE_EXEC_QUEUE_SCHED_PROP_MAX = 3, }; -static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe, - struct xe_vm *vm, - u32 logical_mask, - u16 width, struct xe_hw_engine *hwe, - u32 flags) +static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, + u64 extensions, int ext_number, bool create); + +static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, + struct xe_vm *vm, + u32 logical_mask, + u16 width, struct xe_hw_engine *hwe, + u32 flags, u64 extensions) { struct xe_exec_queue *q; struct xe_gt *gt = hwe->gt; int err; - int i; /* only kernel queues can be permanent */ XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL)); - q = kzalloc(sizeof(*q) + sizeof(struct xe_lrc) * width, GFP_KERNEL); + q = kzalloc(struct_size(q, lrc, width), GFP_KERNEL); if (!q) return ERR_PTR(-ENOMEM); @@ -52,8 +54,6 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe, q->flags = flags; q->hwe = hwe; q->gt = gt; - if (vm) - q->vm = xe_vm_get(vm); q->class = hwe->class; q->width = width; q->logical_mask = logical_mask; @@ -74,17 +74,43 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe, else q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL; + if (extensions) { + /* + * may set q->usm, must come before xe_lrc_init(), + * may overwrite q->sched_props, must come before q->ops->init() + */ + err = exec_queue_user_extensions(xe, q, extensions, 0, true); + if (err) { + kfree(q); + return ERR_PTR(err); + } + } + + if (vm) + q->vm = xe_vm_get(vm); + if (xe_exec_queue_is_parallel(q)) { q->parallel.composite_fence_ctx = dma_fence_context_alloc(1); q->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO; } - if (q->flags & EXEC_QUEUE_FLAG_VM) { - q->bind.fence_ctx = dma_fence_context_alloc(1); - q->bind.fence_seqno = XE_FENCE_INITIAL_SEQNO; - } - for (i = 0; i < width; ++i) { - err = xe_lrc_init(q->lrc + i, hwe, q, vm, SZ_16K); + return q; +} + +static void __xe_exec_queue_free(struct xe_exec_queue *q) +{ + if (q->vm) + xe_vm_put(q->vm); + kfree(q); +} + +static int __xe_exec_queue_init(struct xe_exec_queue *q) +{ + struct xe_device *xe = gt_to_xe(q->gt); + int i, err; + + for (i = 0; i < q->width; ++i) { + err = xe_lrc_init(q->lrc + i, q->hwe, q, q->vm, SZ_16K); if (err) goto err_lrc; } @@ -101,35 +127,47 @@ 
static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe, * can perform GuC CT actions when needed. Caller is expected to have * already grabbed the rpm ref outside any sensitive locks. */ - if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !vm)) + if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm)) drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe)); - return q; + return 0; err_lrc: for (i = i - 1; i >= 0; --i) xe_lrc_finish(q->lrc + i); - kfree(q); - return ERR_PTR(err); + return err; } struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, u16 width, - struct xe_hw_engine *hwe, u32 flags) + struct xe_hw_engine *hwe, u32 flags, + u64 extensions) { struct xe_exec_queue *q; int err; + q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags, + extensions); + if (IS_ERR(q)) + return q; + if (vm) { err = xe_vm_lock(vm, true); if (err) - return ERR_PTR(err); + goto err_post_alloc; } - q = __xe_exec_queue_create(xe, vm, logical_mask, width, hwe, flags); + + err = __xe_exec_queue_init(q); if (vm) xe_vm_unlock(vm); + if (err) + goto err_post_alloc; return q; + +err_post_alloc: + __xe_exec_queue_free(q); + return ERR_PTR(err); } struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, @@ -154,7 +192,7 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe if (!logical_mask) return ERR_PTR(-ENODEV); - return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags); + return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, 0); } void xe_exec_queue_destroy(struct kref *ref) @@ -180,10 +218,7 @@ void xe_exec_queue_fini(struct xe_exec_queue *q) xe_lrc_finish(q->lrc + i); if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm)) xe_device_mem_access_put(gt_to_xe(q->gt)); - if (q->vm) - xe_vm_put(q->vm); - - kfree(q); + __xe_exec_queue_free(q); } void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance) @@ -241,7 +276,11 @@ static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe))) return -EPERM; - return q->ops->set_priority(q, value); + if (!create) + return q->ops->set_priority(q, value); + + q->sched_props.priority = value; + return 0; } static bool xe_exec_queue_enforce_schedule_limit(void) @@ -308,7 +347,11 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue * !xe_hw_engine_timeout_in_range(value, min, max)) return -EINVAL; - return q->ops->set_timeslice(q, value); + if (!create) + return q->ops->set_timeslice(q, value); + + q->sched_props.timeslice_us = value; + return 0; } typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, @@ -537,6 +580,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { for_each_gt(gt, xe, id) { struct xe_exec_queue *new; + u32 flags; if (xe_gt_is_media_type(gt)) continue; @@ -555,14 +599,12 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, /* The migration vm doesn't hold rpm ref */ xe_device_mem_access_get(xe); + flags = EXEC_QUEUE_FLAG_VM | (id ? 
EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0); + migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate); new = xe_exec_queue_create(xe, migrate_vm, logical_mask, - args->width, hwe, - EXEC_QUEUE_FLAG_PERSISTENT | - EXEC_QUEUE_FLAG_VM | - (id ? - EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : - 0)); + args->width, hwe, flags, + args->extensions); xe_device_mem_access_put(xe); /* now held by engine */ @@ -608,7 +650,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, } q = xe_exec_queue_create(xe, vm, logical_mask, - args->width, hwe, 0); + args->width, hwe, 0, + args->extensions); up_read(&vm->lock); xe_vm_put(vm); if (IS_ERR(q)) @@ -624,12 +667,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, } } - if (args->extensions) { - err = exec_queue_user_extensions(xe, q, args->extensions, 0, true); - if (XE_IOCTL_DBG(xe, err)) - goto kill_exec_queue; - } - mutex_lock(&xef->exec_queue.lock); err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); mutex_unlock(&xef->exec_queue.lock); diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h index d959cc4a1a..02ce8d2046 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.h +++ b/drivers/gpu/drm/xe/xe_exec_queue.h @@ -16,7 +16,8 @@ struct xe_file; struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, u32 logical_mask, u16 width, - struct xe_hw_engine *hw_engine, u32 flags); + struct xe_hw_engine *hw_engine, u32 flags, + u64 extensions); struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, struct xe_vm *vm, enum xe_engine_class class, u32 flags); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 22e1dffd06..462b331950 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -105,50 +105,39 @@ struct xe_exec_queue { struct xe_guc_exec_queue *guc; }; - union { - /** - * @parallel: parallel submission state - */ - struct { - /** @composite_fence_ctx: context composite fence */ - u64 composite_fence_ctx; - /** @composite_fence_seqno: seqno for composite fence */ - u32 composite_fence_seqno; - } parallel; - /** - * @bind: bind submission state - */ - struct { - /** @fence_ctx: context bind fence */ - u64 fence_ctx; - /** @fence_seqno: seqno for bind fence */ - u32 fence_seqno; - } bind; - }; + /** + * @parallel: parallel submission state + */ + struct { + /** @parallel.composite_fence_ctx: context composite fence */ + u64 composite_fence_ctx; + /** @parallel.composite_fence_seqno: seqno for composite fence */ + u32 composite_fence_seqno; + } parallel; /** @sched_props: scheduling properties */ struct { - /** @timeslice_us: timeslice period in micro-seconds */ + /** @sched_props.timeslice_us: timeslice period in micro-seconds */ u32 timeslice_us; - /** @preempt_timeout_us: preemption timeout in micro-seconds */ + /** @sched_props.preempt_timeout_us: preemption timeout in micro-seconds */ u32 preempt_timeout_us; - /** @job_timeout_ms: job timeout in milliseconds */ + /** @sched_props.job_timeout_ms: job timeout in milliseconds */ u32 job_timeout_ms; - /** @priority: priority of this exec queue */ + /** @sched_props.priority: priority of this exec queue */ enum xe_exec_queue_priority priority; } sched_props; /** @compute: compute exec queue state */ struct { - /** @pfence: preemption fence */ + /** @compute.pfence: preemption fence */ struct dma_fence *pfence; - /** @context: preemption fence context */ + /** @compute.context: preemption 
fence context */ u64 context; - /** @seqno: preemption fence seqno */ + /** @compute.seqno: preemption fence seqno */ u32 seqno; - /** @link: link into VM's list of exec queues */ + /** @compute.link: link into VM's list of exec queues */ struct list_head link; - /** @lock: preemption fences lock */ + /** @compute.lock: preemption fences lock */ spinlock_t lock; } compute; @@ -185,8 +174,6 @@ struct xe_exec_queue_ops { int (*set_timeslice)(struct xe_exec_queue *q, u32 timeslice_us); /** @set_preempt_timeout: Set preemption timeout for exec queue */ int (*set_preempt_timeout)(struct xe_exec_queue *q, u32 preempt_timeout_us); - /** @set_job_timeout: Set job timeout for exec queue */ - int (*set_job_timeout)(struct xe_exec_queue *q, u32 job_timeout_ms); /** * @suspend: Suspend exec queue from executing, allowed to be called * multiple times in a row before resume with the caveat that diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c index acb4d9f38f..dece278593 100644 --- a/drivers/gpu/drm/xe/xe_execlist.c +++ b/drivers/gpu/drm/xe/xe_execlist.c @@ -416,13 +416,6 @@ static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, return 0; } -static int execlist_exec_queue_set_job_timeout(struct xe_exec_queue *q, - u32 job_timeout_ms) -{ - /* NIY */ - return 0; -} - static int execlist_exec_queue_suspend(struct xe_exec_queue *q) { /* NIY */ @@ -453,7 +446,6 @@ static const struct xe_exec_queue_ops execlist_exec_queue_ops = { .set_priority = execlist_exec_queue_set_priority, .set_timeslice = execlist_exec_queue_set_timeslice, .set_preempt_timeout = execlist_exec_queue_set_preempt_timeout, - .set_job_timeout = execlist_exec_queue_set_job_timeout, .suspend = execlist_exec_queue_suspend, .suspend_wait = execlist_exec_queue_suspend_wait, .resume = execlist_exec_queue_resume, diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 3efd2d066b..ab96edb058 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -11,12 +11,16 @@ #include <drm/i915_drm.h> #include "regs/xe_gt_regs.h" +#include "regs/xe_regs.h" +#include "xe_assert.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_gt_tlb_invalidation.h" #include "xe_map.h" #include "xe_mmio.h" +#include "xe_sriov.h" #include "xe_wopcm.h" #define XELPG_GGTT_PTE_PAT0 BIT_ULL(52) @@ -141,7 +145,11 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) struct pci_dev *pdev = to_pci_dev(xe->drm.dev); unsigned int gsm_size; - gsm_size = probe_gsm_size(pdev); + if (IS_SRIOV_VF(xe)) + gsm_size = SZ_8M; /* GGTT is expected to be 4GiB */ + else + gsm_size = probe_gsm_size(pdev); + if (gsm_size == 0) { drm_err(&xe->drm, "Hardware reported no preallocated GSM\n"); return -ENOMEM; @@ -312,6 +320,74 @@ void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix) } } +static void xe_ggtt_dump_node(struct xe_ggtt *ggtt, + const struct drm_mm_node *node, const char *description) +{ + char buf[10]; + + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { + string_get_size(node->size, 1, STRING_UNITS_2, buf, sizeof(buf)); + xe_gt_dbg(ggtt->tile->primary_gt, "GGTT %#llx-%#llx (%s) %s\n", + node->start, node->start + node->size, buf, description); + } +} + +/** + * xe_ggtt_balloon - prevent allocation of specified GGTT addresses + * @ggtt: the &xe_ggtt where we want to make reservation + * @start: the starting GGTT address of the reserved region + * @end: then end GGTT address of the reserved region + * @node: the &drm_mm_node to hold reserved GGTT node + 
* + * Use xe_ggtt_deballoon() to release a reserved GGTT node. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 end, struct drm_mm_node *node) +{ + int err; + + xe_tile_assert(ggtt->tile, start < end); + xe_tile_assert(ggtt->tile, IS_ALIGNED(start, XE_PAGE_SIZE)); + xe_tile_assert(ggtt->tile, IS_ALIGNED(end, XE_PAGE_SIZE)); + xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(node)); + + node->color = 0; + node->start = start; + node->size = end - start; + + mutex_lock(&ggtt->lock); + err = drm_mm_reserve_node(&ggtt->mm, node); + mutex_unlock(&ggtt->lock); + + if (xe_gt_WARN(ggtt->tile->primary_gt, err, + "Failed to balloon GGTT %#llx-%#llx (%pe)\n", + node->start, node->start + node->size, ERR_PTR(err))) + return err; + + xe_ggtt_dump_node(ggtt, node, "balloon"); + return 0; +} + +/** + * xe_ggtt_deballoon - release a reserved GGTT region + * @ggtt: the &xe_ggtt where reserved node belongs + * @node: the &drm_mm_node with reserved GGTT region + * + * See xe_ggtt_balloon() for details. + */ +void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node) +{ + if (!drm_mm_node_allocated(node)) + return; + + xe_ggtt_dump_node(ggtt, node, "deballoon"); + + mutex_lock(&ggtt->lock); + drm_mm_remove_node(node); + mutex_unlock(&ggtt->lock); +} + int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node, u32 size, u32 align, u32 mm_flags) { @@ -334,7 +410,8 @@ int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { - u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB]; + u16 cache_mode = bo->flags & XE_BO_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB; + u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; u64 start = bo->ggtt_node.start; u64 offset, pte; diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index a09c166dff..42705e1338 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -16,6 +16,9 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt); int xe_ggtt_init(struct xe_ggtt *ggtt); void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix); +int xe_ggtt_balloon(struct xe_ggtt *ggtt, u64 start, u64 size, struct drm_mm_node *node); +void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node); + int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, u32 size, u32 align); int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index a8a895cf4b..a61994292c 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -7,12 +7,14 @@ #include <drm/drm_managed.h> +#include <generated/xe_wa_oob.h> + #include "abi/gsc_mkhi_commands_abi.h" -#include "generated/xe_wa_oob.h" #include "xe_bb.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" +#include "xe_gsc_proxy.h" #include "xe_gsc_submit.h" #include "xe_gt.h" #include "xe_gt_printk.h" @@ -242,8 +244,31 @@ static int gsc_upload(struct xe_gsc *gsc) if (err) return err; + return 0; +} + +static int gsc_upload_and_init(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + int ret; + + ret = gsc_upload(gsc); + if (ret) + return ret; + + xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); xe_gt_dbg(gt, "GSC FW async load completed\n"); + /* HuC auth failure is not fatal */ + if (xe_huc_is_authenticated(>->uc.huc, XE_HUC_AUTH_VIA_GUC)) + 
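The xe_ggtt_balloon()/xe_ggtt_deballoon() pair added in the xe_ggtt.c hunk above reserves a fixed GGTT range with drm_mm_reserve_node() under the GGTT lock, so later allocations skip it. A hedged usage sketch; the range below is invented for illustration (a real VF would derive it from the GGTT configuration assigned by the PF):

  /* illustrative only: keep GGTT allocations out of [0, SZ_256M) */
  static int demo_reserve_foreign_ggtt(struct xe_ggtt *ggtt,
  				       struct drm_mm_node *node)
  {
  	int err;

  	/* start/end must be XE_PAGE_SIZE aligned, node must be unused */
  	err = xe_ggtt_balloon(ggtt, 0, SZ_256M, node);
  	if (err)
  		return err;

  	/* ... run with the range excluded from the allocator ... */

  	/* removes the reservation again; a no-op on an unallocated node */
  	xe_ggtt_deballoon(ggtt, node);
  	return 0;
  }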
xe_huc_auth(>->uc.huc, XE_HUC_AUTH_VIA_GSC); + + ret = xe_gsc_proxy_start(gsc); + if (ret) + return ret; + + xe_gt_dbg(gt, "GSC proxy init completed\n"); + return 0; } @@ -252,24 +277,28 @@ static void gsc_work(struct work_struct *work) struct xe_gsc *gsc = container_of(work, typeof(*gsc), work); struct xe_gt *gt = gsc_to_gt(gsc); struct xe_device *xe = gt_to_xe(gt); + u32 actions; int ret; + spin_lock_irq(&gsc->lock); + actions = gsc->work_actions; + gsc->work_actions = 0; + spin_unlock_irq(&gsc->lock); + xe_device_mem_access_get(xe); xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); - ret = gsc_upload(gsc); - if (ret && ret != -EEXIST) { - xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL); - goto out; + if (actions & GSC_ACTION_FW_LOAD) { + ret = gsc_upload_and_init(gsc); + if (ret && ret != -EEXIST) + xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_LOAD_FAIL); + else + xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_RUNNING); } - xe_uc_fw_change_status(&gsc->fw, XE_UC_FIRMWARE_TRANSFERRED); - - /* HuC auth failure is not fatal */ - if (xe_huc_is_authenticated(>->uc.huc, XE_HUC_AUTH_VIA_GUC)) - xe_huc_auth(>->uc.huc, XE_HUC_AUTH_VIA_GSC); + if (actions & GSC_ACTION_SW_PROXY) + xe_gsc_proxy_request_handler(gsc); -out: xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); xe_device_mem_access_put(xe); } @@ -282,6 +311,7 @@ int xe_gsc_init(struct xe_gsc *gsc) gsc->fw.type = XE_UC_FW_TYPE_GSC; INIT_WORK(&gsc->work, gsc_work); + spin_lock_init(&gsc->lock); /* The GSC uC is only available on the media GT */ if (tile->media_gt && (gt != tile->media_gt)) { @@ -302,6 +332,10 @@ int xe_gsc_init(struct xe_gsc *gsc) else if (ret) goto out; + ret = xe_gsc_proxy_init(gsc); + if (ret && ret != -ENODEV) + goto out; + return 0; out: @@ -356,7 +390,7 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1, hwe, EXEC_QUEUE_FLAG_KERNEL | - EXEC_QUEUE_FLAG_PERMANENT); + EXEC_QUEUE_FLAG_PERMANENT, 0); if (IS_ERR(q)) { xe_gt_err(gt, "Failed to create queue for GSC submission\n"); err = PTR_ERR(q); @@ -401,6 +435,10 @@ void xe_gsc_load_start(struct xe_gsc *gsc) return; } + spin_lock_irq(&gsc->lock); + gsc->work_actions |= GSC_ACTION_FW_LOAD; + spin_unlock_irq(&gsc->lock); + queue_work(gsc->wq, &gsc->work); } @@ -410,6 +448,15 @@ void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc) flush_work(&gsc->work); } +/** + * xe_gsc_remove() - Clean up the GSC structures before driver removal + * @gsc: the GSC uC + */ +void xe_gsc_remove(struct xe_gsc *gsc) +{ + xe_gsc_proxy_remove(gsc); +} + /* * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a * GSC engine reset by writing a notification bit in the GS1 register and then diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h index bc1ef7f31e..c6fb32e3fd 100644 --- a/drivers/gpu/drm/xe/xe_gsc.h +++ b/drivers/gpu/drm/xe/xe_gsc.h @@ -14,6 +14,7 @@ int xe_gsc_init(struct xe_gsc *gsc); int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc); void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc); void xe_gsc_load_start(struct xe_gsc *gsc); +void xe_gsc_remove(struct xe_gsc *gsc); void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c new file mode 100644 index 0000000000..309ef80e3b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -0,0 +1,537 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include "xe_gsc_proxy.h" + +#include <linux/component.h> 
+#include <linux/delay.h>
+
+#include <drm/drm_managed.h>
+#include <drm/i915_component.h>
+#include <drm/i915_gsc_proxy_mei_interface.h>
+
+#include "abi/gsc_proxy_commands_abi.h"
+#include "regs/xe_gsc_regs.h"
+#include "xe_bo.h"
+#include "xe_gsc.h"
+#include "xe_gsc_submit.h"
+#include "xe_gt.h"
+#include "xe_gt_printk.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_pm.h"
+
+/*
+ * GSC proxy:
+ * The GSC uC needs to communicate with the CSME to perform certain operations.
+ * Since the GSC can't perform this communication directly on platforms where it
+ * is integrated in GT, the graphics driver needs to transfer the messages from
+ * GSC to CSME and back. The proxy flow must be manually started after the GSC
+ * is loaded to signal to GSC that we're ready to handle its messages and allow
+ * it to query its init data from CSME; GSC will then trigger an HECI2 interrupt
+ * if it needs to send messages to CSME again.
+ * The proxy flow is as follows:
+ * 1 - Xe submits a request to GSC asking for the message to CSME
+ * 2 - GSC replies with the proxy header + payload for CSME
+ * 3 - Xe sends the reply from GSC as-is to CSME via the mei proxy component
+ * 4 - CSME replies with the proxy header + payload for GSC
+ * 5 - Xe submits a request to GSC with the reply from CSME
+ * 6 - GSC replies either with a new header + payload (same as step 2, so we
+ *     restart from there) or with an end message.
+ */
+
+/*
+ * The component should load quite quickly in most cases, but it could take
+ * a bit. Using a very big timeout just to cover the worst case scenario.
+ */
+#define GSC_PROXY_INIT_TIMEOUT_MS 20000
+
+/* shorthand define for code compactness */
+#define PROXY_HDR_SIZE (sizeof(struct xe_gsc_proxy_header))
+
+/* the protocol supports up to 32K in each direction */
+#define GSC_PROXY_BUFFER_SIZE SZ_32K
+#define GSC_PROXY_CHANNEL_SIZE (GSC_PROXY_BUFFER_SIZE * 2)
+
+static struct xe_gt *
+gsc_to_gt(struct xe_gsc *gsc)
+{
+	return container_of(gsc, struct xe_gt, uc.gsc);
+}
+
+static inline struct xe_device *kdev_to_xe(struct device *kdev)
+{
+	return dev_get_drvdata(kdev);
+}
+
+static bool gsc_proxy_init_done(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	u32 fwsts1 = xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE));
+
+	return REG_FIELD_GET(HECI1_FWSTS1_CURRENT_STATE, fwsts1) ==
+	       HECI1_FWSTS1_PROXY_STATE_NORMAL;
+}
+
+static void __gsc_proxy_irq_rmw(struct xe_gsc *gsc, u32 clr, u32 set)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+
+	/* make sure we never accidentally write the RST bit */
+	clr |= HECI_H_CSR_RST;
+
+	xe_mmio_rmw32(gt, HECI_H_CSR(MTL_GSC_HECI2_BASE), clr, set);
+}
+
+static void gsc_proxy_irq_clear(struct xe_gsc *gsc)
+{
+	/* The status bit is cleared by writing to it */
+	__gsc_proxy_irq_rmw(gsc, 0, HECI_H_CSR_IS);
+}
+
+static void gsc_proxy_irq_toggle(struct xe_gsc *gsc, bool enabled)
+{
+	u32 set = enabled ? HECI_H_CSR_IE : 0;
+	u32 clr = enabled ?
0 : HECI_H_CSR_IE; + + __gsc_proxy_irq_rmw(gsc, clr, set); +} + +static int proxy_send_to_csme(struct xe_gsc *gsc, u32 size) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + struct i915_gsc_proxy_component *comp = gsc->proxy.component; + int ret; + + ret = comp->ops->send(comp->mei_dev, gsc->proxy.to_csme, size); + if (ret < 0) { + xe_gt_err(gt, "Failed to send CSME proxy message\n"); + return ret; + } + + ret = comp->ops->recv(comp->mei_dev, gsc->proxy.from_csme, GSC_PROXY_BUFFER_SIZE); + if (ret < 0) { + xe_gt_err(gt, "Failed to receive CSME proxy message\n"); + return ret; + } + + return ret; +} + +static int proxy_send_to_gsc(struct xe_gsc *gsc, u32 size) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + u64 addr_in = xe_bo_ggtt_addr(gsc->proxy.bo); + u64 addr_out = addr_in + GSC_PROXY_BUFFER_SIZE; + int err; + + /* the message must contain at least the gsc and proxy headers */ + if (size > GSC_PROXY_BUFFER_SIZE) { + xe_gt_err(gt, "Invalid GSC proxy message size: %u\n", size); + return -EINVAL; + } + + err = xe_gsc_pkt_submit_kernel(gsc, addr_in, size, + addr_out, GSC_PROXY_BUFFER_SIZE); + if (err) { + xe_gt_err(gt, "Failed to submit gsc proxy rq (%pe)\n", ERR_PTR(err)); + return err; + } + + return 0; +} + +static int validate_proxy_header(struct xe_gsc_proxy_header *header, + u32 source, u32 dest, u32 max_size) +{ + u32 type = FIELD_GET(GSC_PROXY_TYPE, header->hdr); + u32 length = FIELD_GET(GSC_PROXY_PAYLOAD_LENGTH, header->hdr); + + if (header->destination != dest || header->source != source) + return -ENOEXEC; + + if (length + PROXY_HDR_SIZE > max_size) + return -E2BIG; + + switch (type) { + case GSC_PROXY_MSG_TYPE_PROXY_PAYLOAD: + if (length > 0) + break; + fallthrough; + case GSC_PROXY_MSG_TYPE_PROXY_INVALID: + return -EIO; + default: + break; + } + + return 0; +} + +#define proxy_header_wr(xe_, map_, offset_, field_, val_) \ + xe_map_wr_field(xe_, map_, offset_, struct xe_gsc_proxy_header, field_, val_) + +#define proxy_header_rd(xe_, map_, offset_, field_) \ + xe_map_rd_field(xe_, map_, offset_, struct xe_gsc_proxy_header, field_) + +static u32 emit_proxy_header(struct xe_device *xe, struct iosys_map *map, u32 offset) +{ + xe_map_memset(xe, map, offset, 0, PROXY_HDR_SIZE); + + proxy_header_wr(xe, map, offset, hdr, + FIELD_PREP(GSC_PROXY_TYPE, GSC_PROXY_MSG_TYPE_PROXY_QUERY) | + FIELD_PREP(GSC_PROXY_PAYLOAD_LENGTH, 0)); + + proxy_header_wr(xe, map, offset, source, GSC_PROXY_ADDRESSING_KMD); + proxy_header_wr(xe, map, offset, destination, GSC_PROXY_ADDRESSING_GSC); + proxy_header_wr(xe, map, offset, status, 0); + + return offset + PROXY_HDR_SIZE; +} + +static int proxy_query(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_device *xe = gt_to_xe(gt); + struct xe_gsc_proxy_header *to_csme_hdr = gsc->proxy.to_csme; + void *to_csme_payload = gsc->proxy.to_csme + PROXY_HDR_SIZE; + u32 wr_offset; + u32 reply_offset; + u32 size; + int ret; + + wr_offset = xe_gsc_emit_header(xe, &gsc->proxy.to_gsc, 0, + HECI_MEADDRESS_PROXY, 0, PROXY_HDR_SIZE); + wr_offset = emit_proxy_header(xe, &gsc->proxy.to_gsc, wr_offset); + + size = wr_offset; + + while (1) { + /* + * Poison the GSC response header space to make sure we don't + * read a stale reply. 
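emit_proxy_header() and validate_proxy_header() above pack and unpack the header word with FIELD_PREP()/FIELD_GET() over the GSC_PROXY_TYPE and GSC_PROXY_PAYLOAD_LENGTH masks. A small sketch of that bitfield pattern; the DEMO_* masks are illustrative, the real layout lives in the ABI header:

  #include <linux/bitfield.h>
  #include <linux/bits.h>
  #include <linux/errno.h>

  #define DEMO_PROXY_TYPE		GENMASK(7, 0)
  #define DEMO_PROXY_PAYLOAD_LEN	GENMASK(23, 8)

  static u32 demo_pack_hdr(u32 type, u32 len)
  {
  	return FIELD_PREP(DEMO_PROXY_TYPE, type) |
  	       FIELD_PREP(DEMO_PROXY_PAYLOAD_LEN, len);
  }

  static int demo_check_hdr(u32 hdr, u32 max_size, u32 hdr_size)
  {
  	/* same shape as the length check in validate_proxy_header() */
  	if (FIELD_GET(DEMO_PROXY_PAYLOAD_LEN, hdr) + hdr_size > max_size)
  		return -E2BIG;
  	return 0;
  }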
+ */ + xe_gsc_poison_header(xe, &gsc->proxy.from_gsc, 0); + + /* send proxy message to GSC */ + ret = proxy_send_to_gsc(gsc, size); + if (ret) + goto proxy_error; + + /* check the reply from GSC */ + ret = xe_gsc_read_out_header(xe, &gsc->proxy.from_gsc, 0, + PROXY_HDR_SIZE, &reply_offset); + if (ret) { + xe_gt_err(gt, "Invalid gsc header in proxy reply (%pe)\n", + ERR_PTR(ret)); + goto proxy_error; + } + + /* copy the proxy header reply from GSC */ + xe_map_memcpy_from(xe, to_csme_hdr, &gsc->proxy.from_gsc, + reply_offset, PROXY_HDR_SIZE); + + /* stop if this was the last message */ + if (FIELD_GET(GSC_PROXY_TYPE, to_csme_hdr->hdr) == GSC_PROXY_MSG_TYPE_PROXY_END) + break; + + /* make sure the GSC-to-CSME proxy header is sane */ + ret = validate_proxy_header(to_csme_hdr, + GSC_PROXY_ADDRESSING_GSC, + GSC_PROXY_ADDRESSING_CSME, + GSC_PROXY_BUFFER_SIZE - reply_offset); + if (ret) { + xe_gt_err(gt, "invalid GSC to CSME proxy header! (%pe)\n", + ERR_PTR(ret)); + goto proxy_error; + } + + /* copy the rest of the message */ + size = FIELD_GET(GSC_PROXY_PAYLOAD_LENGTH, to_csme_hdr->hdr); + xe_map_memcpy_from(xe, to_csme_payload, &gsc->proxy.from_gsc, + reply_offset + PROXY_HDR_SIZE, size); + + /* send the GSC message to the CSME */ + ret = proxy_send_to_csme(gsc, size + PROXY_HDR_SIZE); + if (ret < 0) + goto proxy_error; + + /* reply size from CSME, including the proxy header */ + size = ret; + if (size < PROXY_HDR_SIZE) { + xe_gt_err(gt, "CSME to GSC proxy msg too small: 0x%x\n", size); + ret = -EPROTO; + goto proxy_error; + } + + /* make sure the CSME-to-GSC proxy header is sane */ + ret = validate_proxy_header(gsc->proxy.from_csme, + GSC_PROXY_ADDRESSING_CSME, + GSC_PROXY_ADDRESSING_GSC, + GSC_PROXY_BUFFER_SIZE - reply_offset); + if (ret) { + xe_gt_err(gt, "invalid CSME to GSC proxy header! %d\n", ret); + goto proxy_error; + } + + /* Emit a new header for sending the reply to the GSC */ + wr_offset = xe_gsc_emit_header(xe, &gsc->proxy.to_gsc, 0, + HECI_MEADDRESS_PROXY, 0, size); + + /* copy the CSME reply and update the total msg size to include the GSC header */ + xe_map_memcpy_to(xe, &gsc->proxy.to_gsc, wr_offset, gsc->proxy.from_csme, size); + + size += wr_offset; + } + +proxy_error: + return ret < 0 ? 
ret : 0; +} + +int xe_gsc_proxy_request_handler(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + int slept; + int err; + + if (!gsc->proxy.component_added) + return -ENODEV; + + /* when GSC is loaded, we can queue this before the component is bound */ + for (slept = 0; slept < GSC_PROXY_INIT_TIMEOUT_MS; slept += 100) { + if (gsc->proxy.component) + break; + + msleep(100); + } + + mutex_lock(&gsc->proxy.mutex); + if (!gsc->proxy.component) { + xe_gt_err(gt, "GSC proxy component not bound!\n"); + err = -EIO; + } else { + /* + * clear the pending interrupt and allow new proxy requests to + * be generated while we handle the current one + */ + gsc_proxy_irq_clear(gsc); + err = proxy_query(gsc); + } + mutex_unlock(&gsc->proxy.mutex); + return err; +} + +void xe_gsc_proxy_irq_handler(struct xe_gsc *gsc, u32 iir) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + + if (unlikely(!iir)) + return; + + if (!gsc->proxy.component) { + xe_gt_err(gt, "GSC proxy irq received without the component being bound!\n"); + return; + } + + spin_lock(&gsc->lock); + gsc->work_actions |= GSC_ACTION_SW_PROXY; + spin_unlock(&gsc->lock); + + queue_work(gsc->wq, &gsc->work); +} + +static int xe_gsc_proxy_component_bind(struct device *xe_kdev, + struct device *mei_kdev, void *data) +{ + struct xe_device *xe = kdev_to_xe(xe_kdev); + struct xe_gt *gt = xe->tiles[0].media_gt; + struct xe_gsc *gsc = >->uc.gsc; + + mutex_lock(&gsc->proxy.mutex); + gsc->proxy.component = data; + gsc->proxy.component->mei_dev = mei_kdev; + mutex_unlock(&gsc->proxy.mutex); + + return 0; +} + +static void xe_gsc_proxy_component_unbind(struct device *xe_kdev, + struct device *mei_kdev, void *data) +{ + struct xe_device *xe = kdev_to_xe(xe_kdev); + struct xe_gt *gt = xe->tiles[0].media_gt; + struct xe_gsc *gsc = >->uc.gsc; + + xe_gsc_wait_for_worker_completion(gsc); + + mutex_lock(&gsc->proxy.mutex); + gsc->proxy.component = NULL; + mutex_unlock(&gsc->proxy.mutex); +} + +static const struct component_ops xe_gsc_proxy_component_ops = { + .bind = xe_gsc_proxy_component_bind, + .unbind = xe_gsc_proxy_component_unbind, +}; + +static void proxy_channel_free(struct drm_device *drm, void *arg) +{ + struct xe_gsc *gsc = arg; + + if (!gsc->proxy.bo) + return; + + if (gsc->proxy.to_csme) { + kfree(gsc->proxy.to_csme); + gsc->proxy.to_csme = NULL; + gsc->proxy.from_csme = NULL; + } + + if (gsc->proxy.bo) { + iosys_map_clear(&gsc->proxy.to_gsc); + iosys_map_clear(&gsc->proxy.from_gsc); + xe_bo_unpin_map_no_vm(gsc->proxy.bo); + gsc->proxy.bo = NULL; + } +} + +static int proxy_channel_alloc(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = gt_to_xe(gt); + struct xe_bo *bo; + void *csme; + int err; + + csme = kzalloc(GSC_PROXY_CHANNEL_SIZE, GFP_KERNEL); + if (!csme) + return -ENOMEM; + + bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_PROXY_CHANNEL_SIZE, + ttm_bo_type_kernel, + XE_BO_CREATE_SYSTEM_BIT | + XE_BO_CREATE_GGTT_BIT); + if (IS_ERR(bo)) { + kfree(csme); + return PTR_ERR(bo); + } + + gsc->proxy.bo = bo; + gsc->proxy.to_gsc = IOSYS_MAP_INIT_OFFSET(&bo->vmap, 0); + gsc->proxy.from_gsc = IOSYS_MAP_INIT_OFFSET(&bo->vmap, GSC_PROXY_BUFFER_SIZE); + gsc->proxy.to_csme = csme; + gsc->proxy.from_csme = csme + GSC_PROXY_BUFFER_SIZE; + + err = drmm_add_action_or_reset(&xe->drm, proxy_channel_free, gsc); + if (err) + return err; + + return 0; +} + +/** + * xe_gsc_proxy_init() - init objects and MEI component required by GSC proxy + * @gsc: the GSC uC + * + * Return: 0 if the initialization 
was successful, a negative errno otherwise.
+ */
+int xe_gsc_proxy_init(struct xe_gsc *gsc)
+{
+	int err;
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = tile_to_xe(tile);
+
+	mutex_init(&gsc->proxy.mutex);
+
+	if (!IS_ENABLED(CONFIG_INTEL_MEI_GSC_PROXY)) {
+		xe_gt_info(gt, "can't init GSC proxy due to missing mei component\n");
+		return -ENODEV;
+	}
+
+	/* no multi-tile devices with this feature yet */
+	if (tile->id > 0) {
+		xe_gt_err(gt, "unexpected GSC proxy init on tile %u\n", tile->id);
+		return -EINVAL;
+	}
+
+	err = proxy_channel_alloc(gsc);
+	if (err)
+		return err;
+
+	err = component_add_typed(xe->drm.dev, &xe_gsc_proxy_component_ops,
+				  I915_COMPONENT_GSC_PROXY);
+	if (err < 0) {
+		xe_gt_err(gt, "Failed to add GSC_PROXY component (%pe)\n", ERR_PTR(err));
+		return err;
+	}
+
+	gsc->proxy.component_added = true;
+
+	/* the component must be removed before unload, so we can't use drmm for cleanup */
+
+	return 0;
+}
+
+/**
+ * xe_gsc_proxy_remove() - remove the GSC proxy MEI component
+ * @gsc: the GSC uC
+ */
+void xe_gsc_proxy_remove(struct xe_gsc *gsc)
+{
+	struct xe_gt *gt = gsc_to_gt(gsc);
+	struct xe_device *xe = gt_to_xe(gt);
+	int err = 0;
+
+	if (!gsc->proxy.component_added)
+		return;
+
+	/* disable HECI2 IRQs */
+	xe_pm_runtime_get(xe);
+	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC);
+	if (err)
+		xe_gt_err(gt, "failed to get forcewake to disable GSC interrupts\n");
+
+	/* try to disable the irq even if forcewake failed */
+	gsc_proxy_irq_toggle(gsc, false);
+
+	if (!err)
+		xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC);
+	xe_pm_runtime_put(xe);
+
+	xe_gsc_wait_for_worker_completion(gsc);
+
+	component_del(xe->drm.dev, &xe_gsc_proxy_component_ops);
+	gsc->proxy.component_added = false;
+}
+
+/**
+ * xe_gsc_proxy_start() - start the proxy by submitting the first request
+ * @gsc: the GSC uC
+ *
+ * Return: 0 if the proxy is now enabled, a negative errno otherwise.
+ */
+int xe_gsc_proxy_start(struct xe_gsc *gsc)
+{
+	int err;
+
+	/* enable the proxy interrupt in the GSC shim layer */
+	gsc_proxy_irq_toggle(gsc, true);
+
+	/*
+	 * The handling of the first proxy request must be manually triggered to
+	 * notify the GSC that we're ready to support the proxy flow.
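The init/remove pair above follows the component-framework contract: whoever calls component_add_typed() must call component_del() before the device goes away, which is why cleanup can't be deferred to a drmm action. A condensed sketch of that pairing, with invented demo_* names:

  #include <linux/component.h>

  static int demo_bind(struct device *dev, struct device *peer, void *data)
  {
  	/* stash the peer's ops/device, typically under a mutex */
  	return 0;
  }

  static void demo_unbind(struct device *dev, struct device *peer, void *data)
  {
  	/* drop the stashed pointers under the same mutex */
  }

  static const struct component_ops demo_component_ops = {
  	.bind = demo_bind,
  	.unbind = demo_unbind,
  };

  static int demo_init(struct device *dev, int subcomponent)
  {
  	return component_add_typed(dev, &demo_component_ops, subcomponent);
  }

  static void demo_remove(struct device *dev)
  {
  	/* must run in the driver's remove path, not via drmm */
  	component_del(dev, &demo_component_ops);
  }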
+ */ + err = xe_gsc_proxy_request_handler(gsc); + if (err) + return err; + + if (!gsc_proxy_init_done(gsc)) { + xe_gt_err(gsc_to_gt(gsc), "GSC FW reports proxy init not completed\n"); + return -EIO; + } + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.h b/drivers/gpu/drm/xe/xe_gsc_proxy.h new file mode 100644 index 0000000000..908f9441f0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GSC_PROXY_H_ +#define _XE_GSC_PROXY_H_ + +#include <linux/types.h> + +struct xe_gsc; + +int xe_gsc_proxy_init(struct xe_gsc *gsc); +void xe_gsc_proxy_remove(struct xe_gsc *gsc); +int xe_gsc_proxy_start(struct xe_gsc *gsc); + +int xe_gsc_proxy_request_handler(struct xe_gsc *gsc); +void xe_gsc_proxy_irq_handler(struct xe_gsc *gsc, u32 iir); + +#endif diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.c b/drivers/gpu/drm/xe/xe_gsc_submit.c index 8c5381e591..348994b271 100644 --- a/drivers/gpu/drm/xe/xe_gsc_submit.c +++ b/drivers/gpu/drm/xe/xe_gsc_submit.c @@ -5,6 +5,8 @@ #include "xe_gsc_submit.h" +#include <linux/poison.h> + #include "abi/gsc_command_header_abi.h" #include "xe_bb.h" #include "xe_exec_queue.h" @@ -69,6 +71,17 @@ u32 xe_gsc_emit_header(struct xe_device *xe, struct iosys_map *map, u32 offset, }; /** + * xe_gsc_poison_header - poison the MTL GSC header in memory + * @xe: the Xe device + * @map: the iosys map to write to + * @offset: offset from the start of the map at which the header resides + */ +void xe_gsc_poison_header(struct xe_device *xe, struct iosys_map *map, u32 offset) +{ + xe_map_memset(xe, map, offset, POISON_FREE, GSC_HDR_SIZE); +}; + +/** * xe_gsc_check_and_update_pending - check the pending bit and update the input * header with the retry handle from the output header * @xe: the Xe device @@ -112,11 +125,18 @@ int xe_gsc_read_out_header(struct xe_device *xe, { u32 marker = mtl_gsc_header_rd(xe, map, offset, validity_marker); u32 size = mtl_gsc_header_rd(xe, map, offset, message_size); + u32 status = mtl_gsc_header_rd(xe, map, offset, status); u32 payload_size = size - GSC_HDR_SIZE; if (marker != GSC_HECI_VALIDITY_MARKER) return -EPROTO; + if (status != 0) { + drm_err(&xe->drm, "GSC header readout indicates error: %d\n", + status); + return -EINVAL; + } + if (size < GSC_HDR_SIZE || payload_size < min_payload_size) return -ENODATA; diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.h b/drivers/gpu/drm/xe/xe_gsc_submit.h index 0801da5d44..1939855031 100644 --- a/drivers/gpu/drm/xe/xe_gsc_submit.h +++ b/drivers/gpu/drm/xe/xe_gsc_submit.h @@ -14,6 +14,7 @@ struct xe_gsc; u32 xe_gsc_emit_header(struct xe_device *xe, struct iosys_map *map, u32 offset, u8 heci_client_id, u64 host_session_id, u32 payload_size); +void xe_gsc_poison_header(struct xe_device *xe, struct iosys_map *map, u32 offset); bool xe_gsc_check_and_update_pending(struct xe_device *xe, struct iosys_map *in, u32 offset_in, diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h index 57fefd66a7..138d8cc0f1 100644 --- a/drivers/gpu/drm/xe/xe_gsc_types.h +++ b/drivers/gpu/drm/xe/xe_gsc_types.h @@ -6,12 +6,17 @@ #ifndef _XE_GSC_TYPES_H_ #define _XE_GSC_TYPES_H_ +#include <linux/iosys-map.h> +#include <linux/mutex.h> +#include <linux/spinlock.h> +#include <linux/types.h> #include <linux/workqueue.h> #include "xe_uc_fw_types.h" struct xe_bo; struct xe_exec_queue; +struct i915_gsc_proxy_component; /** * struct xe_gsc - GSC @@ -34,6 +39,34 @@ struct xe_gsc { /** @work: delayed load 
and proxy handling work */ struct work_struct work; + + /** @lock: protects access to the work_actions mask */ + spinlock_t lock; + + /** @work_actions: mask of actions to be performed in the work */ + u32 work_actions; +#define GSC_ACTION_FW_LOAD BIT(0) +#define GSC_ACTION_SW_PROXY BIT(1) + + /** @proxy: sub-structure containing the SW proxy-related variables */ + struct { + /** @proxy.component: struct for communication with mei component */ + struct i915_gsc_proxy_component *component; + /** @proxy.mutex: protects the component binding and usage */ + struct mutex mutex; + /** @proxy.component_added: whether the component has been added */ + bool component_added; + /** @proxy.bo: object to store message to and from the GSC */ + struct xe_bo *bo; + /** @proxy.to_gsc: map of the memory used to send messages to the GSC */ + struct iosys_map to_gsc; + /** @proxy.from_gsc: map of the memory used to recv messages from the GSC */ + struct iosys_map from_gsc; + /** @proxy.to_csme: pointer to the memory used to send messages to CSME */ + void *to_csme; + /** @proxy.from_csme: pointer to the memory used to recv messages from CSME */ + void *from_csme; + } proxy; }; #endif diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index d23ee1397c..f9705430ad 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -78,6 +78,19 @@ void xe_gt_sanitize(struct xe_gt *gt) gt->uc.guc.submission_state.enabled = false; } +/** + * xe_gt_remove() - Clean up the GT structures before driver removal + * @gt: the GT object + * + * This function should only act on objects/structures that must be cleaned + * before the driver removal callback is complete and therefore can't be + * deferred to a drmm action. + */ +void xe_gt_remove(struct xe_gt *gt) +{ + xe_uc_remove(>->uc); +} + static void gt_fini(struct drm_device *drm, void *arg) { struct xe_gt *gt = arg; @@ -235,7 +248,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) return -ENOMEM; q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1, - hwe, EXEC_QUEUE_FLAG_KERNEL); + hwe, EXEC_QUEUE_FLAG_KERNEL, 0); if (IS_ERR(q)) { err = PTR_ERR(q); xe_gt_err(gt, "hwe %s: xe_exec_queue_create failed (%pe)\n", @@ -252,7 +265,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) } nop_q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), - 1, hwe, EXEC_QUEUE_FLAG_KERNEL); + 1, hwe, EXEC_QUEUE_FLAG_KERNEL, 0); if (IS_ERR(nop_q)) { err = PTR_ERR(nop_q); xe_gt_err(gt, "hwe %s: nop xe_exec_queue_create failed (%pe)\n", @@ -301,9 +314,6 @@ int xe_gt_init_early(struct xe_gt *gt) if (err) return err; - xe_gt_topology_init(gt); - xe_gt_mcr_init(gt); - err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); if (err) return err; @@ -327,7 +337,7 @@ static void dump_pat_on_error(struct xe_gt *gt) char prefix[32]; snprintf(prefix, sizeof(prefix), "[GT%u Error]", gt->info.id); - p = drm_debug_printer(prefix); + p = drm_dbg_printer(>_to_xe(gt)->drm, DRM_UT_DRIVER, prefix); xe_pat_dump(gt, &p); } @@ -341,8 +351,6 @@ static int gt_fw_domain_init(struct xe_gt *gt) if (err) goto err_hw_fence_irq; - xe_pat_init(gt); - if (!xe_gt_is_media_type(gt)) { err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt); if (err) @@ -351,22 +359,8 @@ static int gt_fw_domain_init(struct xe_gt *gt) xe_lmtt_init(>_to_tile(gt)->sriov.pf.lmtt); } - err = xe_uc_init(>->uc); - if (err) - goto err_force_wake; - - /* Raise GT freq to speed up HuC/GuC load */ - xe_guc_pc_init_early(>->uc.guc.pc); - - err = xe_uc_init_hwconfig(>->uc); - if (err) - goto err_force_wake; - 
xe_gt_idle_sysfs_init(>->gtidle); - /* XXX: Fake that we pull the engine mask from hwconfig blob */ - gt->info.engine_mask = gt->info.__engine_mask; - /* Enable per hw engine IRQs */ xe_irq_enable_hwe(gt); @@ -388,6 +382,12 @@ static int gt_fw_domain_init(struct xe_gt *gt) if (err) goto err_force_wake; + /* + * Stash hardware-reported version. Since this register does not exist + * on pre-MTL platforms, reading it there will (correctly) return 0. + */ + gt->info.gmdid = xe_mmio_read32(gt, GMD_ID); + err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); XE_WARN_ON(err); xe_device_mem_access_put(gt_to_xe(gt)); @@ -430,10 +430,6 @@ static int all_fw_domain_init(struct xe_gt *gt) if (err) goto err_force_wake; - err = xe_uc_init_post_hwconfig(>->uc); - if (err) - goto err_force_wake; - if (!xe_gt_is_media_type(gt)) { /* * USM has its only SA pool to non-block behind user operations @@ -460,6 +456,10 @@ static int all_fw_domain_init(struct xe_gt *gt) } } + err = xe_uc_init_post_hwconfig(>->uc); + if (err) + goto err_force_wake; + err = xe_uc_init_hw(>->uc); if (err) goto err_force_wake; @@ -489,6 +489,42 @@ err_hw_fence_irq: return err; } +/* + * Initialize enough GT to be able to load GuC in order to obtain hwconfig and + * enable CTB communication. + */ +int xe_gt_init_hwconfig(struct xe_gt *gt) +{ + int err; + + xe_device_mem_access_get(gt_to_xe(gt)); + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + goto out; + + xe_gt_topology_init(gt); + xe_gt_mcr_init(gt); + xe_pat_init(gt); + + err = xe_uc_init(>->uc); + if (err) + goto out_fw; + + err = xe_uc_init_hwconfig(>->uc); + if (err) + goto out_fw; + + /* XXX: Fake that we pull the engine mask from hwconfig blob */ + gt->info.engine_mask = gt->info.__engine_mask; + +out_fw: + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +out: + xe_device_mem_access_put(gt_to_xe(gt)); + + return err; +} + int xe_gt_init(struct xe_gt *gt) { int err; @@ -624,12 +660,12 @@ static int gt_reset(struct xe_gt *gt) if (err) goto err_out; + xe_gt_tlb_invalidation_reset(gt); + err = do_gt_reset(gt); if (err) goto err_out; - xe_gt_tlb_invalidation_reset(gt); - err = do_gt_restart(gt); if (err) goto err_out; diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 4486e083f5..ed6ea8057e 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -33,6 +33,7 @@ static inline bool xe_fault_inject_gt_reset(void) #endif struct xe_gt *xe_gt_alloc(struct xe_tile *tile); +int xe_gt_init_hwconfig(struct xe_gt *gt); int xe_gt_init_early(struct xe_gt *gt); int xe_gt_init(struct xe_gt *gt); int xe_gt_record_default_lrcs(struct xe_gt *gt); @@ -41,6 +42,7 @@ int xe_gt_suspend(struct xe_gt *gt); int xe_gt_resume(struct xe_gt *gt); void xe_gt_reset_async(struct xe_gt *gt); void xe_gt_sanitize(struct xe_gt *gt); +void xe_gt_remove(struct xe_gt *gt); /** * xe_gt_any_hw_engine_by_reset_domain - scan the list of engines and return the diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index 9fcae65b64..e7a39ad7ad 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -126,6 +126,13 @@ static const struct attribute *gt_idle_attrs[] = { static void gt_idle_sysfs_fini(struct drm_device *drm, void *arg) { struct kobject *kobj = arg; + struct xe_gt *gt = kobj_to_gt(kobj->parent); + + if (gt_to_xe(gt)->info.skip_guc_pc) { + XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); + xe_gt_idle_disable_c6(gt); + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + } sysfs_remove_files(kobj, gt_idle_attrs); 
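The gt_idle change above brackets the C6-disable writes with an explicit forcewake get/put for the skip_guc_pc case, matching the relaxed assertion (XE_FW_GT instead of XE_FORCEWAKE_ALL) in xe_gt_idle_disable_c6(). A minimal sketch of the bracket pattern; the register write below is only an example:

  static void demo_poke_gt_reg(struct xe_gt *gt)
  {
  	/* wake the GT forcewake domain before touching its registers */
  	if (XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)))
  		return;

  	xe_mmio_write32(gt, PG_ENABLE, 0);	/* example MMIO access */

  	xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
  }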
kobject_put(kobj); @@ -184,7 +191,7 @@ void xe_gt_idle_enable_c6(struct xe_gt *gt) void xe_gt_idle_disable_c6(struct xe_gt *gt) { xe_device_assert_mem_access(gt_to_xe(gt)); - xe_force_wake_assert_held(gt_to_fw(gt), XE_FORCEWAKE_ALL); + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); xe_mmio_write32(gt, PG_ENABLE, 0); xe_mmio_write32(gt, RC_CONTROL, 0); diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index 8546cd3cc5..a7ab9ba645 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -10,6 +10,7 @@ #include "xe_gt_topology.h" #include "xe_gt_types.h" #include "xe_mmio.h" +#include "xe_sriov.h" /** * DOC: GT Multicast/Replicated (MCR) Register Support @@ -38,6 +39,8 @@ * ``init_steering_*()`` functions is to apply the platform-specific rules for * each MCR register type to identify a steering target that will select a * non-terminated instance. + * + * MCR registers are not available on Virtual Function (VF). */ #define STEER_SEMAPHORE XE_REG(0xFD0) @@ -352,6 +355,9 @@ void xe_gt_mcr_init(struct xe_gt *gt) BUILD_BUG_ON(IMPLICIT_STEERING + 1 != NUM_STEERING_TYPES); BUILD_BUG_ON(ARRAY_SIZE(xe_steering_types) != NUM_STEERING_TYPES); + if (IS_SRIOV_VF(xe)) + return; + spin_lock_init(>->mcr_lock); if (gt->info.type == XE_GT_TYPE_MEDIA) { @@ -405,6 +411,9 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); + if (IS_SRIOV_VF(xe)) + return; + if (xe->info.platform == XE_DG2) { u32 steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, 0) | REG_FIELD_PREP(MCR_SUBSLICE_MASK, 2); @@ -588,6 +597,8 @@ u32 xe_gt_mcr_unicast_read_any(struct xe_gt *gt, struct xe_reg_mcr reg_mcr) u32 val; bool steer; + xe_gt_assert(gt, !IS_SRIOV_VF(gt_to_xe(gt))); + steer = xe_gt_mcr_get_nonterminated_steering(gt, reg_mcr, &group, &instance); @@ -619,6 +630,8 @@ u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, { u32 val; + xe_gt_assert(gt, !IS_SRIOV_VF(gt_to_xe(gt))); + mcr_lock(gt); val = rw_with_mcr_steering(gt, reg_mcr, MCR_OP_READ, group, instance, 0); mcr_unlock(gt); @@ -640,6 +653,8 @@ u32 xe_gt_mcr_unicast_read(struct xe_gt *gt, void xe_gt_mcr_unicast_write(struct xe_gt *gt, struct xe_reg_mcr reg_mcr, u32 value, int group, int instance) { + xe_gt_assert(gt, !IS_SRIOV_VF(gt_to_xe(gt))); + mcr_lock(gt); rw_with_mcr_steering(gt, reg_mcr, MCR_OP_WRITE, group, instance, value); mcr_unlock(gt); @@ -658,6 +673,8 @@ void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr reg_mcr, { struct xe_reg reg = to_xe_reg(reg_mcr); + xe_gt_assert(gt, !IS_SRIOV_VF(gt_to_xe(gt))); + /* * Synchronize with any unicast operations. 
Once we have exclusive * access, the MULTICAST bit should already be set, so there's no need diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index ab8536c4fd..fa9e9853c5 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -100,10 +100,9 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, { struct xe_bo *bo = xe_vma_bo(vma); struct xe_vm *vm = xe_vma_vm(vma); - unsigned int num_shared = 2; /* slots for bind + move */ int err; - err = xe_vm_prepare_vma(exec, vma, num_shared); + err = xe_vm_lock_vma(exec, vma); if (err) return err; @@ -287,9 +286,9 @@ static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) bool ret = false; spin_lock_irq(&pf_queue->lock); - if (pf_queue->head != pf_queue->tail) { + if (pf_queue->tail != pf_queue->head) { desc = (const struct xe_guc_pagefault_desc *) - (pf_queue->data + pf_queue->head); + (pf_queue->data + pf_queue->tail); pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0); pf->trva_fault = FIELD_GET(XE2_PFD_TRVA_FAULT, desc->dw0); @@ -307,7 +306,7 @@ static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) << PFD_VIRTUAL_ADDR_LO_SHIFT; - pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) % + pf_queue->tail = (pf_queue->tail + PF_MSG_LEN_DW) % PF_QUEUE_NUM_DW; ret = true; } @@ -320,7 +319,7 @@ static bool pf_queue_full(struct pf_queue *pf_queue) { lockdep_assert_held(&pf_queue->lock); - return CIRC_SPACE(pf_queue->tail, pf_queue->head, PF_QUEUE_NUM_DW) <= + return CIRC_SPACE(pf_queue->head, pf_queue->tail, PF_QUEUE_NUM_DW) <= PF_MSG_LEN_DW; } @@ -333,6 +332,11 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) u32 asid; bool full; + /* + * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0 + */ + BUILD_BUG_ON(PF_QUEUE_NUM_DW % PF_MSG_LEN_DW); + if (unlikely(len != PF_MSG_LEN_DW)) return -EPROTO; @@ -342,8 +346,8 @@ int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) spin_lock_irqsave(&pf_queue->lock, flags); full = pf_queue_full(pf_queue); if (!full) { - memcpy(pf_queue->data + pf_queue->tail, msg, len * sizeof(u32)); - pf_queue->tail = (pf_queue->tail + len) % PF_QUEUE_NUM_DW; + memcpy(pf_queue->data + pf_queue->head, msg, len * sizeof(u32)); + pf_queue->head = (pf_queue->head + len) % PF_QUEUE_NUM_DW; queue_work(gt->usm.pf_wq, &pf_queue->worker); } else { drm_warn(&xe->drm, "PF Queue full, shouldn't be possible"); @@ -389,7 +393,7 @@ static void pf_queue_work_func(struct work_struct *w) send_pagefault_reply(>->uc.guc, &reply); if (time_after(jiffies, threshold) && - pf_queue->head != pf_queue->tail) { + pf_queue->tail != pf_queue->head) { queue_work(gt->usm.pf_wq, w); break; } @@ -564,9 +568,9 @@ static bool get_acc(struct acc_queue *acc_queue, struct acc *acc) bool ret = false; spin_lock(&acc_queue->lock); - if (acc_queue->head != acc_queue->tail) { + if (acc_queue->tail != acc_queue->head) { desc = (const struct xe_guc_acc_desc *) - (acc_queue->data + acc_queue->head); + (acc_queue->data + acc_queue->tail); acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2); acc->sub_granularity = FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31 | @@ -579,7 +583,7 @@ static bool get_acc(struct acc_queue *acc_queue, struct acc *acc) acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI, desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO); - acc_queue->head = (acc_queue->head + ACC_MSG_LEN_DW) % + acc_queue->tail = 
(acc_queue->tail + ACC_MSG_LEN_DW) % ACC_QUEUE_NUM_DW; ret = true; } @@ -607,7 +611,7 @@ static void acc_queue_work_func(struct work_struct *w) } if (time_after(jiffies, threshold) && - acc_queue->head != acc_queue->tail) { + acc_queue->tail != acc_queue->head) { queue_work(gt->usm.acc_wq, w); break; } @@ -618,7 +622,7 @@ static bool acc_queue_full(struct acc_queue *acc_queue) { lockdep_assert_held(&acc_queue->lock); - return CIRC_SPACE(acc_queue->tail, acc_queue->head, ACC_QUEUE_NUM_DW) <= + return CIRC_SPACE(acc_queue->head, acc_queue->tail, ACC_QUEUE_NUM_DW) <= ACC_MSG_LEN_DW; } @@ -629,6 +633,11 @@ int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len) u32 asid; bool full; + /* + * The below logic doesn't work unless ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW == 0 + */ + BUILD_BUG_ON(ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW); + if (unlikely(len != ACC_MSG_LEN_DW)) return -EPROTO; @@ -638,9 +647,9 @@ int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len) spin_lock(&acc_queue->lock); full = acc_queue_full(acc_queue); if (!full) { - memcpy(acc_queue->data + acc_queue->tail, msg, + memcpy(acc_queue->data + acc_queue->head, msg, len * sizeof(u32)); - acc_queue->tail = (acc_queue->tail + len) % ACC_QUEUE_NUM_DW; + acc_queue->head = (acc_queue->head + len) % ACC_QUEUE_NUM_DW; queue_work(gt->usm.acc_wq, &acc_queue->worker); } else { drm_warn(>_to_xe(gt)->drm, "ACC Queue full, dropping ACC"); diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h index 5991bcadd4..c2b004d3f4 100644 --- a/drivers/gpu/drm/xe/xe_gt_printk.h +++ b/drivers/gpu/drm/xe/xe_gt_printk.h @@ -43,4 +43,48 @@ #define xe_gt_WARN_ON_ONCE(_gt, _condition) \ xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "gt_WARN_ON_ONCE", __stringify(_condition)) +static inline void __xe_gt_printfn_err(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_gt *gt = p->arg; + + xe_gt_err(gt, "%pV", vaf); +} + +static inline void __xe_gt_printfn_info(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_gt *gt = p->arg; + + xe_gt_info(gt, "%pV", vaf); +} + +/** + * xe_gt_err_printer - Construct a &drm_printer that outputs to xe_gt_err() + * @gt: the &xe_gt pointer to use in xe_gt_err() + * + * Return: The &drm_printer object. + */ +static inline struct drm_printer xe_gt_err_printer(struct xe_gt *gt) +{ + struct drm_printer p = { + .printfn = __xe_gt_printfn_err, + .arg = gt, + }; + return p; +} + +/** + * xe_gt_info_printer - Construct a &drm_printer that outputs to xe_gt_info() + * @gt: the &xe_gt pointer to use in xe_gt_info() + * + * Return: The &drm_printer object. + */ +static inline struct drm_printer xe_gt_info_printer(struct xe_gt *gt) +{ + struct drm_printer p = { + .printfn = __xe_gt_printfn_info, + .arg = gt, + }; + return p; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_printk.h b/drivers/gpu/drm/xe/xe_gt_sriov_printk.h new file mode 100644 index 0000000000..17624b1630 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_printk.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PRINTK_H_ +#define _XE_GT_SRIOV_PRINTK_H_ + +#include "xe_gt_printk.h" +#include "xe_sriov_printk.h" + +#define __xe_gt_sriov_printk(gt, _level, fmt, ...) \ + xe_gt_printk((gt), _level, "%s" fmt, xe_sriov_printk_prefix(gt_to_xe(gt)), ##__VA_ARGS__) + +#define xe_gt_sriov_err(_gt, _fmt, ...) \ + __xe_gt_sriov_printk(_gt, err, _fmt, ##__VA_ARGS__) + +#define xe_gt_sriov_notice(_gt, _fmt, ...) 
\ + __xe_gt_sriov_printk(_gt, notice, _fmt, ##__VA_ARGS__) + +#define xe_gt_sriov_info(_gt, _fmt, ...) \ + __xe_gt_sriov_printk(_gt, info, _fmt, ##__VA_ARGS__) + +#define xe_gt_sriov_dbg(_gt, _fmt, ...) \ + __xe_gt_sriov_printk(_gt, dbg, _fmt, ##__VA_ARGS__) + +/* for low level noisy debug messages */ +#ifdef CONFIG_DRM_XE_DEBUG_SRIOV +#define xe_gt_sriov_dbg_verbose(_gt, _fmt, ...) xe_gt_sriov_dbg(_gt, _fmt, ##__VA_ARGS__) +#else +#define xe_gt_sriov_dbg_verbose(_gt, _fmt, ...) typecheck(struct xe_gt *, (_gt)) +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index f4c485289d..e598a4363d 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -8,6 +8,7 @@ #include "abi/guc_actions_abi.h" #include "xe_device.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_trace.h" @@ -30,8 +31,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) break; trace_xe_gt_tlb_invalidation_fence_timeout(fence); - drm_err(>_to_xe(gt)->drm, "gt%d: TLB invalidation fence timeout, seqno=%d recv=%d", - gt->info.id, fence->seqno, gt->tlb_invalidation.seqno_recv); + xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d", + fence->seqno, gt->tlb_invalidation.seqno_recv); list_del(&fence->link); fence->base.error = -ETIME; @@ -60,7 +61,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) INIT_LIST_HEAD(>->tlb_invalidation.pending_fences); spin_lock_init(>->tlb_invalidation.pending_lock); spin_lock_init(>->tlb_invalidation.lock); - gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1); INIT_DELAYED_WORK(>->tlb_invalidation.fence_tdr, xe_gt_tlb_fence_timeout); @@ -320,9 +320,7 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, */ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) { - struct xe_device *xe = gt_to_xe(gt); struct xe_guc *guc = >->uc.guc; - struct drm_printer p = drm_err_printer(__func__); int ret; /* Execlists not supported */ @@ -337,8 +335,10 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) tlb_invalidation_seqno_past(gt, seqno), TLB_TIMEOUT); if (!ret) { - drm_err(&xe->drm, "gt%d: TLB invalidation time'd out, seqno=%d, recv=%d\n", - gt->info.id, seqno, gt->tlb_invalidation.seqno_recv); + struct drm_printer p = xe_gt_err_printer(gt); + + xe_gt_err(gt, "TLB invalidation time'd out, seqno=%d, recv=%d\n", + seqno, gt->tlb_invalidation.seqno_recv); xe_guc_ct_print(&guc->ct, &p, true); return -ETIME; } diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index a8d7f272c3..5dc62fe1be 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -84,7 +84,7 @@ void xe_gt_topology_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - struct drm_printer p = drm_debug_printer("GT topology"); + struct drm_printer p; int num_geometry_regs, num_compute_regs; get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs); @@ -107,6 +107,8 @@ xe_gt_topology_init(struct xe_gt *gt) XE2_GT_COMPUTE_DSS_2); load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss); + p = drm_dbg_printer(>_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology"); + xe_gt_topology_dump(gt, &p); } diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index f746846604..07b2f724ec 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -103,20 +103,22 @@ struct xe_gt { /** @info: GT info */ struct { - 
/** @type: type of GT */ + /** @info.type: type of GT */ enum xe_gt_type type; - /** @id: Unique ID of this GT within the PCI Device */ + /** @info.id: Unique ID of this GT within the PCI Device */ u8 id; - /** @reference_clock: clock frequency */ + /** @info.reference_clock: clock frequency */ u32 reference_clock; - /** @engine_mask: mask of engines present on GT */ + /** @info.engine_mask: mask of engines present on GT */ u64 engine_mask; /** - * @__engine_mask: mask of engines present on GT read from + * @info.__engine_mask: mask of engines present on GT read from * xe_pci.c, used to fake reading the engine_mask from the * hwconfig blob. */ u64 __engine_mask; + /** @info.gmdid: raw GMD_ID value from hardware */ + u32 gmdid; } info; /** @@ -125,14 +127,14 @@ struct xe_gt { * specific offset, as well as their own forcewake handling. */ struct { - /** @fw: force wake for GT */ + /** @mmio.fw: force wake for GT */ struct xe_force_wake fw; /** - * @adj_limit: adjust MMIO address if address is below this + * @mmio.adj_limit: adjust MMIO address if address is below this * value */ u32 adj_limit; - /** @adj_offset: offect to add to MMIO address when adjusting */ + /** @mmio.adj_offset: offect to add to MMIO address when adjusting */ u32 adj_offset; } mmio; @@ -144,7 +146,7 @@ struct xe_gt { /** @reset: state for GT resets */ struct { /** - * @worker: work so GT resets can done async allowing to reset + * @reset.worker: work so GT resets can done async allowing to reset * code to safely flush all code paths */ struct work_struct worker; @@ -152,36 +154,30 @@ struct xe_gt { /** @tlb_invalidation: TLB invalidation state */ struct { - /** @seqno: TLB invalidation seqno, protected by CT lock */ + /** @tlb_invalidation.seqno: TLB invalidation seqno, protected by CT lock */ #define TLB_INVALIDATION_SEQNO_MAX 0x100000 int seqno; /** - * @seqno_recv: last received TLB invalidation seqno, protected by CT lock + * @tlb_invalidation.seqno_recv: last received TLB invalidation seqno, + * protected by CT lock */ int seqno_recv; /** - * @pending_fences: list of pending fences waiting TLB + * @tlb_invalidation.pending_fences: list of pending fences waiting TLB * invaliations, protected by CT lock */ struct list_head pending_fences; /** - * @pending_lock: protects @pending_fences and updating - * @seqno_recv. + * @tlb_invalidation.pending_lock: protects @tlb_invalidation.pending_fences + * and updating @tlb_invalidation.seqno_recv. */ spinlock_t pending_lock; /** - * @fence_tdr: schedules a delayed call to + * @tlb_invalidation.fence_tdr: schedules a delayed call to * xe_gt_tlb_fence_timeout after the timeut interval is over. */ struct delayed_work fence_tdr; - /** @fence_context: context for TLB invalidation fences */ - u64 fence_context; - /** - * @fence_seqno: seqno to TLB invalidation fences, protected by - * tlb_invalidation.lock - */ - u32 fence_seqno; - /** @lock: protects TLB invalidation fences */ + /** @tlb_invalidation.lock: protects TLB invalidation fences */ spinlock_t lock; } tlb_invalidation; @@ -196,7 +192,7 @@ struct xe_gt { /** @usm: unified shared memory state */ struct { /** - * @bb_pool: Pool from which batchbuffers, for USM operations + * @usm.bb_pool: Pool from which batchbuffers, for USM operations * (e.g. migrations, fixing page tables), are allocated. 
* Dedicated pool needed so USM operations to not get blocked * behind any user operations which may have resulted in a @@ -204,66 +200,67 @@ struct xe_gt { */ struct xe_sa_manager *bb_pool; /** - * @reserved_bcs_instance: reserved BCS instance used for USM + * @usm.reserved_bcs_instance: reserved BCS instance used for USM * operations (e.g. mmigrations, fixing page tables) */ u16 reserved_bcs_instance; - /** @pf_wq: page fault work queue, unbound, high priority */ + /** @usm.pf_wq: page fault work queue, unbound, high priority */ struct workqueue_struct *pf_wq; - /** @acc_wq: access counter work queue, unbound, high priority */ + /** @usm.acc_wq: access counter work queue, unbound, high priority */ struct workqueue_struct *acc_wq; /** - * @pf_queue: Page fault queue used to sync faults so faults can + * @usm.pf_queue: Page fault queue used to sync faults so faults can * be processed not under the GuC CT lock. The queue is sized so * it can sync all possible faults (1 per physical engine). * Multiple queues exists for page faults from different VMs are * be processed in parallel. */ struct pf_queue { - /** @gt: back pointer to GT */ + /** @usm.pf_queue.gt: back pointer to GT */ struct xe_gt *gt; #define PF_QUEUE_NUM_DW 128 - /** @data: data in the page fault queue */ + /** @usm.pf_queue.data: data in the page fault queue */ u32 data[PF_QUEUE_NUM_DW]; /** - * @head: head pointer in DWs for page fault queue, - * moved by worker which processes faults. + * @usm.pf_queue.tail: tail pointer in DWs for page fault queue, + * moved by worker which processes faults (consumer). */ - u16 head; + u16 tail; /** - * @tail: tail pointer in DWs for page fault queue, - * moved by G2H handler. + * @usm.pf_queue.head: head pointer in DWs for page fault queue, + * moved by G2H handler (producer). */ - u16 tail; - /** @lock: protects page fault queue */ + u16 head; + /** @usm.pf_queue.lock: protects page fault queue */ spinlock_t lock; - /** @worker: to process page faults */ + /** @usm.pf_queue.worker: to process page faults */ struct work_struct worker; #define NUM_PF_QUEUE 4 } pf_queue[NUM_PF_QUEUE]; /** - * @acc_queue: Same as page fault queue, cannot process access + * @usm.acc_queue: Same as page fault queue, cannot process access * counters under CT lock. */ struct acc_queue { - /** @gt: back pointer to GT */ + /** @usm.acc_queue.gt: back pointer to GT */ struct xe_gt *gt; #define ACC_QUEUE_NUM_DW 128 - /** @data: data in the page fault queue */ + /** @usm.acc_queue.data: data in the page fault queue */ u32 data[ACC_QUEUE_NUM_DW]; /** - * @head: head pointer in DWs for page fault queue, - * moved by worker which processes faults. + * @usm.acc_queue.tail: tail pointer in DWs for access counter queue, + * moved by worker which processes counters + * (consumer). */ - u16 head; + u16 tail; /** - * @tail: tail pointer in DWs for page fault queue, - * moved by G2H handler. + * @usm.acc_queue.head: head pointer in DWs for access counter queue, + * moved by G2H handler (producer). 
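The head/tail swap running through the xe_gt_pagefault.c and xe_gt_types.h hunks above aligns these queues with the kernel's circ_buf convention: the producer (G2H handler) advances head, the consumer (worker) advances tail, and free space is CIRC_SPACE(head, tail, size). A small sketch of that convention with invented demo_* names:

  #include <linux/circ_buf.h>
  #include <linux/string.h>

  #define DEMO_Q_NUM_DW	128	/* must be a multiple of DEMO_MSG_LEN_DW */
  #define DEMO_MSG_LEN_DW	8

  /* producer: called from the IRQ/G2H side, advances head */
  static bool demo_push(u32 *data, u16 *head, u16 tail, const u32 *msg)
  {
  	if (CIRC_SPACE(*head, tail, DEMO_Q_NUM_DW) <= DEMO_MSG_LEN_DW)
  		return false;	/* full: mirror the driver and drop */

  	memcpy(data + *head, msg, DEMO_MSG_LEN_DW * sizeof(u32));
  	*head = (*head + DEMO_MSG_LEN_DW) % DEMO_Q_NUM_DW;
  	return true;
  }

  /* consumer: called from the worker, advances tail */
  static bool demo_pop(u32 *data, u16 head, u16 *tail, u32 *msg)
  {
  	if (*tail == head)
  		return false;	/* empty */

  	memcpy(msg, data + *tail, DEMO_MSG_LEN_DW * sizeof(u32));
  	*tail = (*tail + DEMO_MSG_LEN_DW) % DEMO_Q_NUM_DW;
  	return true;
  }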
*/ - u16 tail; - /** @lock: protects page fault queue */ + u16 head; + /** @usm.acc_queue.lock: protects access counter queue */ spinlock_t lock; - /** @worker: to process access counters */ + /** @usm.acc_queue.worker: to process access counters */ struct work_struct worker; #define NUM_ACC_QUEUE 4 } acc_queue[NUM_ACC_QUEUE]; @@ -300,7 +297,7 @@ struct xe_gt { /** @pcode: GT's PCODE */ struct { - /** @lock: protecting GT's PCODE mailbox data */ + /** @pcode.lock: protecting GT's PCODE mailbox data */ struct mutex lock; } pcode; @@ -312,32 +309,32 @@ struct xe_gt { /** @mocs: info */ struct { - /** @uc_index: UC index */ + /** @mocs.uc_index: UC index */ u8 uc_index; - /** @wb_index: WB index, only used on L3_CCS platforms */ + /** @mocs.wb_index: WB index, only used on L3_CCS platforms */ u8 wb_index; } mocs; /** @fuse_topo: GT topology reported by fuse registers */ struct { - /** @g_dss_mask: dual-subslices usable by geometry */ + /** @fuse_topo.g_dss_mask: dual-subslices usable by geometry */ xe_dss_mask_t g_dss_mask; - /** @c_dss_mask: dual-subslices usable by compute */ + /** @fuse_topo.c_dss_mask: dual-subslices usable by compute */ xe_dss_mask_t c_dss_mask; - /** @eu_mask_per_dss: EU mask per DSS*/ + /** @fuse_topo.eu_mask_per_dss: EU mask per DSS */ xe_eu_mask_t eu_mask_per_dss; } fuse_topo; /** @steering: register steering for individual HW units */ struct { - /* @ranges: register ranges used for this steering type */ + /** @steering.ranges: register ranges used for this steering type */ const struct xe_mmio_range *ranges; - /** @group_target: target to steer accesses to */ + /** @steering.group_target: target to steer accesses to */ u16 group_target; - /** @instance_target: instance to steer accesses to */ + /** @steering.instance_target: instance to steer accesses to */ u16 instance_target; } steering[NUM_STEERING_TYPES]; @@ -349,13 +346,13 @@ struct xe_gt { /** @wa_active: keep track of active workarounds */ struct { - /** @gt: bitmap with active GT workarounds */ + /** @wa_active.gt: bitmap with active GT workarounds */ unsigned long *gt; - /** @engine: bitmap with active engine workarounds */ + /** @wa_active.engine: bitmap with active engine workarounds */ unsigned long *engine; - /** @lrc: bitmap with active LRC workarounds */ + /** @wa_active.lrc: bitmap with active LRC workarounds */ unsigned long *lrc; - /** @oob: bitmap with active OOB workaroudns */ + /** @wa_active.oob: bitmap with active OOB workarounds */ unsigned long *oob; } wa_active; }; diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 0a61390c64..a38f59b435 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -7,9 +7,10 @@ #include <drm/drm_managed.h> +#include <generated/xe_wa_oob.h> + #include "abi/guc_actions_abi.h" #include "abi/guc_errors_abi.h" -#include "generated/xe_wa_oob.h" #include "regs/xe_gt_regs.h" #include "regs/xe_guc_regs.h" #include "xe_bo.h" @@ -21,9 +22,12 @@ #include "xe_guc_hwconfig.h" #include "xe_guc_log.h" #include "xe_guc_pc.h" +#include "xe_guc_relay.h" #include "xe_guc_submit.h" +#include "xe_memirq.h" #include "xe_mmio.h" #include "xe_platform_types.h" +#include "xe_sriov.h" #include "xe_uc.h" #include "xe_uc_fw.h" #include "xe_wa.h" @@ -129,22 +133,24 @@ static u32 guc_ctl_ads_flags(struct xe_guc *guc) return flags; } +#define GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat)) + static u32 guc_ctl_wa_flags(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); struct xe_gt *gt = guc_to_gt(guc); + struct xe_uc_fw *uc_fw
= &guc->fw; + struct xe_uc_fw_version *version = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE]; + u32 flags = 0; if (XE_WA(gt, 22012773006)) flags |= GUC_WA_POLLCS; - if (XE_WA(gt, 16011759253)) - flags |= GUC_WA_GAM_CREDITS; - if (XE_WA(gt, 14014475959)) flags |= GUC_WA_HOLD_CCS_SWITCHOUT; - if (XE_WA(gt, 22011391025) || XE_WA(gt, 14012197797)) + if (XE_WA(gt, 22011391025)) flags |= GUC_WA_DUAL_QUEUE; /* @@ -155,9 +161,6 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) if (GRAPHICS_VERx100(xe) < 1270) flags |= GUC_WA_PRE_PARSER; - if (XE_WA(gt, 16011777198)) - flags |= GUC_WA_RCS_RESET_BEFORE_RC6; - if (XE_WA(gt, 22012727170) || XE_WA(gt, 22012727685)) flags |= GUC_WA_CONTEXT_ISOLATION; @@ -168,6 +171,14 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) if (XE_WA(gt, 1509372804)) flags |= GUC_WA_RENDER_RST_RC6_EXIT; + if (XE_WA(gt, 14018913170)) { + if (GUC_VER(version->major, version->minor, version->patch) >= GUC_VER(70, 7, 0)) + flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6; + else + drm_dbg(&xe->drm, "Skip WA 14018913170: GUC version expected >= 70.7.0, found %u.%u.%u\n", + version->major, version->minor, version->patch); + } + return flags; } @@ -241,11 +252,54 @@ static void guc_fini(struct drm_device *drm, void *arg) struct xe_guc *guc = arg; xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); - xe_guc_pc_fini(&guc->pc); xe_uc_fini_hw(&guc_to_gt(guc)->uc); xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); } +/** + * xe_guc_comm_init_early - early initialization of GuC communication + * @guc: the &xe_guc to initialize + * + * Must be called prior to first MMIO communication with GuC firmware. + */ +void xe_guc_comm_init_early(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + + if (xe_gt_is_media_type(gt)) + guc->notify_reg = MED_GUC_HOST_INTERRUPT; + else + guc->notify_reg = GUC_HOST_INTERRUPT; +} + +static int xe_guc_realloc_post_hwconfig(struct xe_guc *guc) +{ + struct xe_tile *tile = gt_to_tile(guc_to_gt(guc)); + struct xe_device *xe = guc_to_xe(guc); + int ret; + + if (!IS_DGFX(guc_to_xe(guc))) + return 0; + + ret = xe_managed_bo_reinit_in_vram(xe, tile, &guc->fw.bo); + if (ret) + return ret; + + ret = xe_managed_bo_reinit_in_vram(xe, tile, &guc->log.bo); + if (ret) + return ret; + + ret = xe_managed_bo_reinit_in_vram(xe, tile, &guc->ads.bo); + if (ret) + return ret; + + ret = xe_managed_bo_reinit_in_vram(xe, tile, &guc->ct.bo); + if (ret) + return ret; + + return 0; +} + int xe_guc_init(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); @@ -272,7 +326,7 @@ int xe_guc_init(struct xe_guc *guc) if (ret) goto out; - ret = xe_guc_pc_init(&guc->pc); + ret = xe_guc_relay_init(&guc->relay); if (ret) goto out; @@ -282,10 +336,7 @@ int xe_guc_init(struct xe_guc *guc) guc_init_params(guc); - if (xe_gt_is_media_type(gt)) - guc->notify_reg = MED_GUC_HOST_INTERRUPT; - else - guc->notify_reg = GUC_HOST_INTERRUPT; + xe_guc_comm_init_early(guc); xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); @@ -304,8 +355,18 @@ out: */ int xe_guc_init_post_hwconfig(struct xe_guc *guc) { + int ret; + + ret = xe_guc_realloc_post_hwconfig(guc); + if (ret) + return ret; + guc_init_params_post_hwconfig(guc); + ret = xe_guc_pc_init(&guc->pc); + if (ret) + return ret; + return xe_guc_ads_init_post_hwconfig(&guc->ads); } @@ -429,7 +490,6 @@ static int guc_wait_ucode(struct xe_guc *guc) if (ret) { struct drm_device *drm = &xe->drm; - struct drm_printer p = drm_info_printer(drm->dev); drm_info(drm, "GuC load failed: status = 0x%08X\n", status); drm_info(drm, "GuC 
load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n", @@ -451,8 +511,6 @@ static int guc_wait_ucode(struct xe_guc *guc) SOFT_SCRATCH(13))); ret = -ENXIO; } - - xe_guc_log_print(&guc->log, &p); } else { drm_dbg(&xe->drm, "GuC successfully loaded"); } @@ -516,6 +574,9 @@ int xe_guc_min_load_for_hwconfig(struct xe_guc *guc) xe_guc_ads_populate_minimal(&guc->ads); + /* Raise GT freq to speed up HuC/GuC load */ + xe_guc_pc_init_early(&guc->pc); + ret = __xe_guc_upload(guc); if (ret) return ret; @@ -579,9 +640,19 @@ static void guc_enable_irq(struct xe_guc *guc) int xe_guc_enable_communication(struct xe_guc *guc) { + struct xe_device *xe = guc_to_xe(guc); int err; - guc_enable_irq(guc); + if (IS_SRIOV_VF(xe) && xe_device_has_memirq(xe)) { + struct xe_gt *gt = guc_to_gt(guc); + struct xe_tile *tile = gt_to_tile(gt); + + err = xe_memirq_init_guc(&tile->sriov.vf.memirq, guc); + if (err) + return err; + } else { + guc_enable_irq(guc); + } xe_mmio_rmw32(guc_to_gt(guc), PMINTRMSK, ARAT_EXPIRED_INTRMSK, 0); @@ -650,7 +721,7 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, BUILD_BUG_ON(VF_SW_FLAG_COUNT != MED_VF_SW_FLAG_COUNT); - xe_assert(xe, !guc->ct.enabled); + xe_assert(xe, !xe_guc_ct_enabled(&guc->ct)); xe_assert(xe, len); xe_assert(xe, len <= VF_SW_FLAG_COUNT); xe_assert(xe, len <= MED_VF_SW_FLAG_COUNT); @@ -707,8 +778,12 @@ timeout: if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) != GUC_HXG_ORIGIN_GUC)) goto proto; - if (unlikely(ret)) + if (unlikely(ret)) { + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != + GUC_HXG_TYPE_NO_RESPONSE_BUSY) + goto proto; goto timeout; + } } if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == @@ -832,7 +907,7 @@ int xe_guc_stop(struct xe_guc *guc) { int ret; - xe_guc_ct_disable(&guc->ct); + xe_guc_ct_stop(&guc->ct); ret = xe_guc_submit_stop(guc); if (ret) diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index d3e49e7fd7..94f2dc5f6f 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -13,6 +13,7 @@ struct drm_printer; +void xe_guc_comm_init_early(struct xe_guc *guc); int xe_guc_init(struct xe_guc *guc); int xe_guc_init_post_hwconfig(struct xe_guc *guc); int xe_guc_post_load_init(struct xe_guc *guc); diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 390e6f1bf4..5339f8a490 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -100,7 +100,7 @@ struct __guc_ads_blob { struct guc_engine_usage engine_usage; struct guc_um_init_params um_init_params; /* From here on, location is dynamic! Refer to above diagram. 
*/ - struct guc_mmio_reg regset[0]; + struct guc_mmio_reg regset[]; } __packed; #define ads_blob_read(ads_, field_) \ @@ -273,7 +273,7 @@ int xe_guc_ads_init(struct xe_guc_ads *ads) ads->regset_size = calculate_regset_size(gt); bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | + XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_GGTT_BIT); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 24a33fa364..8bbfa45798 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -9,16 +9,21 @@ #include <linux/circ_buf.h> #include <linux/delay.h> +#include <kunit/static_stub.h> + #include <drm/drm_managed.h> #include "abi/guc_actions_abi.h" +#include "abi/guc_actions_sriov_abi.h" #include "abi/guc_klvs_abi.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_gt_pagefault.h" +#include "xe_gt_printk.h" #include "xe_gt_tlb_invalidation.h" #include "xe_guc.h" +#include "xe_guc_relay.h" #include "xe_guc_submit.h" #include "xe_map.h" #include "xe_pm.h" @@ -28,6 +33,7 @@ struct g2h_fence { u32 *response_buffer; u32 seqno; + u32 response_data; u16 response_len; u16 error; u16 hint; @@ -40,6 +46,7 @@ struct g2h_fence { static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer) { g2h_fence->response_buffer = response_buffer; + g2h_fence->response_data = 0; g2h_fence->response_len = 0; g2h_fence->fail = false; g2h_fence->retry = false; @@ -113,6 +120,7 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) { struct xe_guc_ct *ct = arg; + destroy_workqueue(ct->g2h_wq); xa_destroy(&ct->fence_lookup); } @@ -138,17 +146,24 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) xe_assert(xe, !(guc_ct_size() % PAGE_SIZE)); - drmm_mutex_init(&xe->drm, &ct->lock); + ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", 0); + if (!ct->g2h_wq) + return -ENOMEM; + spin_lock_init(&ct->fast_lock); xa_init(&ct->fence_lookup); INIT_WORK(&ct->g2h_worker, g2h_worker_func); init_waitqueue_head(&ct->wq); init_waitqueue_head(&ct->g2h_fence_wq); + err = drmm_mutex_init(&xe->drm, &ct->lock); + if (err) + return err; + primelockdep(ct); bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(), - XE_BO_CREATE_VRAM_IF_DGFX(tile) | + XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_GGTT_BIT); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -159,6 +174,8 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) if (err) return err; + xe_assert(xe, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED); + ct->state = XE_GUC_CT_STATE_DISABLED; return 0; } @@ -278,12 +295,35 @@ static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable) return ret > 0 ? -EPROTO : ret; } +static void xe_guc_ct_set_state(struct xe_guc_ct *ct, + enum xe_guc_ct_state state) +{ + mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */ + spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */ + + xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding == 0 || + state == XE_GUC_CT_STATE_STOPPED); + + ct->g2h_outstanding = 0; + ct->state = state; + + spin_unlock_irq(&ct->fast_lock); + + /* + * Lockdep doesn't like this under the fast lock and the destroy only + * needs to be serialized with the send path which ct lock provides.
+ */ + xa_destroy(&ct->fence_lookup); + + mutex_unlock(&ct->lock); +} + int xe_guc_ct_enable(struct xe_guc_ct *ct) { struct xe_device *xe = ct_to_xe(ct); int err; - xe_assert(xe, !ct->enabled); + xe_assert(xe, !xe_guc_ct_enabled(ct)); guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap); guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap); @@ -300,12 +340,7 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) if (err) goto err_out; - mutex_lock(&ct->lock); - spin_lock_irq(&ct->fast_lock); - ct->g2h_outstanding = 0; - ct->enabled = true; - spin_unlock_irq(&ct->fast_lock); - mutex_unlock(&ct->lock); + xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_ENABLED); smp_mb(); wake_up_all(&ct->wq); @@ -319,15 +354,34 @@ err_out: return err; } +static void stop_g2h_handler(struct xe_guc_ct *ct) +{ + cancel_work_sync(&ct->g2h_worker); +} + +/** + * xe_guc_ct_disable - Set GuC to disabled state + * @ct: the &xe_guc_ct + * + * Set GuC CT to disabled state and stop g2h handler. No outstanding g2h expected + * in this transition. + */ void xe_guc_ct_disable(struct xe_guc_ct *ct) { - mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */ - spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */ - ct->enabled = false; /* Finally disable CT communication */ - spin_unlock_irq(&ct->fast_lock); - mutex_unlock(&ct->lock); + xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_DISABLED); + stop_g2h_handler(ct); +} - xa_destroy(&ct->fence_lookup); +/** + * xe_guc_ct_stop - Set GuC to stopped state + * @ct: the &xe_guc_ct + * + * Set GuC CT to stopped state, stop g2h handler, and clear any outstanding g2h + */ +void xe_guc_ct_stop(struct xe_guc_ct *ct) +{ + xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED); + stop_g2h_handler(ct); } static bool h2g_has_room(struct xe_guc_ct *ct, u32 cmd_len) @@ -448,7 +502,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, GUC_HXG_EVENT_MSG_0_DATA0, action[0]); } else { cmd[1] = - FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_FAST_REQUEST) | FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | GUC_HXG_EVENT_MSG_0_DATA0, action[0]); } @@ -475,13 +529,34 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, return 0; } +/* + * The CT protocol accepts a 16-bit fence. This field is fully owned by the + * driver, the GuC will just copy it to the reply message. Since we need to + * be able to distinguish between replies to REQUEST and FAST_REQUEST messages, + * we use one bit of the seqno as an indicator for that and a rolling counter + * for the remaining 15 bits.
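For example (hypothetical counter values): with ct->fence_seqno at 0x7fff, next_ct_seqno(ct, true) below returns the tracked fence 0x7fff and wraps the counter, while the following next_ct_seqno(ct, false) returns 0x8000, i.e. CT_SEQNO_UNTRACKED | 0x0000.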
+ */ +#define CT_SEQNO_MASK GENMASK(14, 0) +#define CT_SEQNO_UNTRACKED BIT(15) +static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence) +{ + u32 seqno = ct->fence_seqno++ & CT_SEQNO_MASK; + + if (!is_g2h_fence) + seqno |= CT_SEQNO_UNTRACKED; + + return seqno; +} + static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence) { struct xe_device *xe = ct_to_xe(ct); + u16 seqno; int ret; + xe_assert(xe, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); xe_assert(xe, !g2h_len || !g2h_fence); xe_assert(xe, !num_g2h || !g2h_fence); xe_assert(xe, !g2h_len || num_g2h); @@ -493,11 +568,18 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, goto out; } - if (unlikely(!ct->enabled)) { + if (ct->state == XE_GUC_CT_STATE_DISABLED) { ret = -ENODEV; goto out; } + if (ct->state == XE_GUC_CT_STATE_STOPPED) { + ret = -ECANCELED; + goto out; + } + + xe_assert(xe, xe_guc_ct_enabled(ct)); + if (g2h_fence) { g2h_len = GUC_CTB_HXG_MSG_MAX_LEN; num_g2h = 1; @@ -505,7 +587,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, if (g2h_fence_needs_alloc(g2h_fence)) { void *ptr; - g2h_fence->seqno = (ct->fence_seqno++ & 0xffff); + g2h_fence->seqno = next_ct_seqno(ct, true); ptr = xa_store(&ct->fence_lookup, g2h_fence->seqno, g2h_fence, GFP_ATOMIC); @@ -514,6 +596,10 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, goto out; } } + + seqno = g2h_fence->seqno; + } else { + seqno = next_ct_seqno(ct, false); } if (g2h_len) @@ -523,8 +609,7 @@ retry: if (unlikely(ret)) goto out_unlock; - ret = h2g_write(ct, action, len, g2h_fence ? g2h_fence->seqno : 0, - !!g2h_fence); + ret = h2g_write(ct, action, len, seqno, !!g2h_fence); if (unlikely(ret)) { if (ret == -EAGAIN) goto retry; @@ -682,7 +767,8 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret) return false; #define ct_alive(ct) \ - (ct->enabled && !ct->ctbs.h2g.info.broken && !ct->ctbs.g2h.info.broken) + (xe_guc_ct_enabled(ct) && !ct->ctbs.h2g.info.broken && \ + !ct->ctbs.g2h.info.broken) if (!wait_event_interruptible_timeout(ct->wq, ct_alive(ct), HZ * 5)) return false; #undef ct_alive @@ -752,12 +838,31 @@ retry_same_fence: ret = -EIO; } - return ret > 0 ? 0 : ret; + return ret > 0 ? response_buffer ? g2h_fence.response_len : g2h_fence.response_data : ret; } +/** + * xe_guc_ct_send_recv - Send and receive HXG to the GuC + * @ct: the &xe_guc_ct + * @action: the dword array with `HXG Request`_ message (can't be NULL) + * @len: length of the `HXG Request`_ message (in dwords, can't be 0) + * @response_buffer: placeholder for the `HXG Response`_ message (can be NULL) + * + * Send a `HXG Request`_ message to the GuC over CT communication channel and + * blocks until GuC replies with a `HXG Response`_ message. + * + * For non-blocking communication with GuC use xe_guc_ct_send(). + * + * Note: The size of &response_buffer must be at least GUC_CTB_MAX_DWORDS_. + * + * Return: response length (in dwords) if &response_buffer was not NULL, or + * DATA0 from `HXG Response`_ if &response_buffer was NULL, or + * a negative error code on failure. 
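A hedged caller-side sketch (req/req_len stand for a prebuilt HXG request, names illustrative):

        u32 resp[GUC_CTB_MAX_DWORDS];
        int ret;

        ret = xe_guc_ct_send_recv(ct, req, req_len, resp);
        if (ret < 0)
                return ret;     /* send failure or GuC error */
        /* ret is the response length in dwords; payload is in resp[] */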
+ */ int xe_guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 *response_buffer) { + KUNIT_STATIC_STUB_REDIRECT(xe_guc_ct_send_recv, ct, action, len, response_buffer); return guc_ct_send_recv(ct, action, len, response_buffer, false); } @@ -767,9 +872,20 @@ int xe_guc_ct_send_recv_no_fail(struct xe_guc_ct *ct, const u32 *action, return guc_ct_send_recv(ct, action, len, response_buffer, true); } +static u32 *msg_to_hxg(u32 *msg) +{ + return msg + GUC_CTB_MSG_MIN_LEN; +} + +static u32 msg_len_to_hxg_len(u32 len) +{ + return len - GUC_CTB_MSG_MIN_LEN; +} + static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len) { - u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]); + u32 *hxg = msg_to_hxg(msg); + u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]); lockdep_assert_held(&ct->lock); @@ -786,18 +902,41 @@ static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len) static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) { - struct xe_device *xe = ct_to_xe(ct); - u32 response_len = len - GUC_CTB_MSG_MIN_LEN; + struct xe_gt *gt = ct_to_gt(ct); + struct xe_device *xe = gt_to_xe(gt); + u32 *hxg = msg_to_hxg(msg); + u32 hxg_len = msg_len_to_hxg_len(len); u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, msg[0]); - u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]); + u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]); struct g2h_fence *g2h_fence; lockdep_assert_held(&ct->lock); + /* + * Fences for FAST_REQUEST messages are not tracked in ct->fence_lookup. + * Those messages should never fail, so if we do get an error back it + * means we're likely doing an illegal operation and the GuC is + * rejecting it. We have no way to inform the code that submitted the + * H2G that the message was rejected, so we need to escalate the + * failure to trigger a reset. + */ + if (fence & CT_SEQNO_UNTRACKED) { + if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) + xe_gt_err(gt, "FAST_REQ H2G fence 0x%x failed! 
e=0x%x, h=%u\n", + fence, + FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, hxg[0]), + FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, hxg[0])); + else + xe_gt_err(gt, "unexpected response %u for FAST_REQ H2G fence 0x%x!\n", + type, fence); + + return -EPROTO; + } + g2h_fence = xa_erase(&ct->fence_lookup, fence); if (unlikely(!g2h_fence)) { /* Don't tear down channel, as send could've timed out */ - drm_warn(&xe->drm, "G2H fence (%u) not found!\n", fence); + xe_gt_warn(gt, "G2H fence (%u) not found!\n", fence); g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); return 0; } @@ -806,18 +945,16 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) { g2h_fence->fail = true; - g2h_fence->error = - FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[1]); - g2h_fence->hint = - FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[1]); + g2h_fence->error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, hxg[0]); + g2h_fence->hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, hxg[0]); } else if (type == GUC_HXG_TYPE_NO_RESPONSE_RETRY) { g2h_fence->retry = true; - g2h_fence->reason = - FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, msg[1]); + g2h_fence->reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, hxg[0]); } else if (g2h_fence->response_buffer) { - g2h_fence->response_len = response_len; - memcpy(g2h_fence->response_buffer, msg + GUC_CTB_MSG_MIN_LEN, - response_len * sizeof(u32)); + g2h_fence->response_len = hxg_len; + memcpy(g2h_fence->response_buffer, hxg, hxg_len * sizeof(u32)); + } else { + g2h_fence->response_data = FIELD_GET(GUC_HXG_RESPONSE_MSG_0_DATA0, hxg[0]); } g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); @@ -833,14 +970,13 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) { struct xe_device *xe = ct_to_xe(ct); - u32 hxg, origin, type; + u32 *hxg = msg_to_hxg(msg); + u32 origin, type; int ret; lockdep_assert_held(&ct->lock); - hxg = msg[1]; - - origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg); + origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg[0]); if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) { drm_err(&xe->drm, "G2H channel broken on read, origin=%d, reset required\n", @@ -850,7 +986,7 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) return -EPROTO; } - type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg); + type = FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]); switch (type) { case GUC_HXG_TYPE_EVENT: ret = parse_g2h_event(ct, msg, len); @@ -876,14 +1012,19 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) { struct xe_device *xe = ct_to_xe(ct); struct xe_guc *guc = ct_to_guc(ct); - u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]); - u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN; - u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN; + u32 hxg_len = msg_len_to_hxg_len(len); + u32 *hxg = msg_to_hxg(msg); + u32 action, adj_len; + u32 *payload; int ret = 0; - if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT) + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT) return 0; + action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]); + payload = hxg + GUC_HXG_EVENT_MSG_MIN_LEN; + adj_len = hxg_len - GUC_HXG_EVENT_MSG_MIN_LEN; + switch (action) { case XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE: ret = xe_guc_sched_done_handler(guc, payload, adj_len); @@ -920,6 +1061,12 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) ret = xe_guc_access_counter_notify_handler(guc, payload, adj_len); break; + case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF: + ret = 
xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len); + break; + case XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF: + ret = xe_guc_relay_process_guc2vf(&guc->relay, hxg, hxg_len); + break; default: drm_err(&xe->drm, "unexpected action 0x%04x\n", action); } @@ -938,15 +1085,22 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) u32 tail, head, len; s32 avail; u32 action; + u32 *hxg; + xe_assert(xe, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); lockdep_assert_held(&ct->fast_lock); - if (!ct->enabled) + if (ct->state == XE_GUC_CT_STATE_DISABLED) return -ENODEV; + if (ct->state == XE_GUC_CT_STATE_STOPPED) + return -ECANCELED; + if (g2h->info.broken) return -EPIPE; + xe_assert(xe, xe_guc_ct_enabled(ct)); + /* Calculate DW available to read */ tail = desc_read(xe, g2h, tail); avail = tail - g2h->info.head; @@ -988,10 +1142,11 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) avail * sizeof(u32)); } - action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]); + hxg = msg_to_hxg(msg); + action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]); if (fast_path) { - if (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[1]) != GUC_HXG_TYPE_EVENT) + if (FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_EVENT) return 0; switch (action) { @@ -1017,9 +1172,11 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len) { struct xe_device *xe = ct_to_xe(ct); struct xe_guc *guc = ct_to_guc(ct); - u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[1]); - u32 *payload = msg + GUC_CTB_HXG_MSG_MIN_LEN; - u32 adj_len = len - GUC_CTB_HXG_MSG_MIN_LEN; + u32 hxg_len = msg_len_to_hxg_len(len); + u32 *hxg = msg_to_hxg(msg); + u32 action = FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, hxg[0]); + u32 *payload = hxg + GUC_HXG_MSG_MIN_LEN; + u32 adj_len = hxg_len - GUC_HXG_MSG_MIN_LEN; int ret = 0; switch (action) { @@ -1245,7 +1402,7 @@ struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, return NULL; } - if (ct->enabled) { + if (xe_guc_ct_enabled(ct)) { snapshot->ct_enabled = true; snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding); guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g, @@ -1271,7 +1428,7 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, return; if (snapshot->ct_enabled) { - drm_puts(p, "\nH2G CTB (all sizes in DW):\n"); + drm_puts(p, "H2G CTB (all sizes in DW):\n"); guc_ctb_snapshot_print(&snapshot->h2g, p); drm_puts(p, "\nG2H CTB (all sizes in DW):\n"); @@ -1280,7 +1437,7 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, drm_printf(p, "\tg2h outstanding: %d\n", snapshot->g2h_outstanding); } else { - drm_puts(p, "\nCT disabled\n"); + drm_puts(p, "CT disabled\n"); } } diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index f15f8a4857..105bb8e99a 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -13,6 +13,7 @@ struct drm_printer; int xe_guc_ct_init(struct xe_guc_ct *ct); int xe_guc_ct_enable(struct xe_guc_ct *ct); void xe_guc_ct_disable(struct xe_guc_ct *ct); +void xe_guc_ct_stop(struct xe_guc_ct *ct); void xe_guc_ct_fast_path(struct xe_guc_ct *ct); struct xe_guc_ct_snapshot * @@ -22,11 +23,18 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot); void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool atomic); +static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct) +{ + return ct->state == XE_GUC_CT_STATE_ENABLED; +} + static inline void 
xe_guc_ct_irq_handler(struct xe_guc_ct *ct) { + if (!xe_guc_ct_enabled(ct)) + return; + wake_up_all(&ct->wq); - if (ct->enabled) - queue_work(system_unbound_wq, &ct->g2h_worker); + queue_work(ct->g2h_wq, &ct->g2h_worker); xe_guc_ct_fast_path(ct); } diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h index d814d4ee3f..fede4c6e93 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h @@ -73,6 +73,20 @@ struct xe_guc_ct_snapshot { }; /** + * enum xe_guc_ct_state - CT state + * @XE_GUC_CT_STATE_NOT_INITIALIZED: CT not initialized, messages not expected in this state + * @XE_GUC_CT_STATE_DISABLED: CT disabled, messages not expected in this state + * @XE_GUC_CT_STATE_STOPPED: CT stopped, drop messages without errors + * @XE_GUC_CT_STATE_ENABLED: CT enabled, messages sent / received in this state + */ +enum xe_guc_ct_state { + XE_GUC_CT_STATE_NOT_INITIALIZED = 0, + XE_GUC_CT_STATE_DISABLED, + XE_GUC_CT_STATE_STOPPED, + XE_GUC_CT_STATE_ENABLED, +}; + +/** * struct xe_guc_ct - GuC command transport (CT) layer * * Includes a pair of CT buffers for bi-directional communication and tracking @@ -87,17 +101,17 @@ struct xe_guc_ct { spinlock_t fast_lock; /** @ctbs: buffers for sending and receiving commands */ struct { - /** @send: Host to GuC (H2G, send) channel */ + /** @ctbs.send: Host to GuC (H2G, send) channel */ struct guc_ctb h2g; - /** @recv: GuC to Host (G2H, receive) channel */ + /** @ctbs.recv: GuC to Host (G2H, receive) channel */ struct guc_ctb g2h; } ctbs; /** @g2h_outstanding: number of outstanding G2H */ u32 g2h_outstanding; /** @g2h_worker: worker to process G2H messages */ struct work_struct g2h_worker; - /** @enabled: CT enabled */ - bool enabled; + /** @state: CT state */ + enum xe_guc_ct_state state; /** @fence_seqno: G2H fence seqno - 16 bits used by CT */ u32 fence_seqno; /** @fence_lookup: G2H fence lookup */ @@ -106,6 +120,8 @@ struct xe_guc_ct { wait_queue_head_t wq; /** @g2h_fence_wq: wait queue used for G2H fencing */ wait_queue_head_t g2h_fence_wq; + /** @g2h_wq: used to process G2H */ + struct workqueue_struct *g2h_wq; /** @msg: Message buffer */ u32 msg[GUC_CTB_MSG_MAX_LEN]; /** @fast_msg: Message buffer */ diff --git a/drivers/gpu/drm/xe/xe_guc_db_mgr.c b/drivers/gpu/drm/xe/xe_guc_db_mgr.c new file mode 100644 index 0000000000..8d9a0287df --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_db_mgr.c @@ -0,0 +1,266 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <linux/bitmap.h> +#include <linux/mutex.h> + +#include <drm/drm_managed.h> + +#include "regs/xe_guc_regs.h" + +#include "xe_assert.h" +#include "xe_gt_printk.h" +#include "xe_guc.h" +#include "xe_guc_db_mgr.h" +#include "xe_guc_types.h" + +/** + * DOC: GuC Doorbells + * + * The GFX doorbell solution provides a mechanism for submission of workload + * to the graphics hardware by a ring3 application without the penalty of + * ring transition for each workload submission. + * + * In SR-IOV mode, the doorbells are treated as shared resource and PF must + * be able to provision exclusive range of IDs across VFs, which may want to + * use this feature. 
+ */ + +static struct xe_guc *dbm_to_guc(struct xe_guc_db_mgr *dbm) +{ + return container_of(dbm, struct xe_guc, dbm); +} + +static struct xe_gt *dbm_to_gt(struct xe_guc_db_mgr *dbm) +{ + return guc_to_gt(dbm_to_guc(dbm)); +} + +static struct xe_device *dbm_to_xe(struct xe_guc_db_mgr *dbm) +{ + return gt_to_xe(dbm_to_gt(dbm)); +} + +#define dbm_assert(_dbm, _cond) xe_gt_assert(dbm_to_gt(_dbm), _cond) +#define dbm_mutex(_dbm) (&dbm_to_guc(_dbm)->submission_state.lock) + +static void dbm_print_locked(struct xe_guc_db_mgr *dbm, struct drm_printer *p, int indent); + +static void __fini_dbm(struct drm_device *drm, void *arg) +{ + struct xe_guc_db_mgr *dbm = arg; + unsigned int weight; + + mutex_lock(dbm_mutex(dbm)); + + weight = bitmap_weight(dbm->bitmap, dbm->count); + if (weight) { + struct drm_printer p = xe_gt_info_printer(dbm_to_gt(dbm)); + + xe_gt_err(dbm_to_gt(dbm), "GuC doorbells manager unclean (%u/%u)\n", + weight, dbm->count); + dbm_print_locked(dbm, &p, 1); + } + + bitmap_free(dbm->bitmap); + dbm->bitmap = NULL; + dbm->count = 0; + + mutex_unlock(dbm_mutex(dbm)); +} + +/** + * xe_guc_db_mgr_init() - Initialize GuC Doorbells Manager. + * @dbm: the &xe_guc_db_mgr to initialize + * @count: number of doorbells to manage + * + * The bare-metal or PF driver can pass ~0 as &count to indicate that all + * doorbells supported by the hardware are available for use. + * + * Only VF drivers will have to provide an explicit number of doorbell IDs + * that they can use. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_guc_db_mgr_init(struct xe_guc_db_mgr *dbm, unsigned int count) +{ + int ret; + + if (count == ~0) + count = GUC_NUM_DOORBELLS; + + dbm_assert(dbm, !dbm->bitmap); + dbm_assert(dbm, count <= GUC_NUM_DOORBELLS); + + if (!count) + goto done; + + dbm->bitmap = bitmap_zalloc(count, GFP_KERNEL); + if (!dbm->bitmap) + return -ENOMEM; + dbm->count = count; + + ret = drmm_add_action_or_reset(&dbm_to_xe(dbm)->drm, __fini_dbm, dbm); + if (ret) + return ret; +done: + xe_gt_dbg(dbm_to_gt(dbm), "using %u doorbell(s)\n", dbm->count); + return 0; +} + +static int dbm_reserve_chunk_locked(struct xe_guc_db_mgr *dbm, + unsigned int count, unsigned int spare) +{ + unsigned int used; + int index; + + dbm_assert(dbm, count); + dbm_assert(dbm, count <= GUC_NUM_DOORBELLS); + dbm_assert(dbm, dbm->count <= GUC_NUM_DOORBELLS); + lockdep_assert_held(dbm_mutex(dbm)); + + if (!dbm->count) + return -ENODATA; + + if (spare) { + used = bitmap_weight(dbm->bitmap, dbm->count); + if (used + count + spare > dbm->count) + return -EDQUOT; + } + + index = bitmap_find_next_zero_area(dbm->bitmap, dbm->count, 0, count, 0); + if (index >= dbm->count) + return -ENOSPC; + + bitmap_set(dbm->bitmap, index, count); + + return index; +} + +static void dbm_release_chunk_locked(struct xe_guc_db_mgr *dbm, + unsigned int start, unsigned int count) +{ + dbm_assert(dbm, count); + dbm_assert(dbm, count <= GUC_NUM_DOORBELLS); + dbm_assert(dbm, dbm->count); + dbm_assert(dbm, dbm->count <= GUC_NUM_DOORBELLS); + lockdep_assert_held(dbm_mutex(dbm)); + + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { + unsigned int n; + + for (n = 0; n < count; n++) + dbm_assert(dbm, test_bit(start + n, dbm->bitmap)); + } + bitmap_clear(dbm->bitmap, start, count); +} + +/** + * xe_guc_db_mgr_reserve_id_locked() - Reserve a single GuC Doorbell ID. + * @dbm: the &xe_guc_db_mgr + * + * This function expects that submission lock is already taken. + * + * Return: ID of the allocated GuC doorbell or a negative error code on failure.
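A hedged usage sketch for the single-ID helpers defined just below (assumes the caller already holds the lock that dbm_mutex() resolves to):

        lockdep_assert_held(dbm_mutex(dbm));

        id = xe_guc_db_mgr_reserve_id_locked(dbm);
        if (id < 0)
                return id;      /* e.g. -ENOSPC once all doorbells are taken */
        /* ... program and use doorbell 'id' ... */
        xe_guc_db_mgr_release_id_locked(dbm, id);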
+ */ +int xe_guc_db_mgr_reserve_id_locked(struct xe_guc_db_mgr *dbm) +{ + return dbm_reserve_chunk_locked(dbm, 1, 0); +} + +/** + * xe_guc_db_mgr_release_id_locked() - Release a single GuC Doorbell ID. + * @dbm: the &xe_guc_db_mgr + * @id: the GuC Doorbell ID to release + * + * This function expects that submission lock is already taken. + */ +void xe_guc_db_mgr_release_id_locked(struct xe_guc_db_mgr *dbm, unsigned int id) +{ + return dbm_release_chunk_locked(dbm, id, 1); +} + +/** + * xe_guc_db_mgr_reserve_range() - Reserve a range of GuC Doorbell IDs. + * @dbm: the &xe_guc_db_mgr + * @count: number of GuC doorbell IDs to reserve + * @spare: number of GuC doorbell IDs to keep available + * + * This function is dedicated for use by the PF, which expects that the + * allocated range for the VF will be contiguous and that there will be at + * least &spare IDs still available for PF use after this reservation. + * + * Return: starting ID of the allocated GuC doorbell ID range or + * a negative error code on failure. + */ +int xe_guc_db_mgr_reserve_range(struct xe_guc_db_mgr *dbm, + unsigned int count, unsigned int spare) +{ + int ret; + + mutex_lock(dbm_mutex(dbm)); + ret = dbm_reserve_chunk_locked(dbm, count, spare); + mutex_unlock(dbm_mutex(dbm)); + + return ret; +} + +/** + * xe_guc_db_mgr_release_range() - Release a range of Doorbell IDs. + * @dbm: the &xe_guc_db_mgr + * @start: the starting ID of GuC doorbell ID range to release + * @count: number of GuC doorbell IDs to release + */ +void xe_guc_db_mgr_release_range(struct xe_guc_db_mgr *dbm, + unsigned int start, unsigned int count) +{ + mutex_lock(dbm_mutex(dbm)); + dbm_release_chunk_locked(dbm, start, count); + mutex_unlock(dbm_mutex(dbm)); +} + +static void dbm_print_locked(struct xe_guc_db_mgr *dbm, struct drm_printer *p, int indent) +{ + unsigned int rs, re; + unsigned int total; + + drm_printf_indent(p, indent, "count: %u\n", dbm->count); + if (!dbm->bitmap) + return; + + total = 0; + for_each_clear_bitrange(rs, re, dbm->bitmap, dbm->count) { + drm_printf_indent(p, indent, "available range: %u..%u (%u)\n", + rs, re - 1, re - rs); + total += re - rs; + } + drm_printf_indent(p, indent, "available total: %u\n", total); + + total = 0; + for_each_set_bitrange(rs, re, dbm->bitmap, dbm->count) { + drm_printf_indent(p, indent, "reserved range: %u..%u (%u)\n", + rs, re - 1, re - rs); + total += re - rs; + } + drm_printf_indent(p, indent, "reserved total: %u\n", total); +} + +/** + * xe_guc_db_mgr_print() - Print status of GuC Doorbells Manager.
+ * @dbm: the &xe_guc_db_mgr to print + * @p: the &drm_printer to print to + * @indent: tab indentation level + */ +void xe_guc_db_mgr_print(struct xe_guc_db_mgr *dbm, + struct drm_printer *p, int indent) +{ + mutex_lock(dbm_mutex(dbm)); + dbm_print_locked(dbm, p, indent); + mutex_unlock(dbm_mutex(dbm)); +} + +#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_guc_db_mgr_test.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_db_mgr.h b/drivers/gpu/drm/xe/xe_guc_db_mgr.h new file mode 100644 index 0000000000..c250fa0ca9 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_db_mgr.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GUC_DB_MGR_H_ +#define _XE_GUC_DB_MGR_H_ + +struct drm_printer; +struct xe_guc_db_mgr; + +int xe_guc_db_mgr_init(struct xe_guc_db_mgr *dbm, unsigned int count); + +int xe_guc_db_mgr_reserve_id_locked(struct xe_guc_db_mgr *dbm); +void xe_guc_db_mgr_release_id_locked(struct xe_guc_db_mgr *dbm, unsigned int id); + +int xe_guc_db_mgr_reserve_range(struct xe_guc_db_mgr *dbm, unsigned int count, unsigned int spare); +void xe_guc_db_mgr_release_range(struct xe_guc_db_mgr *dbm, unsigned int start, unsigned int count); + +void xe_guc_db_mgr_print(struct xe_guc_db_mgr *dbm, struct drm_printer *p, int indent); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index 4dd5a88a78..c281fdbfd2 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -97,6 +97,7 @@ struct guc_update_exec_queue_policy { #define GUC_WA_POLLCS BIT(18) #define GUC_WA_RENDER_RST_RC6_EXIT BIT(19) #define GUC_WA_RCS_REGS_IN_CCS_REGS_LIST BIT(21) +#define GUC_WA_ENABLE_TSC_CHECK_ON_RC6 BIT(22) #define GUC_CTL_FEATURE 2 #define GUC_CTL_ENABLE_SLPC BIT(2) diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c index 2a13a00917..ea49f3885c 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c @@ -78,7 +78,7 @@ int xe_guc_hwconfig_init(struct xe_guc *guc) return -EINVAL; bo = xe_managed_bo_create_pin_map(xe, tile, PAGE_ALIGN(size), - XE_BO_CREATE_VRAM_IF_DGFX(tile) | + XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_GGTT_BIT); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_guc_hxg_helpers.h b/drivers/gpu/drm/xe/xe_guc_hxg_helpers.h new file mode 100644 index 0000000000..aeeb573c68 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_hxg_helpers.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GUC_HXG_HELPERS_H_ +#define _XE_GUC_HXG_HELPERS_H_ + +#include <linux/bitfield.h> +#include <linux/types.h> + +#include "abi/guc_messages_abi.h" + +/** + * hxg_sizeof - Queries size of the object or type (in HXG units). + * @T: the object or type + * + * Force a compilation error if actual size is not aligned to HXG unit (u32). + * + * Return: size in dwords (u32). 
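For instance (hypothetical type): a struct of three u32s makes hxg_sizeof() evaluate to 3 dwords, while any type whose size is not a multiple of sizeof(u32) fails the build via BUILD_BUG_ON_ZERO():

        struct example_msg { u32 header; u32 data[2]; };       /* illustrative */
        static_assert(hxg_sizeof(struct example_msg) == 3);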
+ */ +#define hxg_sizeof(T) (sizeof(T) / sizeof(u32) + BUILD_BUG_ON_ZERO(sizeof(T) % sizeof(u32))) + +static inline const char *guc_hxg_type_to_string(unsigned int type) +{ + switch (type) { + case GUC_HXG_TYPE_REQUEST: + return "request"; + case GUC_HXG_TYPE_FAST_REQUEST: + return "fast-request"; + case GUC_HXG_TYPE_EVENT: + return "event"; + case GUC_HXG_TYPE_NO_RESPONSE_BUSY: + return "busy"; + case GUC_HXG_TYPE_NO_RESPONSE_RETRY: + return "retry"; + case GUC_HXG_TYPE_RESPONSE_FAILURE: + return "failure"; + case GUC_HXG_TYPE_RESPONSE_SUCCESS: + return "response"; + default: + return "<invalid>"; + } +} + +static inline bool guc_hxg_type_is_action(unsigned int type) +{ + switch (type) { + case GUC_HXG_TYPE_REQUEST: + case GUC_HXG_TYPE_FAST_REQUEST: + case GUC_HXG_TYPE_EVENT: + return true; + default: + return false; + } +} + +static inline bool guc_hxg_type_is_reply(unsigned int type) +{ + switch (type) { + case GUC_HXG_TYPE_NO_RESPONSE_BUSY: + case GUC_HXG_TYPE_NO_RESPONSE_RETRY: + case GUC_HXG_TYPE_RESPONSE_FAILURE: + case GUC_HXG_TYPE_RESPONSE_SUCCESS: + return true; + default: + return false; + } +} + +static inline u32 guc_hxg_msg_encode_success(u32 *msg, u32 data0) +{ + msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_SUCCESS) | + FIELD_PREP(GUC_HXG_RESPONSE_MSG_0_DATA0, data0); + + return GUC_HXG_RESPONSE_MSG_MIN_LEN; +} + +static inline u32 guc_hxg_msg_encode_failure(u32 *msg, u32 error, u32 hint) +{ + msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_FAILURE) | + FIELD_PREP(GUC_HXG_FAILURE_MSG_0_HINT, hint) | + FIELD_PREP(GUC_HXG_FAILURE_MSG_0_ERROR, error); + + return GUC_HXG_FAILURE_MSG_LEN; +} + +static inline u32 guc_hxg_msg_encode_busy(u32 *msg, u32 counter) +{ + msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_NO_RESPONSE_BUSY) | + FIELD_PREP(GUC_HXG_BUSY_MSG_0_COUNTER, counter); + + return GUC_HXG_BUSY_MSG_LEN; +} + +static inline u32 guc_hxg_msg_encode_retry(u32 *msg, u32 reason) +{ + msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_NO_RESPONSE_RETRY) | + FIELD_PREP(GUC_HXG_RETRY_MSG_0_REASON, reason); + + return GUC_HXG_RETRY_MSG_LEN; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index bcd2f4d340..45135c3520 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -84,7 +84,7 @@ int xe_guc_log_init(struct xe_guc_log *log) struct xe_bo *bo; bo = xe_managed_bo_create_pin_map(xe, tile, guc_log_size(), - XE_BO_CREATE_VRAM_IF_DGFX(tile) | + XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_GGTT_BIT); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index d917025925..dd9d65f923 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -381,8 +381,6 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc) struct xe_device *xe = gt_to_xe(gt); u32 freq; - xe_device_mem_access_get(gt_to_xe(gt)); - /* When in RC6, actual frequency reported will be 0. 
*/ if (GRAPHICS_VERx100(xe) >= 1270) { freq = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1); @@ -394,8 +392,6 @@ u32 xe_guc_pc_get_act_freq(struct xe_guc_pc *pc) freq = decode_freq(freq); - xe_device_mem_access_put(gt_to_xe(gt)); - return freq; } @@ -412,14 +408,13 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq) struct xe_gt *gt = pc_to_gt(pc); int ret; - xe_device_mem_access_get(gt_to_xe(gt)); /* * GuC SLPC plays with cur freq request when GuCRC is enabled * Block RC6 for a more reliable read. */ ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (ret) - goto out; + return ret; *freq = xe_mmio_read32(gt, RPNSWREQ); @@ -427,9 +422,7 @@ int xe_guc_pc_get_cur_freq(struct xe_guc_pc *pc, u32 *freq) *freq = decode_freq(*freq); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -out: - xe_device_mem_access_put(gt_to_xe(gt)); - return ret; + return 0; } /** @@ -451,12 +444,7 @@ u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc) */ u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc) { - struct xe_gt *gt = pc_to_gt(pc); - struct xe_device *xe = gt_to_xe(gt); - - xe_device_mem_access_get(xe); pc_update_rp_values(pc); - xe_device_mem_access_put(xe); return pc->rpe_freq; } @@ -485,7 +473,6 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) struct xe_gt *gt = pc_to_gt(pc); int ret; - xe_device_mem_access_get(pc_to_xe(pc)); mutex_lock(&pc->freq_lock); if (!pc->freq_ready) { /* Might be in the middle of a gt reset */ @@ -511,7 +498,6 @@ fw: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); out: mutex_unlock(&pc->freq_lock); - xe_device_mem_access_put(pc_to_xe(pc)); return ret; } @@ -528,7 +514,6 @@ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) { int ret; - xe_device_mem_access_get(pc_to_xe(pc)); mutex_lock(&pc->freq_lock); if (!pc->freq_ready) { /* Might be in the middle of a gt reset */ @@ -544,8 +529,6 @@ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) out: mutex_unlock(&pc->freq_lock); - xe_device_mem_access_put(pc_to_xe(pc)); - return ret; } @@ -561,7 +544,6 @@ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq) { int ret; - xe_device_mem_access_get(pc_to_xe(pc)); mutex_lock(&pc->freq_lock); if (!pc->freq_ready) { /* Might be in the middle of a gt reset */ @@ -577,7 +559,6 @@ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq) out: mutex_unlock(&pc->freq_lock); - xe_device_mem_access_put(pc_to_xe(pc)); return ret; } @@ -594,7 +575,6 @@ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) { int ret; - xe_device_mem_access_get(pc_to_xe(pc)); mutex_lock(&pc->freq_lock); if (!pc->freq_ready) { /* Might be in the middle of a gt reset */ @@ -610,7 +590,6 @@ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) out: mutex_unlock(&pc->freq_lock); - xe_device_mem_access_put(pc_to_xe(pc)); return ret; } @@ -623,8 +602,6 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc) struct xe_gt *gt = pc_to_gt(pc); u32 reg, gt_c_state; - xe_device_mem_access_get(gt_to_xe(gt)); - if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { reg = xe_mmio_read32(gt, MTL_MIRROR_TARGET_WP1); gt_c_state = REG_FIELD_GET(MTL_CC_MASK, reg); @@ -633,8 +610,6 @@ enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc) gt_c_state = REG_FIELD_GET(RCN_MASK, reg); } - xe_device_mem_access_put(gt_to_xe(gt)); - switch (gt_c_state) { case GT_C6: return GT_IDLE_C6; @@ -654,9 +629,7 @@ u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc) struct xe_gt *gt = pc_to_gt(pc); u32 reg; - xe_device_mem_access_get(gt_to_xe(gt)); reg = 
xe_mmio_read32(gt, GT_GFX_RC6); - xe_device_mem_access_put(gt_to_xe(gt)); return reg; } @@ -670,9 +643,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc) struct xe_gt *gt = pc_to_gt(pc); u64 reg; - xe_device_mem_access_get(gt_to_xe(gt)); reg = xe_mmio_read32(gt, MTL_MEDIA_MC6); - xe_device_mem_access_put(gt_to_xe(gt)); return reg; } @@ -801,23 +772,19 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc) if (xe->info.skip_guc_pc) return 0; - xe_device_mem_access_get(pc_to_xe(pc)); - ret = pc_action_setup_gucrc(pc, XE_GUCRC_HOST_CONTROL); if (ret) - goto out; + return ret; ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (ret) - goto out; + return ret; xe_gt_idle_disable_c6(gt); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -out: - xe_device_mem_access_put(pc_to_xe(pc)); - return ret; + return 0; } static void pc_init_pcode_freq(struct xe_guc_pc *pc) @@ -870,11 +837,9 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) xe_gt_assert(gt, xe_device_uc_enabled(xe)); - xe_device_mem_access_get(pc_to_xe(pc)); - ret = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (ret) - goto out_fail_force_wake; + return ret; if (xe->info.skip_guc_pc) { if (xe->info.platform != XE_PVC) @@ -914,8 +879,6 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) out: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -out_fail_force_wake: - xe_device_mem_access_put(pc_to_xe(pc)); return ret; } @@ -928,12 +891,9 @@ int xe_guc_pc_stop(struct xe_guc_pc *pc) struct xe_device *xe = pc_to_xe(pc); int ret; - xe_device_mem_access_get(pc_to_xe(pc)); - if (xe->info.skip_guc_pc) { xe_gt_idle_disable_c6(pc_to_gt(pc)); - ret = 0; - goto out; + return 0; } mutex_lock(&pc->freq_lock); @@ -942,36 +902,29 @@ int xe_guc_pc_stop(struct xe_guc_pc *pc) ret = pc_action_shutdown(pc); if (ret) - goto out; + return ret; if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_NOT_RUNNING)) { drm_err(&pc_to_xe(pc)->drm, "GuC PC Shutdown failed\n"); - ret = -EIO; + return -EIO; } -out: - xe_device_mem_access_put(pc_to_xe(pc)); - return ret; + return 0; } /** * xe_guc_pc_fini - Finalize GuC's Power Conservation component - * @pc: Xe_GuC_PC instance + * @drm: DRM device + * @arg: opaque pointer that should point to Xe_GuC_PC instance */ -void xe_guc_pc_fini(struct xe_guc_pc *pc) +static void xe_guc_pc_fini(struct drm_device *drm, void *arg) { - struct xe_device *xe = pc_to_xe(pc); - - if (xe->info.skip_guc_pc) { - xe_device_mem_access_get(xe); - xe_gt_idle_disable_c6(pc_to_gt(pc)); - xe_device_mem_access_put(xe); - return; - } + struct xe_guc_pc *pc = arg; + xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL); XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); XE_WARN_ON(xe_guc_pc_stop(pc)); - mutex_destroy(&pc->freq_lock); + xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL); } /** @@ -985,11 +938,14 @@ int xe_guc_pc_init(struct xe_guc_pc *pc) struct xe_device *xe = gt_to_xe(gt); struct xe_bo *bo; u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data)); + int err; if (xe->info.skip_guc_pc) return 0; - mutex_init(&pc->freq_lock); + err = drmm_mutex_init(&xe->drm, &pc->freq_lock); + if (err) + return err; bo = xe_managed_bo_create_pin_map(xe, tile, size, XE_BO_CREATE_VRAM_IF_DGFX(tile) | @@ -998,5 +954,10 @@ int xe_guc_pc_init(struct xe_guc_pc *pc) return PTR_ERR(bo); pc->bo = bo; + + err = drmm_add_action_or_reset(&xe->drm, xe_guc_pc_fini, pc); + if (err) + return err; + return 0; } diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index cecad8e930..d3680d8949 100644 --- 
a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -9,7 +9,6 @@ #include "xe_guc_pc_types.h" int xe_guc_pc_init(struct xe_guc_pc *pc); -void xe_guc_pc_fini(struct xe_guc_pc *pc); int xe_guc_pc_start(struct xe_guc_pc *pc); int xe_guc_pc_stop(struct xe_guc_pc *pc); int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc); diff --git a/drivers/gpu/drm/xe/xe_guc_relay.c b/drivers/gpu/drm/xe/xe_guc_relay.c new file mode 100644 index 0000000000..c0a2d8d5d3 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_relay.c @@ -0,0 +1,941 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <linux/bitfield.h> +#include <linux/delay.h> + +#include <drm/drm_managed.h> + +#include <kunit/static_stub.h> +#include <kunit/test-bug.h> + +#include "abi/guc_actions_sriov_abi.h" +#include "abi/guc_relay_actions_abi.h" +#include "abi/guc_relay_communication_abi.h" + +#include "xe_assert.h" +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_sriov_printk.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_guc_hxg_helpers.h" +#include "xe_guc_relay.h" +#include "xe_guc_relay_types.h" +#include "xe_sriov.h" + +/* + * How long should we wait for the response? + * XXX this value is subject for the profiling. + */ +#define RELAY_TIMEOUT_MSEC (2500) + +static void relays_worker_fn(struct work_struct *w); + +static struct xe_guc *relay_to_guc(struct xe_guc_relay *relay) +{ + return container_of(relay, struct xe_guc, relay); +} + +static struct xe_guc_ct *relay_to_ct(struct xe_guc_relay *relay) +{ + return &relay_to_guc(relay)->ct; +} + +static struct xe_gt *relay_to_gt(struct xe_guc_relay *relay) +{ + return guc_to_gt(relay_to_guc(relay)); +} + +static struct xe_device *relay_to_xe(struct xe_guc_relay *relay) +{ + return gt_to_xe(relay_to_gt(relay)); +} + +#define relay_assert(relay, condition) xe_gt_assert(relay_to_gt(relay), condition) +#define relay_notice(relay, msg...) xe_gt_sriov_notice(relay_to_gt(relay), "relay: " msg) +#define relay_debug(relay, msg...) xe_gt_sriov_dbg_verbose(relay_to_gt(relay), "relay: " msg) + +static int relay_get_totalvfs(struct xe_guc_relay *relay) +{ + struct xe_device *xe = relay_to_xe(relay); + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + + KUNIT_STATIC_STUB_REDIRECT(relay_get_totalvfs, relay); + return IS_SRIOV_VF(xe) ? 
0 : pci_sriov_get_totalvfs(pdev); +} + +static bool relay_is_ready(struct xe_guc_relay *relay) +{ + return mempool_initialized(&relay->pool); +} + +static u32 relay_get_next_rid(struct xe_guc_relay *relay) +{ + u32 rid; + + spin_lock(&relay->lock); + rid = ++relay->last_rid; + spin_unlock(&relay->lock); + + return rid; +} + +/** + * struct relay_transaction - internal data used to handle transactions + * + * Relation between struct relay_transaction members:: + * + * <-------------------- GUC_CTB_MAX_DWORDS --------------> + * <-------- GUC_RELAY_MSG_MAX_LEN ---> + * <--- offset ---> <--- request_len -------> + * +----------------+-------------------------+----------+--+ + * | | | | | + * +----------------+-------------------------+----------+--+ + * ^ ^ + * / / + * request_buf request + * + * <-------------------- GUC_CTB_MAX_DWORDS --------------> + * <-------- GUC_RELAY_MSG_MAX_LEN ---> + * <--- offset ---> <--- response_len ---> + * +----------------+----------------------+-------------+--+ + * | | | | | + * +----------------+----------------------+-------------+--+ + * ^ ^ + * / / + * response_buf response + */ +struct relay_transaction { + /** + * @incoming: indicates whether this transaction represents an incoming + * request from the remote VF/PF or this transaction + * represents outgoing request to the remote VF/PF. + */ + bool incoming; + + /** + * @remote: PF/VF identifier of the origin (or target) of the relay + * request message. + */ + u32 remote; + + /** @rid: identifier of the VF/PF relay message. */ + u32 rid; + + /** + * @request: points to the inner VF/PF request message, copied to the + * #response_buf starting at #offset. + */ + u32 *request; + + /** @request_len: length of the inner VF/PF request message. */ + u32 request_len; + + /** + * @response: points to the placeholder buffer where inner VF/PF + * response will be located, for outgoing transaction + * this could be caller's buffer (if provided) otherwise + * it points to the #response_buf starting at #offset. + */ + u32 *response; + + /** + * @response_len: length of the inner VF/PF response message (only + * if #status is 0), initially set to the size of the + * placeholder buffer where response message will be + * copied. + */ + u32 response_len; + + /** + * @offset: offset to the start of the inner VF/PF relay message inside + * buffers; this offset is equal the length of the outer GuC + * relay header message. + */ + u32 offset; + + /** + * @request_buf: buffer with VF/PF request message including outer + * transport message. + */ + u32 request_buf[GUC_CTB_MAX_DWORDS]; + + /** + * @response_buf: buffer with VF/PF response message including outer + * transport message. + */ + u32 response_buf[GUC_CTB_MAX_DWORDS]; + + /** + * @reply: status of the reply, 0 means that data pointed by the + * #response is valid. + */ + int reply; + + /** @done: completion of the outgoing transaction. 
*/ + struct completion done; + + /** @link: transaction list link */ + struct list_head link; +}; + +static u32 prepare_pf2guc(u32 *msg, u32 target, u32 rid) +{ + msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, XE_GUC_ACTION_PF2GUC_RELAY_TO_VF); + msg[1] = FIELD_PREP(PF2GUC_RELAY_TO_VF_REQUEST_MSG_1_VFID, target); + msg[2] = FIELD_PREP(PF2GUC_RELAY_TO_VF_REQUEST_MSG_2_RELAY_ID, rid); + + return PF2GUC_RELAY_TO_VF_REQUEST_MSG_MIN_LEN; +} + +static u32 prepare_vf2guc(u32 *msg, u32 rid) +{ + msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, XE_GUC_ACTION_VF2GUC_RELAY_TO_PF); + msg[1] = FIELD_PREP(VF2GUC_RELAY_TO_PF_REQUEST_MSG_1_RELAY_ID, rid); + + return VF2GUC_RELAY_TO_PF_REQUEST_MSG_MIN_LEN; +} + +static struct relay_transaction * +__relay_get_transaction(struct xe_guc_relay *relay, bool incoming, u32 remote, u32 rid, + const u32 *action, u32 action_len, u32 *resp, u32 resp_size) +{ + struct relay_transaction *txn; + + relay_assert(relay, action_len >= GUC_RELAY_MSG_MIN_LEN); + relay_assert(relay, action_len <= GUC_RELAY_MSG_MAX_LEN); + relay_assert(relay, !(!!resp ^ !!resp_size)); + relay_assert(relay, resp_size <= GUC_RELAY_MSG_MAX_LEN); + relay_assert(relay, resp_size == 0 || resp_size >= GUC_RELAY_MSG_MIN_LEN); + + if (unlikely(!relay_is_ready(relay))) + return ERR_PTR(-ENODEV); + + /* + * For incoming requests we can't use GFP_KERNEL as those are delivered + * with CTB lock held which is marked as used in the reclaim path. + * Btw, that's one of the reason why we use mempool here! + */ + txn = mempool_alloc(&relay->pool, incoming ? GFP_ATOMIC : GFP_KERNEL); + if (!txn) + return ERR_PTR(-ENOMEM); + + txn->incoming = incoming; + txn->remote = remote; + txn->rid = rid; + txn->offset = remote ? + prepare_pf2guc(incoming ? txn->response_buf : txn->request_buf, remote, rid) : + prepare_vf2guc(incoming ? 
 + +static struct relay_transaction * +__relay_get_transaction(struct xe_guc_relay *relay, bool incoming, u32 remote, u32 rid, + const u32 *action, u32 action_len, u32 *resp, u32 resp_size) +{ + struct relay_transaction *txn; + + relay_assert(relay, action_len >= GUC_RELAY_MSG_MIN_LEN); + relay_assert(relay, action_len <= GUC_RELAY_MSG_MAX_LEN); + relay_assert(relay, !(!!resp ^ !!resp_size)); + relay_assert(relay, resp_size <= GUC_RELAY_MSG_MAX_LEN); + relay_assert(relay, resp_size == 0 || resp_size >= GUC_RELAY_MSG_MIN_LEN); + + if (unlikely(!relay_is_ready(relay))) + return ERR_PTR(-ENODEV); + + /* + * For incoming requests we can't use GFP_KERNEL as those are delivered + * with the CTB lock held, which is marked as used in the reclaim path. + * Btw, that's one of the reasons why we use a mempool here! + */ + txn = mempool_alloc(&relay->pool, incoming ? GFP_ATOMIC : GFP_KERNEL); + if (!txn) + return ERR_PTR(-ENOMEM); + + txn->incoming = incoming; + txn->remote = remote; + txn->rid = rid; + txn->offset = remote ? + prepare_pf2guc(incoming ? txn->response_buf : txn->request_buf, remote, rid) : + prepare_vf2guc(incoming ? txn->response_buf : txn->request_buf, rid); + + relay_assert(relay, txn->offset); + relay_assert(relay, txn->offset + GUC_RELAY_MSG_MAX_LEN <= ARRAY_SIZE(txn->request_buf)); + relay_assert(relay, txn->offset + GUC_RELAY_MSG_MAX_LEN <= ARRAY_SIZE(txn->response_buf)); + + txn->request = txn->request_buf + txn->offset; + memcpy(&txn->request_buf[txn->offset], action, sizeof(u32) * action_len); + txn->request_len = action_len; + + txn->response = resp ?: txn->response_buf + txn->offset; + txn->response_len = resp_size ?: GUC_RELAY_MSG_MAX_LEN; + txn->reply = -ENOMSG; + INIT_LIST_HEAD(&txn->link); + init_completion(&txn->done); + + return txn; +} + +static struct relay_transaction * +relay_new_transaction(struct xe_guc_relay *relay, u32 target, const u32 *action, u32 len, + u32 *resp, u32 resp_size) +{ + u32 rid = relay_get_next_rid(relay); + + return __relay_get_transaction(relay, false, target, rid, action, len, resp, resp_size); +} + +static struct relay_transaction * +relay_new_incoming_transaction(struct xe_guc_relay *relay, u32 origin, u32 rid, + const u32 *action, u32 len) +{ + return __relay_get_transaction(relay, true, origin, rid, action, len, NULL, 0); +} + +static void relay_release_transaction(struct xe_guc_relay *relay, struct relay_transaction *txn) +{ + relay_assert(relay, list_empty(&txn->link)); + + txn->offset = 0; + txn->response = NULL; + txn->reply = -ESTALE; + mempool_free(txn, &relay->pool); +} + +static int relay_send_transaction(struct xe_guc_relay *relay, struct relay_transaction *txn) +{ + u32 len = txn->incoming ? txn->response_len : txn->request_len; + u32 *buf = txn->incoming ? txn->response_buf : txn->request_buf; + u32 *msg = buf + txn->offset; + int ret; + + relay_assert(relay, txn->offset); + relay_assert(relay, txn->offset + len <= GUC_CTB_MAX_DWORDS); + relay_assert(relay, len >= GUC_RELAY_MSG_MIN_LEN); + relay_assert(relay, len <= GUC_RELAY_MSG_MAX_LEN); + + relay_debug(relay, "sending %s.%u to %u = %*ph\n", + guc_hxg_type_to_string(FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])), + txn->rid, txn->remote, (int)sizeof(u32) * len, msg); + + ret = xe_guc_ct_send_block(relay_to_ct(relay), buf, len + txn->offset); + + if (unlikely(ret > 0)) { + relay_notice(relay, "Unexpected data=%d from GuC, wrong ABI?\n", ret); + ret = -EPROTO; + } + if (unlikely(ret < 0)) { + relay_notice(relay, "Failed to send %s.%x to GuC (%pe) %*ph ...\n", + guc_hxg_type_to_string(FIELD_GET(GUC_HXG_MSG_0_TYPE, buf[0])), + FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, buf[0]), + ERR_PTR(ret), (int)sizeof(u32) * txn->offset, buf); + relay_notice(relay, "Failed to send %s.%u to %u (%pe) %*ph\n", + guc_hxg_type_to_string(FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])), + txn->rid, txn->remote, ERR_PTR(ret), (int)sizeof(u32) * len, msg); + } + + return ret; +} + +static void __fini_relay(struct drm_device *drm, void *arg) +{ + struct xe_guc_relay *relay = arg; + + mempool_exit(&relay->pool); +} + +/** + * xe_guc_relay_init - Initialize a &xe_guc_relay + * @relay: the &xe_guc_relay to initialize + * + * Initialize remaining members of &xe_guc_relay that may depend + * on the SR-IOV mode. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_guc_relay_init(struct xe_guc_relay *relay) +{ + const int XE_RELAY_MEMPOOL_MIN_NUM = 1; + struct xe_device *xe = relay_to_xe(relay); + int err; + + relay_assert(relay, !relay_is_ready(relay)); + + if (!IS_SRIOV(xe)) + return 0; + + spin_lock_init(&relay->lock); + INIT_WORK(&relay->worker, relays_worker_fn); + INIT_LIST_HEAD(&relay->pending_relays); + INIT_LIST_HEAD(&relay->incoming_actions); + + err = mempool_init_kmalloc_pool(&relay->pool, XE_RELAY_MEMPOOL_MIN_NUM + + relay_get_totalvfs(relay), + sizeof(struct relay_transaction)); + if (err) + return err; + + relay_debug(relay, "using mempool with %d elements\n", relay->pool.min_nr); + + return drmm_add_action_or_reset(&xe->drm, __fini_relay, relay); +} + +static u32 to_relay_error(int err) +{ + /* XXX: assume that relay errors match errno codes */ + return err < 0 ? -err : GUC_RELAY_ERROR_UNDISCLOSED; +} + +static int from_relay_error(u32 error) +{ + /* XXX: assume that relay errors match errno codes */ + return error ? -error : -ENODATA; +} + +static u32 sanitize_relay_error(u32 error) +{ + /* XXX TBD if generic error codes will be allowed */ + if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG)) + error = GUC_RELAY_ERROR_UNDISCLOSED; + return error; +} + +static u32 sanitize_relay_error_hint(u32 hint) +{ + /* XXX TBD if generic error codes will be allowed */ + if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG)) + hint = 0; + return hint; +} + +static u32 prepare_error_reply(u32 *msg, u32 error, u32 hint) +{ + msg[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_RESPONSE_FAILURE) | + FIELD_PREP(GUC_HXG_FAILURE_MSG_0_HINT, hint) | + FIELD_PREP(GUC_HXG_FAILURE_MSG_0_ERROR, error); + + XE_WARN_ON(!FIELD_FIT(GUC_HXG_FAILURE_MSG_0_ERROR, error)); + XE_WARN_ON(!FIELD_FIT(GUC_HXG_FAILURE_MSG_0_HINT, hint)); + + return GUC_HXG_FAILURE_MSG_LEN; +} + +static void relay_testonly_nop(struct xe_guc_relay *relay) +{ + KUNIT_STATIC_STUB_REDIRECT(relay_testonly_nop, relay); +} + +static int relay_send_message_and_wait(struct xe_guc_relay *relay, + struct relay_transaction *txn, + u32 *buf, u32 buf_size) +{ + unsigned long timeout = msecs_to_jiffies(RELAY_TIMEOUT_MSEC); + u32 *msg = &txn->request_buf[txn->offset]; + u32 len = txn->request_len; + u32 type, action, data0; + int ret; + long n; + + type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]); + action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]); + data0 = FIELD_GET(GUC_HXG_REQUEST_MSG_0_DATA0, msg[0]); + + relay_debug(relay, "%s.%u to %u action %#x:%u\n", + guc_hxg_type_to_string(type), + txn->rid, txn->remote, action, data0); + + /* list ordering does not need to match RID ordering */ + spin_lock(&relay->lock); + list_add_tail(&txn->link, &relay->pending_relays); + spin_unlock(&relay->lock); + +resend: + ret = relay_send_transaction(relay, txn); + if (unlikely(ret < 0)) + goto unlink; + +wait: + n = wait_for_completion_timeout(&txn->done, timeout); + if (unlikely(n == 0 && txn->reply)) { + ret = -ETIME; + goto unlink; + } + + relay_debug(relay, "%u.%u reply %d after %u msec\n", + txn->remote, txn->rid, txn->reply, jiffies_to_msecs(timeout - n)); + if (unlikely(txn->reply)) { + reinit_completion(&txn->done); + if (txn->reply == -EAGAIN) + goto resend; + if (txn->reply == -EBUSY) { + relay_testonly_nop(relay); + goto wait; + } + if (txn->reply > 0) + ret = from_relay_error(txn->reply); + else + ret = txn->reply; + goto unlink; + } + + relay_debug(relay, "%u.%u response %*ph\n", txn->remote, txn->rid, + (int)sizeof(u32) * txn->response_len, txn->response); + 
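/* + * Success: relay_handle_reply() has already copied the inner response + * payload into txn->response and set txn->response_len to its length. + */ +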
relay_assert(relay, txn->response_len >= GUC_RELAY_MSG_MIN_LEN); + ret = txn->response_len; + +unlink: + spin_lock(&relay->lock); + list_del_init(&txn->link); + spin_unlock(&relay->lock); + + if (unlikely(ret < 0)) { + relay_notice(relay, "Unsuccessful %s.%u %#x:%u to %u (%pe) %*ph\n", + guc_hxg_type_to_string(type), txn->rid, + action, data0, txn->remote, ERR_PTR(ret), + (int)sizeof(u32) * len, msg); + } + + return ret; +} + +static int relay_send_to(struct xe_guc_relay *relay, u32 target, + const u32 *msg, u32 len, u32 *buf, u32 buf_size) +{ + struct relay_transaction *txn; + int ret; + + relay_assert(relay, len >= GUC_RELAY_MSG_MIN_LEN); + relay_assert(relay, len <= GUC_RELAY_MSG_MAX_LEN); + relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_HOST); + relay_assert(relay, guc_hxg_type_is_action(FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]))); + + if (unlikely(!relay_is_ready(relay))) + return -ENODEV; + + txn = relay_new_transaction(relay, target, msg, len, buf, buf_size); + if (IS_ERR(txn)) + return PTR_ERR(txn); + + switch (FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0])) { + case GUC_HXG_TYPE_REQUEST: + ret = relay_send_message_and_wait(relay, txn, buf, buf_size); + break; + case GUC_HXG_TYPE_FAST_REQUEST: + relay_assert(relay, !GUC_HXG_TYPE_FAST_REQUEST); + fallthrough; + case GUC_HXG_TYPE_EVENT: + ret = relay_send_transaction(relay, txn); + break; + default: + ret = -EINVAL; + break; + } + + relay_release_transaction(relay, txn); + return ret; +} + +#ifdef CONFIG_PCI_IOV +/** + * xe_guc_relay_send_to_vf - Send a message to the VF. + * @relay: the &xe_guc_relay which will send the message + * @target: target VF number + * @msg: request message to be sent + * @len: length of the request message (in dwords, can't be 0) + * @buf: placeholder for the response message + * @buf_size: size of the response message placeholder (in dwords) + * + * This function can only be used by the driver running in the SR-IOV PF mode. + * + * Return: Non-negative response length (in dwords) or + * a negative error code on failure. + */ +int xe_guc_relay_send_to_vf(struct xe_guc_relay *relay, u32 target, + const u32 *msg, u32 len, u32 *buf, u32 buf_size) +{ + relay_assert(relay, IS_SRIOV_PF(relay_to_xe(relay))); + + return relay_send_to(relay, target, msg, len, buf, buf_size); +} +#endif + +/** + * xe_guc_relay_send_to_pf - Send a message to the PF. + * @relay: the &xe_guc_relay which will send the message + * @msg: request message to be sent + * @len: length of the message (in dwords, can't be 0) + * @buf: placeholder for the response message + * @buf_size: size of the response message placeholder (in dwords) + * + * This function can only be used by driver running in SR-IOV VF mode. + * + * Return: Non-negative response length (in dwords) or + * a negative error code on failure. + */ +int xe_guc_relay_send_to_pf(struct xe_guc_relay *relay, + const u32 *msg, u32 len, u32 *buf, u32 buf_size) +{ + relay_assert(relay, IS_SRIOV_VF(relay_to_xe(relay))); + + return relay_send_to(relay, PFID, msg, len, buf, buf_size); +} + +static int relay_handle_reply(struct xe_guc_relay *relay, u32 origin, + u32 rid, int reply, const u32 *msg, u32 len) +{ + struct relay_transaction *pending; + int err = -ESRCH; + + spin_lock(&relay->lock); + list_for_each_entry(pending, &relay->pending_relays, link) { + if (pending->remote != origin || pending->rid != rid) { + relay_debug(relay, "%u.%u still awaits response\n", + pending->remote, pending->rid); + continue; + } + err = 0; /* found! 
*/ + if (reply == 0) { + if (len > pending->response_len) { + reply = -ENOBUFS; + err = -ENOBUFS; + } else { + memcpy(pending->response, msg, 4 * len); + pending->response_len = len; + } + } + pending->reply = reply; + complete_all(&pending->done); + break; + } + spin_unlock(&relay->lock); + + return err; +} + +static int relay_handle_failure(struct xe_guc_relay *relay, u32 origin, + u32 rid, const u32 *msg, u32 len) +{ + int error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, msg[0]); + u32 hint __maybe_unused = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, msg[0]); + + relay_assert(relay, len); + relay_debug(relay, "%u.%u error %#x (%pe) hint %u debug %*ph\n", + origin, rid, error, ERR_PTR(-error), hint, 4 * (len - 1), msg + 1); + + return relay_handle_reply(relay, origin, rid, error ?: -EREMOTEIO, NULL, 0); +} + +static int relay_testloop_action_handler(struct xe_guc_relay *relay, u32 origin, + const u32 *msg, u32 len, u32 *response, u32 size) +{ + static ktime_t last_reply = 0; + u32 type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]); + u32 action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]); + u32 opcode = FIELD_GET(GUC_HXG_REQUEST_MSG_0_DATA0, msg[0]); + ktime_t now = ktime_get(); + bool busy; + int ret; + + relay_assert(relay, guc_hxg_type_is_action(type)); + relay_assert(relay, action == GUC_RELAY_ACTION_VFXPF_TESTLOOP); + + if (!IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) + return -ECONNREFUSED; + + if (!last_reply) + last_reply = now; + busy = ktime_before(now, ktime_add_ms(last_reply, 2 * RELAY_TIMEOUT_MSEC)); + if (!busy) + last_reply = now; + + switch (opcode) { + case VFXPF_TESTLOOP_OPCODE_NOP: + if (type == GUC_HXG_TYPE_EVENT) + return 0; + return guc_hxg_msg_encode_success(response, 0); + case VFXPF_TESTLOOP_OPCODE_BUSY: + if (type == GUC_HXG_TYPE_EVENT) + return -EPROTO; + msleep(RELAY_TIMEOUT_MSEC / 8); + if (busy) + return -EINPROGRESS; + return guc_hxg_msg_encode_success(response, 0); + case VFXPF_TESTLOOP_OPCODE_RETRY: + if (type == GUC_HXG_TYPE_EVENT) + return -EPROTO; + msleep(RELAY_TIMEOUT_MSEC / 8); + if (busy) + return guc_hxg_msg_encode_retry(response, 0); + return guc_hxg_msg_encode_success(response, 0); + case VFXPF_TESTLOOP_OPCODE_ECHO: + if (type == GUC_HXG_TYPE_EVENT) + return -EPROTO; + if (size < len) + return -ENOBUFS; + ret = guc_hxg_msg_encode_success(response, len); + memcpy(response + ret, msg + ret, (len - ret) * sizeof(u32)); + return len; + case VFXPF_TESTLOOP_OPCODE_FAIL: + return -EHWPOISON; + default: + break; + } + + relay_notice(relay, "Unexpected action %#x opcode %#x\n", action, opcode); + return -EBADRQC; +} + +static int relay_action_handler(struct xe_guc_relay *relay, u32 origin, + const u32 *msg, u32 len, u32 *response, u32 size) +{ + u32 type; + int ret; + + relay_assert(relay, len >= GUC_HXG_MSG_MIN_LEN); + + if (FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]) == GUC_RELAY_ACTION_VFXPF_TESTLOOP) + return relay_testloop_action_handler(relay, origin, msg, len, response, size); + + type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]); + + /* XXX: PF services will be added later */ + ret = -EOPNOTSUPP; + + if (type == GUC_HXG_TYPE_EVENT) + relay_assert(relay, ret <= 0); + + return ret; +} + +static struct relay_transaction *relay_dequeue_transaction(struct xe_guc_relay *relay) +{ + struct relay_transaction *txn; + + spin_lock(&relay->lock); + txn = list_first_entry_or_null(&relay->incoming_actions, struct relay_transaction, link); + if (txn) + list_del_init(&txn->link); + spin_unlock(&relay->lock); + + return txn; +} + +static void 
relay_process_incoming_action(struct xe_guc_relay *relay) +{ + struct relay_transaction *txn; + bool again = false; + u32 type; + int ret; + + txn = relay_dequeue_transaction(relay); + if (!txn) + return; + + type = FIELD_GET(GUC_HXG_MSG_0_TYPE, txn->request_buf[txn->offset]); + + ret = relay_action_handler(relay, txn->remote, + txn->request_buf + txn->offset, txn->request_len, + txn->response_buf + txn->offset, + ARRAY_SIZE(txn->response_buf) - txn->offset); + + if (ret == -EINPROGRESS) { + again = true; + ret = guc_hxg_msg_encode_busy(txn->response_buf + txn->offset, 0); + } + + if (ret > 0) { + txn->response_len = ret; + ret = relay_send_transaction(relay, txn); + } + + if (ret < 0) { + u32 error = to_relay_error(ret); + + relay_notice(relay, "Failed to handle %s.%u from %u (%pe) %*ph\n", + guc_hxg_type_to_string(type), txn->rid, txn->remote, + ERR_PTR(ret), 4 * txn->request_len, txn->request_buf + txn->offset); + + txn->response_len = prepare_error_reply(txn->response_buf + txn->offset, + txn->remote ? + sanitize_relay_error(error) : error, + txn->remote ? + sanitize_relay_error_hint(-ret) : -ret); + ret = relay_send_transaction(relay, txn); + again = false; + } + + if (again) { + spin_lock(&relay->lock); + list_add(&txn->link, &relay->incoming_actions); + spin_unlock(&relay->lock); + return; + } + + if (unlikely(ret < 0)) + relay_notice(relay, "Failed to process action.%u (%pe) %*ph\n", + txn->rid, ERR_PTR(ret), 4 * txn->request_len, + txn->request_buf + txn->offset); + + relay_release_transaction(relay, txn); +} + +static bool relay_needs_worker(struct xe_guc_relay *relay) +{ + return !list_empty(&relay->incoming_actions); +} + +static void relay_kick_worker(struct xe_guc_relay *relay) +{ + KUNIT_STATIC_STUB_REDIRECT(relay_kick_worker, relay); + queue_work(relay_to_xe(relay)->sriov.wq, &relay->worker); +} + +static void relays_worker_fn(struct work_struct *w) +{ + struct xe_guc_relay *relay = container_of(w, struct xe_guc_relay, worker); + + relay_process_incoming_action(relay); + + if (relay_needs_worker(relay)) + relay_kick_worker(relay); +} + +static int relay_queue_action_msg(struct xe_guc_relay *relay, u32 origin, u32 rid, + const u32 *msg, u32 len) +{ + struct relay_transaction *txn; + + txn = relay_new_incoming_transaction(relay, origin, rid, msg, len); + if (IS_ERR(txn)) + return PTR_ERR(txn); + + spin_lock(&relay->lock); + list_add_tail(&txn->link, &relay->incoming_actions); + spin_unlock(&relay->lock); + + relay_kick_worker(relay); + return 0; +} + +static int relay_process_msg(struct xe_guc_relay *relay, u32 origin, u32 rid, + const u32 *msg, u32 len) +{ + u32 type; + int err; + + if (unlikely(len < GUC_HXG_MSG_MIN_LEN)) + return -EPROTO; + + if (FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) != GUC_HXG_ORIGIN_HOST) + return -EPROTO; + + type = FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]); + relay_debug(relay, "received %s.%u from %u = %*ph\n", + guc_hxg_type_to_string(type), rid, origin, 4 * len, msg); + + switch (type) { + case GUC_HXG_TYPE_REQUEST: + case GUC_HXG_TYPE_FAST_REQUEST: + case GUC_HXG_TYPE_EVENT: + err = relay_queue_action_msg(relay, origin, rid, msg, len); + break; + case GUC_HXG_TYPE_RESPONSE_SUCCESS: + err = relay_handle_reply(relay, origin, rid, 0, msg, len); + break; + case GUC_HXG_TYPE_NO_RESPONSE_BUSY: + err = relay_handle_reply(relay, origin, rid, -EBUSY, NULL, 0); + break; + case GUC_HXG_TYPE_NO_RESPONSE_RETRY: + err = relay_handle_reply(relay, origin, rid, -EAGAIN, NULL, 0); + break; + case GUC_HXG_TYPE_RESPONSE_FAILURE: + err = relay_handle_failure(relay, 
origin, rid, msg, len); + break; + default: + err = -EBADRQC; + } + + if (unlikely(err)) + relay_notice(relay, "Failed to process %s.%u from %u (%pe) %*ph\n", + guc_hxg_type_to_string(type), rid, origin, + ERR_PTR(err), 4 * len, msg); + + return err; +} + +/** + * xe_guc_relay_process_guc2vf - Handle relay notification message from the GuC. + * @relay: the &xe_guc_relay which will handle the message + * @msg: message to be handled + * @len: length of the message (in dwords) + * + * This function will handle relay messages received from the GuC. + * + * This function can only be used if the driver is running in SR-IOV VF mode. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_guc_relay_process_guc2vf(struct xe_guc_relay *relay, const u32 *msg, u32 len) +{ + u32 rid; + + relay_assert(relay, len >= GUC_HXG_MSG_MIN_LEN); + relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC); + relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT); + relay_assert(relay, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) == + XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF); + + if (unlikely(!IS_SRIOV_VF(relay_to_xe(relay)) && !kunit_get_current_test())) + return -EPERM; + + if (unlikely(!relay_is_ready(relay))) + return -ENODEV; + + if (unlikely(len < GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN)) + return -EPROTO; + + if (unlikely(len > GUC2VF_RELAY_FROM_PF_EVENT_MSG_MAX_LEN)) + return -EMSGSIZE; + + if (unlikely(FIELD_GET(GUC_HXG_EVENT_MSG_0_DATA0, msg[0]))) + return -EPFNOSUPPORT; + + rid = FIELD_GET(GUC2VF_RELAY_FROM_PF_EVENT_MSG_1_RELAY_ID, msg[1]); + + return relay_process_msg(relay, PFID, rid, + msg + GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN, + len - GUC2VF_RELAY_FROM_PF_EVENT_MSG_MIN_LEN); +} + +#ifdef CONFIG_PCI_IOV +/** + * xe_guc_relay_process_guc2pf - Handle relay notification message from the GuC. + * @relay: the &xe_guc_relay which will handle the message + * @msg: message to be handled + * @len: length of the message (in dwords) + * + * This function will handle relay messages received from the GuC. + * + * This function can only be used if the driver is running in SR-IOV PF mode. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_guc_relay_process_guc2pf(struct xe_guc_relay *relay, const u32 *msg, u32 len) +{ + u32 origin, rid; + int err; + + relay_assert(relay, len >= GUC_HXG_EVENT_MSG_MIN_LEN); + relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC); + relay_assert(relay, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT); + relay_assert(relay, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) == + XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF); + + if (unlikely(!IS_SRIOV_PF(relay_to_xe(relay)) && !kunit_get_current_test())) + return -EPERM; + + if (unlikely(!relay_is_ready(relay))) + return -ENODEV; + + if (unlikely(len < GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN)) + return -EPROTO; + + if (unlikely(len > GUC2PF_RELAY_FROM_VF_EVENT_MSG_MAX_LEN)) + return -EMSGSIZE; + + if (unlikely(FIELD_GET(GUC_HXG_EVENT_MSG_0_DATA0, msg[0]))) + return -EPFNOSUPPORT; + + origin = FIELD_GET(GUC2PF_RELAY_FROM_VF_EVENT_MSG_1_VFID, msg[1]); + rid = FIELD_GET(GUC2PF_RELAY_FROM_VF_EVENT_MSG_2_RELAY_ID, msg[2]); + + if (unlikely(origin > relay_get_totalvfs(relay))) + return -ENOENT; + + err = relay_process_msg(relay, origin, rid, + msg + GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN, + len - GUC2PF_RELAY_FROM_VF_EVENT_MSG_MIN_LEN); + + return err; +} +#endif + +#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_guc_relay_test.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_relay.h b/drivers/gpu/drm/xe/xe_guc_relay.h new file mode 100644 index 0000000000..385429aa18 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_relay.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GUC_RELAY_H_ +#define _XE_GUC_RELAY_H_ + +#include <linux/types.h> +#include <linux/errno.h> + +struct xe_guc_relay; + +int xe_guc_relay_init(struct xe_guc_relay *relay); + +int xe_guc_relay_send_to_pf(struct xe_guc_relay *relay, + const u32 *msg, u32 len, u32 *buf, u32 buf_size); + +int xe_guc_relay_process_guc2vf(struct xe_guc_relay *relay, const u32 *msg, u32 len); + +#ifdef CONFIG_PCI_IOV +int xe_guc_relay_send_to_vf(struct xe_guc_relay *relay, u32 target, + const u32 *msg, u32 len, u32 *buf, u32 buf_size); +int xe_guc_relay_process_guc2pf(struct xe_guc_relay *relay, const u32 *msg, u32 len); +#else +static inline int xe_guc_relay_send_to_vf(struct xe_guc_relay *relay, u32 target, + const u32 *msg, u32 len, u32 *buf, u32 buf_size) +{ + return -ENODEV; +} +static inline int xe_guc_relay_process_guc2pf(struct xe_guc_relay *relay, const u32 *msg, u32 len) +{ + return -ENODEV; +} +#endif + +#endif
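[Annotation, not part of the patch: the header above is the complete relay API surface seen by the rest of the driver. A minimal sketch of a VF-side caller, reusing the debug-only VFXPF_TESTLOOP action handled earlier in this file; the function name is hypothetical and the TESTLOOP path is only served when CONFIG_DRM_XE_DEBUG_SRIOV is enabled.]

static int example_relay_ping_pf(struct xe_guc_relay *relay)
{
	u32 request[GUC_RELAY_MSG_MIN_LEN];
	u32 response[GUC_RELAY_MSG_MAX_LEN];
	int ret;

	/* inner HXG request: action = TESTLOOP, data0 = NOP opcode */
	request[0] = FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
		     FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
		     FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
				GUC_RELAY_ACTION_VFXPF_TESTLOOP) |
		     FIELD_PREP(GUC_HXG_REQUEST_MSG_0_DATA0,
				VFXPF_TESTLOOP_OPCODE_NOP);

	/* blocks until the PF replies or RELAY_TIMEOUT_MSEC expires */
	ret = xe_guc_relay_send_to_pf(relay, request, ARRAY_SIZE(request),
				      response, ARRAY_SIZE(response));

	/* on success ret is the inner response length in dwords */
	return ret < 0 ? ret : 0;
}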
diff --git a/drivers/gpu/drm/xe/xe_guc_relay_types.h b/drivers/gpu/drm/xe/xe_guc_relay_types.h new file mode 100644 index 0000000000..5999fcb77e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_relay_types.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_GUC_RELAY_TYPES_H_ +#define _XE_GUC_RELAY_TYPES_H_ + +#include <linux/mempool.h> +#include <linux/spinlock.h> +#include <linux/workqueue.h> + +/** + * struct xe_guc_relay - Data used by the VF-PF Relay Communication over GuC. + */ +struct xe_guc_relay { + /** @lock: protects all internal data. */ + spinlock_t lock; + + /** @worker: dispatches incoming action messages. */ + struct work_struct worker; + + /** @pending_relays: list of sent requests that await a response. */ + struct list_head pending_relays; + + /** @incoming_actions: list of incoming relay action messages to process. */ + struct list_head incoming_actions; + + /** @pool: pool of the relay message buffers. */ + mempool_t pool; + + /** @last_rid: last Relay-ID used while sending a message. */ + u32 last_rid; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index bd1079aa88..3757e0dc5c 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -23,6 +23,7 @@ #include "xe_force_wake.h" #include "xe_gpu_scheduler.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_guc_exec_queue_types.h" @@ -311,7 +312,7 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa q->guc->id - GUC_ID_START_MLRC, order_base_2(q->width)); else - ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id); + ida_free(&guc->submission_state.guc_ids, q->guc->id); } static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) @@ -335,8 +336,8 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC, order_base_2(q->width)); } else { - ret = ida_simple_get(&guc->submission_state.guc_ids, 0, - GUC_ID_NUMBER_SLRC, GFP_NOWAIT); + ret = ida_alloc_max(&guc->submission_state.guc_ids, + GUC_ID_NUMBER_SLRC - 1, GFP_NOWAIT); } if (ret < 0) return ret; @@ -811,7 +812,8 @@ static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) static void simple_error_capture(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); - struct drm_printer p = drm_err_printer(""); + struct xe_device *xe = guc_to_xe(guc); + struct drm_printer p = drm_err_printer(&xe->drm, NULL); struct xe_hw_engine *hwe; enum xe_hw_engine_id id; u32 adj_logical_mask = q->logical_mask; @@ -928,13 +930,15 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) int i = 0; if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { - xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_KERNEL)); - xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q))); - drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx", xe_sched_job_seqno(job), q->guc->id, q->flags); + xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL, + "Kernel-submitted job timed out\n"); + xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), + "VM job timed out on non-killed execqueue\n"); + simple_error_capture(q); - xe_devcoredump(q); + xe_devcoredump(job); } else { drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx", xe_sched_job_seqno(job), q->guc->id, q->flags); @@ -1253,6 +1257,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) return 0; err_entity: + mutex_unlock(&guc->submission_state.lock); xe_sched_entity_fini(&ge->entity); err_sched: xe_sched_fini(&ge->sched); @@ -1348,21 +1353,6 @@ static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, return 0; } -static int guc_exec_queue_set_job_timeout(struct xe_exec_queue *q, u32 job_timeout_ms) -{ - struct xe_gpu_scheduler *sched = &q->guc->sched; - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - - xe_assert(xe, !exec_queue_registered(q)); - xe_assert(xe, !exec_queue_banned(q)); - xe_assert(xe, !exec_queue_killed(q)); - - sched->base.timeout = job_timeout_ms; - - return 0; -} - static int guc_exec_queue_suspend(struct xe_exec_queue *q) { struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; @@ -1413,7 +1403,6 @@ static const struct 
xe_exec_queue_ops guc_exec_queue_ops = { .set_priority = guc_exec_queue_set_priority, .set_timeslice = guc_exec_queue_set_timeslice, .set_preempt_timeout = guc_exec_queue_set_preempt_timeout, - .set_job_timeout = guc_exec_queue_set_job_timeout, .suspend = guc_exec_queue_suspend, .suspend_wait = guc_exec_queue_suspend_wait, .resume = guc_exec_queue_resume, @@ -1794,7 +1783,7 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps /** * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. - * @q: Xe exec queue. + * @job: faulty Xe scheduled job. * * This can be printed out in a later stage like during dev_coredump * analysis. @@ -1803,21 +1792,17 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps * caller, using `xe_guc_exec_queue_snapshot_free`. */ struct xe_guc_submit_exec_queue_snapshot * -xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) +xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job) { - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); + struct xe_exec_queue *q = job->q; struct xe_gpu_scheduler *sched = &q->guc->sched; - struct xe_sched_job *job; struct xe_guc_submit_exec_queue_snapshot *snapshot; int i; snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC); - if (!snapshot) { - drm_err(&xe->drm, "Skipping GuC Engine snapshot entirely.\n"); + if (!snapshot) return NULL; - } snapshot->guc.id = q->guc->id; memcpy(&snapshot->name, &q->name, sizeof(snapshot->name)); @@ -1833,9 +1818,7 @@ xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) snapshot->lrc = kmalloc_array(q->width, sizeof(struct lrc_snapshot), GFP_ATOMIC); - if (!snapshot->lrc) { - drm_err(&xe->drm, "Skipping GuC Engine LRC snapshot.\n"); - } else { + if (snapshot->lrc) { for (i = 0; i < q->width; ++i) { struct xe_lrc *lrc = q->lrc + i; @@ -1863,17 +1846,17 @@ xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) sizeof(struct pending_list_snapshot), GFP_ATOMIC); - if (!snapshot->pending_list) { - drm_err(&xe->drm, "Skipping GuC Engine pending_list snapshot.\n"); - } else { + if (snapshot->pending_list) { + struct xe_sched_job *job_iter; + i = 0; - list_for_each_entry(job, &sched->base.pending_list, drm.list) { + list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) { snapshot->pending_list[i].seqno = - xe_sched_job_seqno(job); + xe_sched_job_seqno(job_iter); snapshot->pending_list[i].fence = - dma_fence_is_signaled(job->fence) ? 1 : 0; + dma_fence_is_signaled(job_iter->fence) ? 1 : 0; snapshot->pending_list[i].finished = - dma_fence_is_signaled(&job->drm.s_fence->finished) + dma_fence_is_signaled(&job_iter->drm.s_fence->finished) ? 
1 : 0; i++; } @@ -1959,10 +1942,28 @@ void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *s static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) { struct xe_guc_submit_exec_queue_snapshot *snapshot; + struct xe_gpu_scheduler *sched = &q->guc->sched; + struct xe_sched_job *job; + bool found = false; - snapshot = xe_guc_exec_queue_snapshot_capture(q); + spin_lock(&sched->base.job_list_lock); + list_for_each_entry(job, &sched->base.pending_list, drm.list) { + if (job->q == q) { + xe_sched_job_get(job); + found = true; + break; + } + } + spin_unlock(&sched->base.job_list_lock); + + if (!found) + return; + + snapshot = xe_guc_exec_queue_snapshot_capture(job); xe_guc_exec_queue_snapshot_print(snapshot, p); xe_guc_exec_queue_snapshot_free(snapshot); + + xe_sched_job_put(job); } /** diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index fc97869c5b..723dc2bd8d 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -9,8 +9,8 @@ #include <linux/types.h> struct drm_printer; -struct xe_exec_queue; struct xe_guc; +struct xe_sched_job; int xe_guc_submit_init(struct xe_guc *guc); @@ -27,7 +27,7 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len); struct xe_guc_submit_exec_queue_snapshot * -xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q); +xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job); void xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h index 649b0a8526..72fc0f42b0 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit_types.h +++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h @@ -102,9 +102,9 @@ struct xe_guc_submit_exec_queue_snapshot { /** @sched_props: scheduling properties */ struct { - /** @timeslice_us: timeslice period in micro-seconds */ + /** @sched_props.timeslice_us: timeslice period in micro-seconds */ u32 timeslice_us; - /** @preempt_timeout_us: preemption timeout in micro-seconds */ + /** @sched_props.preempt_timeout_us: preemption timeout in micro-seconds */ u32 preempt_timeout_us; } sched_props; @@ -118,11 +118,11 @@ struct xe_guc_submit_exec_queue_snapshot { /** @guc: GuC Engine Snapshot */ struct { - /** @wqi_head: work queue item head */ + /** @guc.wqi_head: work queue item head */ u32 wqi_head; - /** @wqi_tail: work queue item tail */ + /** @guc.wqi_tail: work queue item tail */ u32 wqi_tail; - /** @id: GuC id for this exec_queue */ + /** @guc.id: GuC id for this exec_queue */ u16 id; } guc; @@ -133,13 +133,13 @@ struct xe_guc_submit_exec_queue_snapshot { bool parallel_execution; /** @parallel: snapshot of the useful parallel scratch */ struct { - /** @wq_desc: Workqueue description */ + /** @parallel.wq_desc: Workqueue description */ struct { - /** @head: Workqueue Head */ + /** @parallel.wq_desc.head: Workqueue Head */ u32 head; - /** @tail: Workqueue Tail */ + /** @parallel.wq_desc.tail: Workqueue Tail */ u32 tail; - /** @status: Workqueue Status */ + /** @parallel.wq_desc.status: Workqueue Status */ u32 status; } wq_desc; /** @wq: Workqueue Items */ diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index cd80802e89..edcd1a950b 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -15,9 +15,23 @@ #include 
"xe_guc_fwif.h" #include "xe_guc_log_types.h" #include "xe_guc_pc_types.h" +#include "xe_guc_relay_types.h" #include "xe_uc_fw_types.h" /** + * struct xe_guc_db_mgr - GuC Doorbells Manager. + * + * Note: GuC Doorbells Manager is relying on &xe_guc::submission_state.lock + * to protect its members. + */ +struct xe_guc_db_mgr { + /** @count: number of doorbells to manage */ + unsigned int count; + /** @bitmap: bitmap to track allocated doorbells */ + unsigned long *bitmap; +}; + +/** * struct xe_guc - Graphic micro controller */ struct xe_guc { @@ -31,45 +45,50 @@ struct xe_guc { struct xe_guc_ct ct; /** @pc: GuC Power Conservation */ struct xe_guc_pc pc; + /** @dbm: GuC Doorbell Manager */ + struct xe_guc_db_mgr dbm; /** @submission_state: GuC submission state */ struct { - /** @exec_queue_lookup: Lookup an xe_engine from guc_id */ + /** @submission_state.exec_queue_lookup: Lookup an xe_engine from guc_id */ struct xarray exec_queue_lookup; - /** @guc_ids: used to allocate new guc_ids, single-lrc */ + /** @submission_state.guc_ids: used to allocate new guc_ids, single-lrc */ struct ida guc_ids; - /** @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc */ + /** @submission_state.guc_ids_bitmap: used to allocate new guc_ids, multi-lrc */ unsigned long *guc_ids_bitmap; - /** @stopped: submissions are stopped */ + /** @submission_state.stopped: submissions are stopped */ atomic_t stopped; - /** @lock: protects submission state */ + /** @submission_state.lock: protects submission state */ struct mutex lock; - /** @suspend: suspend fence state */ + /** @submission_state.suspend: suspend fence state */ struct { - /** @lock: suspend fences lock */ + /** @submission_state.suspend.lock: suspend fences lock */ spinlock_t lock; - /** @context: suspend fences context */ + /** @submission_state.suspend.context: suspend fences context */ u64 context; - /** @seqno: suspend fences seqno */ + /** @submission_state.suspend.seqno: suspend fences seqno */ u32 seqno; } suspend; #ifdef CONFIG_PROVE_LOCKING #define NUM_SUBMIT_WQ 256 - /** @submit_wq_pool: submission ordered workqueues pool */ + /** @submission_state.submit_wq_pool: submission ordered workqueues pool */ struct workqueue_struct *submit_wq_pool[NUM_SUBMIT_WQ]; - /** @submit_wq_idx: submission ordered workqueue index */ + /** @submission_state.submit_wq_idx: submission ordered workqueue index */ int submit_wq_idx; #endif - /** @enabled: submission is enabled */ + /** @submission_state.enabled: submission is enabled */ bool enabled; } submission_state; /** @hwconfig: Hardware config state */ struct { - /** @bo: buffer object of the hardware config */ + /** @hwconfig.bo: buffer object of the hardware config */ struct xe_bo *bo; - /** @size: size of the hardware config */ + /** @hwconfig.size: size of the hardware config */ u32 size; } hwconfig; + /** @relay: GuC Relay Communication used in SR-IOV */ + struct xe_guc_relay relay; + /** * @notify_reg: Register which is written to notify GuC of H2G messages */ diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c index bfdd33b9b2..1c9d38b6f5 100644 --- a/drivers/gpu/drm/xe/xe_heci_gsc.c +++ b/drivers/gpu/drm/xe/xe_heci_gsc.c @@ -29,7 +29,7 @@ static void heci_gsc_irq_unmask(struct irq_data *d) /* generic irq handling */ } -static struct irq_chip heci_gsc_irq_chip = { +static const struct irq_chip heci_gsc_irq_chip = { .name = "gsc_irq_chip", .irq_mask = heci_gsc_irq_mask, .irq_unmask = heci_gsc_irq_unmask, diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c 
index 01b2d0bd26..6b9b1cbedd 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -105,6 +105,25 @@ out: return ret; } +int xe_huc_init_post_hwconfig(struct xe_huc *huc) +{ + struct xe_tile *tile = gt_to_tile(huc_to_gt(huc)); + struct xe_device *xe = huc_to_xe(huc); + int ret; + + if (!IS_DGFX(huc_to_xe(huc))) + return 0; + + if (!xe_uc_fw_is_loadable(&huc->fw)) + return 0; + + ret = xe_managed_bo_reinit_in_vram(xe, tile, &huc->fw.bo); + if (ret) + return ret; + + return 0; +} + int xe_huc_upload(struct xe_huc *huc) { if (!xe_uc_fw_is_loadable(&huc->fw)) diff --git a/drivers/gpu/drm/xe/xe_huc.h b/drivers/gpu/drm/xe/xe_huc.h index 5320172302..3ab56cc14b 100644 --- a/drivers/gpu/drm/xe/xe_huc.h +++ b/drivers/gpu/drm/xe/xe_huc.h @@ -17,6 +17,7 @@ enum xe_huc_auth_types { }; int xe_huc_init(struct xe_huc *huc); +int xe_huc_init_post_hwconfig(struct xe_huc *huc); int xe_huc_upload(struct xe_huc *huc); int xe_huc_auth(struct xe_huc *huc, enum xe_huc_auth_types type); bool xe_huc_is_authenticated(struct xe_huc *huc, enum xe_huc_auth_types type); diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 1fa5cf5eea..b5e83ea172 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -25,6 +25,7 @@ #include "xe_reg_sr.h" #include "xe_rtp.h" #include "xe_sched_job.h" +#include "xe_sriov.h" #include "xe_tuning.h" #include "xe_uc_fw.h" #include "xe_wa.h" @@ -34,6 +35,7 @@ struct engine_info { const char *name; unsigned int class : 8; unsigned int instance : 8; + unsigned int irq_offset : 8; enum xe_force_wake_domains domain; u32 mmio_base; }; @@ -43,6 +45,7 @@ static const struct engine_info engine_infos[] = { .name = "rcs0", .class = XE_ENGINE_CLASS_RENDER, .instance = 0, + .irq_offset = ilog2(INTR_RCS0), .domain = XE_FW_RENDER, .mmio_base = RENDER_RING_BASE, }, @@ -50,6 +53,7 @@ static const struct engine_info engine_infos[] = { .name = "bcs0", .class = XE_ENGINE_CLASS_COPY, .instance = 0, + .irq_offset = ilog2(INTR_BCS(0)), .domain = XE_FW_RENDER, .mmio_base = BLT_RING_BASE, }, @@ -57,6 +61,7 @@ static const struct engine_info engine_infos[] = { .name = "bcs1", .class = XE_ENGINE_CLASS_COPY, .instance = 1, + .irq_offset = ilog2(INTR_BCS(1)), .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS1_RING_BASE, }, @@ -64,6 +69,7 @@ static const struct engine_info engine_infos[] = { .name = "bcs2", .class = XE_ENGINE_CLASS_COPY, .instance = 2, + .irq_offset = ilog2(INTR_BCS(2)), .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS2_RING_BASE, }, @@ -71,6 +77,7 @@ static const struct engine_info engine_infos[] = { .name = "bcs3", .class = XE_ENGINE_CLASS_COPY, .instance = 3, + .irq_offset = ilog2(INTR_BCS(3)), .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS3_RING_BASE, }, @@ -78,6 +85,7 @@ static const struct engine_info engine_infos[] = { .name = "bcs4", .class = XE_ENGINE_CLASS_COPY, .instance = 4, + .irq_offset = ilog2(INTR_BCS(4)), .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS4_RING_BASE, }, @@ -85,6 +93,7 @@ static const struct engine_info engine_infos[] = { .name = "bcs5", .class = XE_ENGINE_CLASS_COPY, .instance = 5, + .irq_offset = ilog2(INTR_BCS(5)), .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS5_RING_BASE, }, @@ -92,12 +101,14 @@ static const struct engine_info engine_infos[] = { .name = "bcs6", .class = XE_ENGINE_CLASS_COPY, .instance = 6, + .irq_offset = ilog2(INTR_BCS(6)), .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS6_RING_BASE, }, [XE_HW_ENGINE_BCS7] = { .name = "bcs7", .class = XE_ENGINE_CLASS_COPY, + .irq_offset 
= ilog2(INTR_BCS(7)), .instance = 7, .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS7_RING_BASE, @@ -106,6 +117,7 @@ static const struct engine_info engine_infos[] = { .name = "bcs8", .class = XE_ENGINE_CLASS_COPY, .instance = 8, + .irq_offset = ilog2(INTR_BCS8), .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS8_RING_BASE, }, @@ -114,6 +126,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs0", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 0, + .irq_offset = 32 + ilog2(INTR_VCS(0)), .domain = XE_FW_MEDIA_VDBOX0, .mmio_base = BSD_RING_BASE, }, @@ -121,6 +134,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs1", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 1, + .irq_offset = 32 + ilog2(INTR_VCS(1)), .domain = XE_FW_MEDIA_VDBOX1, .mmio_base = BSD2_RING_BASE, }, @@ -128,6 +142,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs2", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 2, + .irq_offset = 32 + ilog2(INTR_VCS(2)), .domain = XE_FW_MEDIA_VDBOX2, .mmio_base = BSD3_RING_BASE, }, @@ -135,6 +150,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs3", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 3, + .irq_offset = 32 + ilog2(INTR_VCS(3)), .domain = XE_FW_MEDIA_VDBOX3, .mmio_base = BSD4_RING_BASE, }, @@ -142,6 +158,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs4", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 4, + .irq_offset = 32 + ilog2(INTR_VCS(4)), .domain = XE_FW_MEDIA_VDBOX4, .mmio_base = XEHP_BSD5_RING_BASE, }, @@ -149,6 +166,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs5", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 5, + .irq_offset = 32 + ilog2(INTR_VCS(5)), .domain = XE_FW_MEDIA_VDBOX5, .mmio_base = XEHP_BSD6_RING_BASE, }, @@ -156,6 +174,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs6", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 6, + .irq_offset = 32 + ilog2(INTR_VCS(6)), .domain = XE_FW_MEDIA_VDBOX6, .mmio_base = XEHP_BSD7_RING_BASE, }, @@ -163,6 +182,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs7", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 7, + .irq_offset = 32 + ilog2(INTR_VCS(7)), .domain = XE_FW_MEDIA_VDBOX7, .mmio_base = XEHP_BSD8_RING_BASE, }, @@ -170,6 +190,7 @@ static const struct engine_info engine_infos[] = { .name = "vecs0", .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, .instance = 0, + .irq_offset = 32 + ilog2(INTR_VECS(0)), .domain = XE_FW_MEDIA_VEBOX0, .mmio_base = VEBOX_RING_BASE, }, @@ -177,6 +198,7 @@ static const struct engine_info engine_infos[] = { .name = "vecs1", .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, .instance = 1, + .irq_offset = 32 + ilog2(INTR_VECS(1)), .domain = XE_FW_MEDIA_VEBOX1, .mmio_base = VEBOX2_RING_BASE, }, @@ -184,6 +206,7 @@ static const struct engine_info engine_infos[] = { .name = "vecs2", .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, .instance = 2, + .irq_offset = 32 + ilog2(INTR_VECS(2)), .domain = XE_FW_MEDIA_VEBOX2, .mmio_base = XEHP_VEBOX3_RING_BASE, }, @@ -191,6 +214,7 @@ static const struct engine_info engine_infos[] = { .name = "vecs3", .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, .instance = 3, + .irq_offset = 32 + ilog2(INTR_VECS(3)), .domain = XE_FW_MEDIA_VEBOX3, .mmio_base = XEHP_VEBOX4_RING_BASE, }, @@ -198,6 +222,7 @@ static const struct engine_info engine_infos[] = { .name = "ccs0", .class = XE_ENGINE_CLASS_COMPUTE, .instance = 0, + .irq_offset = ilog2(INTR_CCS(0)), .domain = XE_FW_RENDER, .mmio_base = COMPUTE0_RING_BASE, }, @@ -205,6 +230,7 @@ 
static const struct engine_info engine_infos[] = { .name = "ccs1", .class = XE_ENGINE_CLASS_COMPUTE, .instance = 1, + .irq_offset = ilog2(INTR_CCS(1)), .domain = XE_FW_RENDER, .mmio_base = COMPUTE1_RING_BASE, }, @@ -212,6 +238,7 @@ static const struct engine_info engine_infos[] = { .name = "ccs2", .class = XE_ENGINE_CLASS_COMPUTE, .instance = 2, + .irq_offset = ilog2(INTR_CCS(2)), .domain = XE_FW_RENDER, .mmio_base = COMPUTE2_RING_BASE, }, @@ -219,6 +246,7 @@ static const struct engine_info engine_infos[] = { .name = "ccs3", .class = XE_ENGINE_CLASS_COMPUTE, .instance = 3, + .irq_offset = ilog2(INTR_CCS(3)), .domain = XE_FW_RENDER, .mmio_base = COMPUTE3_RING_BASE, }, @@ -289,6 +317,19 @@ static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt, xe_rtp_match_first_render_or_compute(gt, hwe); } +static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + if (GRAPHICS_VER(gt_to_xe(gt)) < 20) + return false; + + if (hwe->class != XE_ENGINE_CLASS_COMPUTE && + hwe->class != XE_ENGINE_CLASS_RENDER) + return false; + + return xe_mmio_read32(hwe->gt, XEHP_FUSE4) & CFEG_WMTP_DISABLE; +} + void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) { @@ -319,6 +360,14 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE)) }, + /* Disable WMTP if HW doesn't support it */ + { XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"), + XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)), + XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0), + PREEMPT_GPGPU_LEVEL_MASK, + PREEMPT_GPGPU_THREAD_GROUP_LEVEL)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE) + }, {} }; @@ -397,6 +446,7 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, hwe->class = info->class; hwe->instance = info->instance; hwe->mmio_base = info->mmio_base; + hwe->irq_offset = info->irq_offset; hwe->domain = info->domain; hwe->name = info->name; hwe->fence_irq = >->fence_irq[info->class]; @@ -700,7 +750,7 @@ struct xe_hw_engine_snapshot * xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) { struct xe_hw_engine_snapshot *snapshot; - int len; + u64 val; if (!xe_hw_engine_is_valid(hwe)) return NULL; @@ -710,11 +760,7 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) if (!snapshot) return NULL; - len = strlen(hwe->name) + 1; - snapshot->name = kzalloc(len, GFP_ATOMIC); - if (snapshot->name) - strscpy(snapshot->name, hwe->name, len); - + snapshot->name = kstrdup(hwe->name, GFP_ATOMIC); snapshot->class = hwe->class; snapshot->logical_instance = hwe->logical_instance; snapshot->forcewake.domain = hwe->domain; @@ -722,19 +768,35 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) hwe->domain); snapshot->mmio_base = hwe->mmio_base; - snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); - snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, - RING_HWS_PGA(0)); - snapshot->reg.ring_execlist_status_lo = + /* no more VF accessible data below this point */ + if (IS_SRIOV_VF(gt_to_xe(hwe->gt))) + return snapshot; + + snapshot->reg.ring_execlist_status = hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)); - snapshot->reg.ring_execlist_status_hi = - hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)); - snapshot->reg.ring_execlist_sq_contents_lo = - hw_engine_mmio_read32(hwe, - RING_EXECLIST_SQ_CONTENTS_LO(0)); - snapshot->reg.ring_execlist_sq_contents_hi = - hw_engine_mmio_read32(hwe, - RING_EXECLIST_SQ_CONTENTS_HI(0)); + val = 
hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)); + snapshot->reg.ring_execlist_status |= val << 32; + + snapshot->reg.ring_execlist_sq_contents = + hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0)); + val = hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0)); + snapshot->reg.ring_execlist_sq_contents |= val << 32; + + snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0)); + val = hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)); + snapshot->reg.ring_acthd |= val << 32; + + snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0)); + val = hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)); + snapshot->reg.ring_bbaddr |= val << 32; + + snapshot->reg.ring_dma_fadd = + hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); + val = hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)); + snapshot->reg.ring_dma_fadd |= val << 32; + + snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); + snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)); snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0)); snapshot->reg.ring_head = hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; @@ -748,16 +810,6 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0)); snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0)); snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0)); - snapshot->reg.ring_acthd_udw = - hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)); - snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0)); - snapshot->reg.ring_bbaddr_udw = - hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)); - snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0)); - snapshot->reg.ring_dma_fadd_udw = - hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)); - snapshot->reg.ring_dma_fadd = - hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0)); if (snapshot->class == XE_ENGINE_CLASS_COMPUTE) @@ -786,33 +838,25 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, snapshot->forcewake.domain, snapshot->forcewake.ref); drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam); drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga); - drm_printf(p, "\tRING_EXECLIST_STATUS_LO: 0x%08x\n", - snapshot->reg.ring_execlist_status_lo); - drm_printf(p, "\tRING_EXECLIST_STATUS_HI: 0x%08x\n", - snapshot->reg.ring_execlist_status_hi); - drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n", - snapshot->reg.ring_execlist_sq_contents_lo); - drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n", - snapshot->reg.ring_execlist_sq_contents_hi); + drm_printf(p, "\tRING_EXECLIST_STATUS: 0x%016llx\n", + snapshot->reg.ring_execlist_status); + drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n", + snapshot->reg.ring_execlist_sq_contents); drm_printf(p, "\tRING_START: 0x%08x\n", snapshot->reg.ring_start); - drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head); - drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail); + drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head); + drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail); drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl); drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode); drm_printf(p, "\tRING_MODE: 0x%08x\n", snapshot->reg.ring_mode); - drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr); - drm_printf(p, 
"\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr); - drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr); - drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir); - drm_printf(p, "\tACTHD: 0x%08x_%08x\n", snapshot->reg.ring_acthd_udw, - snapshot->reg.ring_acthd); - drm_printf(p, "\tBBADDR: 0x%08x_%08x\n", snapshot->reg.ring_bbaddr_udw, - snapshot->reg.ring_bbaddr); - drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n", - snapshot->reg.ring_dma_fadd_udw, - snapshot->reg.ring_dma_fadd); - drm_printf(p, "\tIPEHR: 0x%08x\n\n", snapshot->reg.ipehr); + drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr); + drm_printf(p, "\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr); + drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr); + drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir); + drm_printf(p, "\tACTHD: 0x%016llx\n", snapshot->reg.ring_acthd); + drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr); + drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd); + drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr); if (snapshot->class == XE_ENGINE_CLASS_COMPUTE) drm_printf(p, "\tRCU_MODE: 0x%08x\n", snapshot->reg.rcu_mode); diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index e49bc14f0e..2345fb42fa 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -73,7 +73,7 @@ static ssize_t job_timeout_max_show(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_max); } -static struct kobj_attribute job_timeout_max_attr = +static const struct kobj_attribute job_timeout_max_attr = __ATTR(job_timeout_max, 0644, job_timeout_max_show, job_timeout_max_store); static ssize_t job_timeout_min_store(struct kobject *kobj, @@ -109,7 +109,7 @@ static ssize_t job_timeout_min_show(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_min); } -static struct kobj_attribute job_timeout_min_attr = +static const struct kobj_attribute job_timeout_min_attr = __ATTR(job_timeout_min, 0644, job_timeout_min_show, job_timeout_min_store); static ssize_t job_timeout_store(struct kobject *kobj, @@ -142,7 +142,7 @@ static ssize_t job_timeout_show(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_ms); } -static struct kobj_attribute job_timeout_attr = +static const struct kobj_attribute job_timeout_attr = __ATTR(job_timeout_ms, 0644, job_timeout_show, job_timeout_store); static ssize_t job_timeout_default(struct kobject *kobj, @@ -153,7 +153,7 @@ static ssize_t job_timeout_default(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->defaults.job_timeout_ms); } -static struct kobj_attribute job_timeout_def = +static const struct kobj_attribute job_timeout_def = __ATTR(job_timeout_ms, 0444, job_timeout_default, NULL); static ssize_t job_timeout_min_default(struct kobject *kobj, @@ -164,7 +164,7 @@ static ssize_t job_timeout_min_default(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->defaults.job_timeout_min); } -static struct kobj_attribute job_timeout_min_def = +static const struct kobj_attribute job_timeout_min_def = __ATTR(job_timeout_min, 0444, job_timeout_min_default, NULL); static ssize_t job_timeout_max_default(struct kobject *kobj, @@ -175,7 +175,7 @@ static ssize_t job_timeout_max_default(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->defaults.job_timeout_max); } -static struct kobj_attribute job_timeout_max_def = +static const struct 
kobj_attribute job_timeout_max_def = __ATTR(job_timeout_max, 0444, job_timeout_max_default, NULL); static ssize_t timeslice_duration_store(struct kobject *kobj, @@ -234,7 +234,7 @@ static ssize_t timeslice_duration_max_show(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->sched_props.timeslice_max); } -static struct kobj_attribute timeslice_duration_max_attr = +static const struct kobj_attribute timeslice_duration_max_attr = __ATTR(timeslice_duration_max, 0644, timeslice_duration_max_show, timeslice_duration_max_store); @@ -272,7 +272,7 @@ static ssize_t timeslice_duration_min_show(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->sched_props.timeslice_min); } -static struct kobj_attribute timeslice_duration_min_attr = +static const struct kobj_attribute timeslice_duration_min_attr = __ATTR(timeslice_duration_min, 0644, timeslice_duration_min_show, timeslice_duration_min_store); @@ -284,7 +284,7 @@ static ssize_t timeslice_duration_show(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->sched_props.timeslice_us); } -static struct kobj_attribute timeslice_duration_attr = +static const struct kobj_attribute timeslice_duration_attr = __ATTR(timeslice_duration_us, 0644, timeslice_duration_show, timeslice_duration_store); @@ -296,7 +296,7 @@ static ssize_t timeslice_default(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->defaults.timeslice_us); } -static struct kobj_attribute timeslice_duration_def = +static const struct kobj_attribute timeslice_duration_def = __ATTR(timeslice_duration_us, 0444, timeslice_default, NULL); static ssize_t timeslice_min_default(struct kobject *kobj, @@ -307,7 +307,7 @@ static ssize_t timeslice_min_default(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->defaults.timeslice_min); } -static struct kobj_attribute timeslice_duration_min_def = +static const struct kobj_attribute timeslice_duration_min_def = __ATTR(timeslice_duration_min, 0444, timeslice_min_default, NULL); static ssize_t timeslice_max_default(struct kobject *kobj, @@ -318,7 +318,7 @@ static ssize_t timeslice_max_default(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->defaults.timeslice_max); } -static struct kobj_attribute timeslice_duration_max_def = +static const struct kobj_attribute timeslice_duration_max_def = __ATTR(timeslice_duration_max, 0444, timeslice_max_default, NULL); static ssize_t preempt_timeout_store(struct kobject *kobj, @@ -351,7 +351,7 @@ static ssize_t preempt_timeout_show(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_us); } -static struct kobj_attribute preempt_timeout_attr = +static const struct kobj_attribute preempt_timeout_attr = __ATTR(preempt_timeout_us, 0644, preempt_timeout_show, preempt_timeout_store); static ssize_t preempt_timeout_default(struct kobject *kobj, @@ -363,7 +363,7 @@ static ssize_t preempt_timeout_default(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_us); } -static struct kobj_attribute preempt_timeout_def = +static const struct kobj_attribute preempt_timeout_def = __ATTR(preempt_timeout_us, 0444, preempt_timeout_default, NULL); static ssize_t preempt_timeout_min_default(struct kobject *kobj, @@ -375,7 +375,7 @@ static ssize_t preempt_timeout_min_default(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_min); } -static struct kobj_attribute preempt_timeout_min_def = +static const struct kobj_attribute preempt_timeout_min_def = __ATTR(preempt_timeout_min, 0444, preempt_timeout_min_default, NULL); 
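[Annotation, not part of the patch: every attribute constified above follows the same sysfs pattern, which is what makes the const qualifier safe, since the registration calls only take const pointers. A minimal sketch with a hypothetical attribute name and a placeholder value:]

static ssize_t example_show(struct kobject *kobj,
			    struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", 42);	/* placeholder value */
}

static const struct kobj_attribute example_attr =
	__ATTR(example, 0444, example_show, NULL);

Such attributes are then grouped into const arrays like the files[] list further below before being registered with sysfs.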
static ssize_t preempt_timeout_max_default(struct kobject *kobj, @@ -387,7 +387,7 @@ static ssize_t preempt_timeout_max_default(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_max); } -static struct kobj_attribute preempt_timeout_max_def = +static const struct kobj_attribute preempt_timeout_max_def = __ATTR(preempt_timeout_max, 0444, preempt_timeout_max_default, NULL); static ssize_t preempt_timeout_max_store(struct kobject *kobj, @@ -423,7 +423,7 @@ static ssize_t preempt_timeout_max_show(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_max); } -static struct kobj_attribute preempt_timeout_max_attr = +static const struct kobj_attribute preempt_timeout_max_attr = __ATTR(preempt_timeout_max, 0644, preempt_timeout_max_show, preempt_timeout_max_store); @@ -460,7 +460,7 @@ static ssize_t preempt_timeout_min_show(struct kobject *kobj, return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_min); } -static struct kobj_attribute preempt_timeout_min_attr = +static const struct kobj_attribute preempt_timeout_min_attr = __ATTR(preempt_timeout_min, 0644, preempt_timeout_min_show, preempt_timeout_min_store); @@ -477,7 +477,7 @@ static const struct attribute *defaults[] = { NULL }; -static const struct attribute *files[] = { +static const struct attribute * const files[] = { &job_timeout_attr.attr, &job_timeout_min_attr.attr, &job_timeout_max_attr.attr, diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 39908dec04..d7f828c76c 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -79,23 +79,23 @@ struct xe_hw_engine_class_intf { * @defaults: default scheduling properties */ struct { - /** @set_job_timeout: Set job timeout in ms for engine */ + /** @sched_props.set_job_timeout: Set job timeout in ms for engine */ u32 job_timeout_ms; - /** @job_timeout_min: Min job timeout in ms for engine */ + /** @sched_props.job_timeout_min: Min job timeout in ms for engine */ u32 job_timeout_min; - /** @job_timeout_max: Max job timeout in ms for engine */ + /** @sched_props.job_timeout_max: Max job timeout in ms for engine */ u32 job_timeout_max; - /** @timeslice_us: timeslice period in micro-seconds */ + /** @sched_props.timeslice_us: timeslice period in micro-seconds */ u32 timeslice_us; - /** @timeslice_min: min timeslice period in micro-seconds */ + /** @sched_props.timeslice_min: min timeslice period in micro-seconds */ u32 timeslice_min; - /** @timeslice_max: max timeslice period in micro-seconds */ + /** @sched_props.timeslice_max: max timeslice period in micro-seconds */ u32 timeslice_max; - /** @preempt_timeout_us: preemption timeout in micro-seconds */ + /** @sched_props.preempt_timeout_us: preemption timeout in micro-seconds */ u32 preempt_timeout_us; - /** @preempt_timeout_min: min preemption timeout in micro-seconds */ + /** @sched_props.preempt_timeout_min: min preemption timeout in micro-seconds */ u32 preempt_timeout_min; - /** @preempt_timeout_max: max preemption timeout in micro-seconds */ + /** @sched_props.preempt_timeout_max: max preemption timeout in micro-seconds */ u32 preempt_timeout_max; } sched_props, defaults; }; @@ -116,6 +116,8 @@ struct xe_hw_engine { u16 instance; /** @logical_instance: logical instance of this hw engine */ u16 logical_instance; + /** @irq_offset: IRQ offset of this hw engine */ + u16 irq_offset; /** @mmio_base: MMIO base address of this hw engine*/ u32 mmio_base; /** @@ -162,62 +164,52 @@ 
struct xe_hw_engine_snapshot { u16 logical_instance; /** @forcewake: Force Wake information snapshot */ struct { - /** @domain: force wake domain of this hw engine */ + /** @forcewake.domain: force wake domain of this hw engine */ enum xe_force_wake_domains domain; - /** @ref: Forcewake ref for the above domain */ + /** @forcewake.ref: Forcewake ref for the above domain */ int ref; } forcewake; /** @mmio_base: MMIO base address of this hw engine*/ u32 mmio_base; /** @reg: Useful MMIO register snapshot */ struct { - /** @ring_hwstam: RING_HWSTAM */ + /** @reg.ring_execlist_status: RING_EXECLIST_STATUS */ + u64 ring_execlist_status; + /** @reg.ring_execlist_sq_contents: RING_EXECLIST_SQ_CONTENTS */ + u64 ring_execlist_sq_contents; + /** @reg.ring_acthd: RING_ACTHD */ + u64 ring_acthd; + /** @reg.ring_bbaddr: RING_BBADDR */ + u64 ring_bbaddr; + /** @reg.ring_dma_fadd: RING_DMA_FADD */ + u64 ring_dma_fadd; + /** @reg.ring_hwstam: RING_HWSTAM */ u32 ring_hwstam; - /** @ring_hws_pga: RING_HWS_PGA */ + /** @reg.ring_hws_pga: RING_HWS_PGA */ u32 ring_hws_pga; - /** @ring_execlist_status_lo: RING_EXECLIST_STATUS_LO */ - u32 ring_execlist_status_lo; - /** @ring_execlist_status_hi: RING_EXECLIST_STATUS_HI */ - u32 ring_execlist_status_hi; - /** @ring_execlist_sq_contents_lo: RING_EXECLIST_SQ_CONTENTS */ - u32 ring_execlist_sq_contents_lo; - /** @ring_execlist_sq_contents_hi: RING_EXECLIST_SQ_CONTENTS + 4 */ - u32 ring_execlist_sq_contents_hi; - /** @ring_start: RING_START */ + /** @reg.ring_start: RING_START */ u32 ring_start; - /** @ring_head: RING_HEAD */ + /** @reg.ring_head: RING_HEAD */ u32 ring_head; - /** @ring_tail: RING_TAIL */ + /** @reg.ring_tail: RING_TAIL */ u32 ring_tail; - /** @ring_ctl: RING_CTL */ + /** @reg.ring_ctl: RING_CTL */ u32 ring_ctl; - /** @ring_mi_mode: RING_MI_MODE */ + /** @reg.ring_mi_mode: RING_MI_MODE */ u32 ring_mi_mode; - /** @ring_mode: RING_MODE */ + /** @reg.ring_mode: RING_MODE */ u32 ring_mode; - /** @ring_imr: RING_IMR */ + /** @reg.ring_imr: RING_IMR */ u32 ring_imr; - /** @ring_esr: RING_ESR */ + /** @reg.ring_esr: RING_ESR */ u32 ring_esr; - /** @ring_emr: RING_EMR */ + /** @reg.ring_emr: RING_EMR */ u32 ring_emr; - /** @ring_eir: RING_EIR */ + /** @reg.ring_eir: RING_EIR */ u32 ring_eir; - /** @ring_acthd_udw: RING_ACTHD_UDW */ - u32 ring_acthd_udw; - /** @ring_acthd: RING_ACTHD */ - u32 ring_acthd; - /** @ring_bbaddr_udw: RING_BBADDR_UDW */ - u32 ring_bbaddr_udw; - /** @ring_bbaddr: RING_BBADDR */ - u32 ring_bbaddr; - /** @ring_dma_fadd_udw: RING_DMA_FADD_UDW */ - u32 ring_dma_fadd_udw; - /** @ring_dma_fadd: RING_DMA_FADD */ - u32 ring_dma_fadd; - /** @ipehr: IPEHR */ + /** @reg.ipehr: IPEHR */ u32 ipehr; - /** @rcu_mode: RCU_MODE */ + /** @reg.rcu_mode: RCU_MODE */ u32 rcu_mode; } reg; }; diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index a6f43446c7..9ac7fbe201 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -10,12 +10,14 @@ #include <drm/drm_managed.h> #include "regs/xe_gt_regs.h" #include "regs/xe_mchbar_regs.h" +#include "regs/xe_pcode_regs.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_hwmon.h" #include "xe_mmio.h" #include "xe_pcode.h" #include "xe_pcode_api.h" +#include "xe_sriov.h" enum xe_hwmon_reg { REG_PKG_RAPL_LIMIT, @@ -77,32 +79,32 @@ static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg) switch (hwmon_reg) { case REG_PKG_RAPL_LIMIT: - if (xe->info.platform == XE_DG2) - reg = PCU_CR_PACKAGE_RAPL_LIMIT; - else if (xe->info.platform == XE_PVC) 
+ if (xe->info.platform == XE_PVC) reg = PVC_GT0_PACKAGE_RAPL_LIMIT; + else if (xe->info.platform == XE_DG2) + reg = PCU_CR_PACKAGE_RAPL_LIMIT; break; case REG_PKG_POWER_SKU: - if (xe->info.platform == XE_DG2) - reg = PCU_CR_PACKAGE_POWER_SKU; - else if (xe->info.platform == XE_PVC) + if (xe->info.platform == XE_PVC) reg = PVC_GT0_PACKAGE_POWER_SKU; + else if (xe->info.platform == XE_DG2) + reg = PCU_CR_PACKAGE_POWER_SKU; break; case REG_PKG_POWER_SKU_UNIT: - if (xe->info.platform == XE_DG2) - reg = PCU_CR_PACKAGE_POWER_SKU_UNIT; - else if (xe->info.platform == XE_PVC) + if (xe->info.platform == XE_PVC) reg = PVC_GT0_PACKAGE_POWER_SKU_UNIT; + else if (xe->info.platform == XE_DG2) + reg = PCU_CR_PACKAGE_POWER_SKU_UNIT; break; case REG_GT_PERF_STATUS: if (xe->info.platform == XE_DG2) reg = GT_PERF_STATUS; break; case REG_PKG_ENERGY_STATUS: - if (xe->info.platform == XE_DG2) - reg = PCU_CR_PACKAGE_ENERGY_STATUS; - else if (xe->info.platform == XE_PVC) + if (xe->info.platform == XE_PVC) reg = PVC_GT0_PLATFORM_ENERGY_STATUS; + else if (xe->info.platform == XE_DG2) + reg = PCU_CR_PACKAGE_ENERGY_STATUS; break; default: drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg); @@ -402,7 +404,7 @@ static const struct attribute_group *hwmon_groups[] = { NULL }; -static const struct hwmon_channel_info *hwmon_info[] = { +static const struct hwmon_channel_info * const hwmon_info[] = { HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT), HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT), HWMON_CHANNEL_INFO(in, HWMON_I_INPUT), @@ -745,6 +747,10 @@ void xe_hwmon_register(struct xe_device *xe) if (!IS_DGFX(xe)) return; + /* hwmon is not available on VFs */ + if (IS_SRIOV_VF(xe)) + return; + hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL); if (!hwmon) return; diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index d1f5ba4bb7..2f5d179e0d 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -9,15 +9,18 @@ #include <drm/drm_managed.h> +#include "display/xe_display.h" #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" #include "xe_device.h" -#include "xe_display.h" #include "xe_drv.h" +#include "xe_gsc_proxy.h" #include "xe_gt.h" #include "xe_guc.h" #include "xe_hw_engine.h" +#include "xe_memirq.h" #include "xe_mmio.h" +#include "xe_sriov.h" /* * Interrupt registers for a unit are always consecutive and ordered @@ -129,6 +132,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt) u32 ccs_mask, bcs_mask; u32 irqs, dmask, smask; u32 gsc_mask = 0; + u32 heci_mask = 0; if (xe_device_uc_enabled(xe)) { irqs = GT_RENDER_USER_INTERRUPT | @@ -178,14 +182,23 @@ void xe_irq_enable_hwe(struct xe_gt *gt) xe_mmio_write32(gt, VCS2_VCS3_INTR_MASK, ~dmask); xe_mmio_write32(gt, VECS0_VECS1_INTR_MASK, ~dmask); - if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) + /* + * the heci2 interrupt is enabled via the same register as the + * GSCCS interrupts, but it has its own mask register. 
+ */ + if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) { gsc_mask = irqs; - else if (HAS_HECI_GSCFI(xe)) + heci_mask = GSC_IRQ_INTF(1); + } else if (HAS_HECI_GSCFI(xe)) { gsc_mask = GSC_IRQ_INTF(1); + } + if (gsc_mask) { - xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, gsc_mask); + xe_mmio_write32(gt, GUNIT_GSC_INTR_ENABLE, gsc_mask | heci_mask); xe_mmio_write32(gt, GUNIT_GSC_INTR_MASK, ~gsc_mask); } + if (heci_mask) + xe_mmio_write32(gt, HECI2_RSVD_INTR_MASK, ~(heci_mask << 16)); } } @@ -232,6 +245,8 @@ gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir) return xe_guc_irq_handler(&gt->uc.guc, iir); if (instance == OTHER_MEDIA_GUC_INSTANCE && xe_gt_is_media_type(gt)) return xe_guc_irq_handler(&gt->uc.guc, iir); + if (instance == OTHER_GSC_HECI2_INSTANCE && xe_gt_is_media_type(gt)) + return xe_gsc_proxy_irq_handler(&gt->uc.gsc, iir); if (instance != OTHER_GUC_INSTANCE && instance != OTHER_MEDIA_GUC_INSTANCE) { @@ -249,15 +264,23 @@ static struct xe_gt *pick_engine_gt(struct xe_tile *tile, if (MEDIA_VER(xe) < 13) return tile->primary_gt; - if (class == XE_ENGINE_CLASS_VIDEO_DECODE || - class == XE_ENGINE_CLASS_VIDEO_ENHANCE) + switch (class) { + case XE_ENGINE_CLASS_VIDEO_DECODE: + case XE_ENGINE_CLASS_VIDEO_ENHANCE: return tile->media_gt; - - if (class == XE_ENGINE_CLASS_OTHER && - (instance == OTHER_MEDIA_GUC_INSTANCE || instance == OTHER_GSC_INSTANCE)) - return tile->media_gt; - - return tile->primary_gt; + case XE_ENGINE_CLASS_OTHER: + switch (instance) { + case OTHER_MEDIA_GUC_INSTANCE: + case OTHER_GSC_INSTANCE: + case OTHER_GSC_HECI2_INSTANCE: + return tile->media_gt; + default: + break; + }; + fallthrough; + default: + return tile->primary_gt; + } } static void gt_irq_handler(struct xe_tile *tile, @@ -419,7 +442,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) * irq as device is inaccessible.
*/ if (master_ctl == REG_GENMASK(31, 0)) { - dev_dbg(tile_to_xe(tile)->drm.dev, + drm_dbg(&tile_to_xe(tile)->drm, "Ignore this IRQ as device might be in DPC containment.\n"); return IRQ_HANDLED; } @@ -484,6 +507,7 @@ static void gt_irq_reset(struct xe_tile *tile) HAS_HECI_GSCFI(tile_to_xe(tile))) { xe_mmio_write32(mmio, GUNIT_GSC_INTR_ENABLE, 0); xe_mmio_write32(mmio, GUNIT_GSC_INTR_MASK, ~0); + xe_mmio_write32(mmio, HECI2_RSVD_INTR_MASK, ~0); } xe_mmio_write32(mmio, GPM_WGBOXPERF_INTR_ENABLE, 0); @@ -498,6 +522,9 @@ static void xelp_irq_reset(struct xe_tile *tile) gt_irq_reset(tile); + if (IS_SRIOV_VF(tile_to_xe(tile))) + return; + mask_and_disable(tile, PCU_IRQ_OFFSET); } @@ -508,6 +535,9 @@ static void dg1_irq_reset(struct xe_tile *tile) gt_irq_reset(tile); + if (IS_SRIOV_VF(tile_to_xe(tile))) + return; + mask_and_disable(tile, PCU_IRQ_OFFSET); } @@ -518,11 +548,34 @@ static void dg1_irq_reset_mstr(struct xe_tile *tile) xe_mmio_write32(mmio, GFX_MSTR_IRQ, ~0); } +static void vf_irq_reset(struct xe_device *xe) +{ + struct xe_tile *tile; + unsigned int id; + + xe_assert(xe, IS_SRIOV_VF(xe)); + + if (GRAPHICS_VERx100(xe) < 1210) + xelp_intr_disable(xe); + else + xe_assert(xe, xe_device_has_memirq(xe)); + + for_each_tile(tile, xe, id) { + if (xe_device_has_memirq(xe)) + xe_memirq_reset(&tile->sriov.vf.memirq); + else + gt_irq_reset(tile); + } +} + static void xe_irq_reset(struct xe_device *xe) { struct xe_tile *tile; u8 id; + if (IS_SRIOV_VF(xe)) + return vf_irq_reset(xe); + for_each_tile(tile, xe, id) { if (GRAPHICS_VERx100(xe) >= 1210) dg1_irq_reset(tile); @@ -545,8 +598,26 @@ static void xe_irq_reset(struct xe_device *xe) } } +static void vf_irq_postinstall(struct xe_device *xe) +{ + struct xe_tile *tile; + unsigned int id; + + for_each_tile(tile, xe, id) + if (xe_device_has_memirq(xe)) + xe_memirq_postinstall(&tile->sriov.vf.memirq); + + if (GRAPHICS_VERx100(xe) < 1210) + xelp_intr_enable(xe, true); + else + xe_assert(xe, xe_device_has_memirq(xe)); +} + static void xe_irq_postinstall(struct xe_device *xe) { + if (IS_SRIOV_VF(xe)) + return vf_irq_postinstall(xe); + xe_display_irq_postinstall(xe, xe_root_mmio_gt(xe)); /* @@ -563,8 +634,30 @@ static void xe_irq_postinstall(struct xe_device *xe) xelp_intr_enable(xe, true); } +static irqreturn_t vf_mem_irq_handler(int irq, void *arg) +{ + struct xe_device *xe = arg; + struct xe_tile *tile; + unsigned int id; + + spin_lock(&xe->irq.lock); + if (!xe->irq.enabled) { + spin_unlock(&xe->irq.lock); + return IRQ_NONE; + } + spin_unlock(&xe->irq.lock); + + for_each_tile(tile, xe, id) + xe_memirq_handler(&tile->sriov.vf.memirq); + + return IRQ_HANDLED; +} + static irq_handler_t xe_irq_handler(struct xe_device *xe) { + if (IS_SRIOV_VF(xe) && xe_device_has_memirq(xe)) + return vf_mem_irq_handler; + if (GRAPHICS_VERx100(xe) >= 1210) return dg1_irq_handler; else @@ -590,8 +683,9 @@ static void irq_uninstall(struct drm_device *drm, void *arg) int xe_irq_install(struct xe_device *xe) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + unsigned int irq_flags = PCI_IRQ_MSIX; irq_handler_t irq_handler; - int err, irq; + int err, irq, nvec; irq_handler = xe_irq_handler(xe); if (!irq_handler) { @@ -601,7 +695,19 @@ int xe_irq_install(struct xe_device *xe) xe_irq_reset(xe); - err = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_MSIX); + nvec = pci_msix_vec_count(pdev); + if (nvec <= 0) { + if (nvec == -EINVAL) { + /* MSIX capability is not supported in the device, using MSI */ + irq_flags = PCI_IRQ_MSI; + nvec = 1; + } else { + drm_err(&xe->drm, "MSIX: 
Failed getting count\n"); + return nvec; + } + } + + err = pci_alloc_irq_vectors(pdev, nvec, nvec, irq_flags); if (err < 0) { drm_err(&xe->drm, "MSI/MSIX: Failed to enable support %d\n", err); return err; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 72f04a656e..57066faf57 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -19,6 +19,8 @@ #include "xe_gt_printk.h" #include "xe_hw_fence.h" #include "xe_map.h" +#include "xe_memirq.h" +#include "xe_sriov.h" #include "xe_vm.h" #define LRC_VALID (1 << 0) @@ -529,6 +531,27 @@ static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) /* TODO: Timestamp */ } +static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) +{ + struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq; + struct xe_device *xe = gt_to_xe(hwe->gt); + + if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe)) + return; + + regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM | + MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT; + regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr; + regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq); + + regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | + MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED; + regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr; + regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq); + regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr; + regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq); +} + static int lrc_ring_mi_mode(struct xe_hw_engine *hwe) { struct xe_device *xe = gt_to_xe(hwe->gt); @@ -664,6 +687,7 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe) regs = data + LRC_PPHWSP_SIZE; set_offsets(regs, reg_offsets(xe, hwe->class), hwe); set_context_control(regs, hwe); + set_memory_based_intr(regs, hwe); reset_stop_ring(regs, hwe); return data; @@ -953,6 +977,20 @@ static int dump_mi_command(struct drm_printer *p, drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]); return numdw; + case MI_LOAD_REGISTER_MEM & MI_OPCODE: + drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n", + inst_header, + dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "", + dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : ""); + if (numdw == 4) + drm_printf(p, " - %#6x = %#010llx\n", + dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2]))); + else + drm_printf(p, " - %*ph (%s)\n", + (int)sizeof(u32) * (numdw - 1), dw + 1, + numdw < 4 ?
"truncated" : "malformed"); + return numdw; + case MI_FORCE_WAKEUP: drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header); return numdw; diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index 7822033606..24f20ed66f 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -28,11 +28,11 @@ struct xe_lrc { /** @ring: submission ring state */ struct { - /** @size: size of submission ring */ + /** @ring.size: size of submission ring */ u32 size; - /** @tail: tail of submission ring */ + /** @ring.tail: tail of submission ring */ u32 tail; - /** @old_tail: shadow of tail */ + /** @ring.old_tail: shadow of tail */ u32 old_tail; } ring; diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c new file mode 100644 index 0000000000..76e95535d7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_memirq.c @@ -0,0 +1,430 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "regs/xe_gt_regs.h" +#include "regs/xe_guc_regs.h" +#include "regs/xe_regs.h" + +#include "xe_assert.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_gt.h" +#include "xe_gt_printk.h" +#include "xe_guc.h" +#include "xe_hw_engine.h" +#include "xe_map.h" +#include "xe_memirq.h" +#include "xe_sriov.h" +#include "xe_sriov_printk.h" + +#define memirq_assert(m, condition) xe_tile_assert(memirq_to_tile(m), condition) +#define memirq_debug(m, msg...) xe_sriov_dbg_verbose(memirq_to_xe(m), "MEMIRQ: " msg) + +static struct xe_tile *memirq_to_tile(struct xe_memirq *memirq) +{ + return container_of(memirq, struct xe_tile, sriov.vf.memirq); +} + +static struct xe_device *memirq_to_xe(struct xe_memirq *memirq) +{ + return tile_to_xe(memirq_to_tile(memirq)); +} + +static const char *guc_name(struct xe_guc *guc) +{ + return xe_gt_is_media_type(guc_to_gt(guc)) ? "media GuC" : "GuC"; +} + +/** + * DOC: Memory Based Interrupts + * + * MMIO register based interrupts infrastructure used for non-virtualized mode + * or SRIOV-8 (which supports 8 Virtual Functions) does not scale efficiently + * to allow delivering interrupts to a large number of Virtual machines or + * containers. Memory based interrupt status reporting provides an efficient + * and scalable infrastructure. + * + * For memory based interrupt status reporting hardware sequence is: + * * Engine writes the interrupt event to memory + * (Pointer to memory location is provided by SW. This memory surface must + * be mapped to system memory and must be marked as un-cacheable (UC) on + * Graphics IP Caches) + * * Engine triggers an interrupt to host. + */ + +/** + * DOC: Memory Based Interrupts Page Layout + * + * `Memory Based Interrupts`_ requires three different objects, which are + * called "page" in the specs, even if they aren't page-sized or aligned. + * + * To simplify the code we allocate a single page size object and then use + * offsets to embedded "pages". The address of those "pages" are then + * programmed in the HW via LRI and LRM in the context image. + * + * - _`Interrupt Status Report Page`: this page contains the interrupt + * status vectors for each unit. Each bit in the interrupt vectors is + * converted to a byte, with the byte being set to 0xFF when an + * interrupt is triggered; interrupt vectors are 16b big so each unit + * gets 16B. One space is reserved for each bit in one of the + * GT_INTR_DWx registers, so this object needs a total of 1024B. 
+ * This object needs to be 4KiB aligned. + * + * - _`Interrupt Source Report Page`: this is the equivalent of the + * GEN11_GT_INTR_DWx registers, with each bit in those registers being + * mapped to a byte here. The offsets are the same, just bytes instead + * of bits. This object needs to be cacheline aligned. + * + * - Interrupt Mask: the HW needs a location to fetch the interrupt + * mask vector to be used by the LRM in the context, so we just use + * the next available space in the interrupt page. + * + * :: + * + * 0x0000 +===========+ <== Interrupt Status Report Page + * | | + * | | ____ +----+----------------+ + * | | / | 0 | USER INTERRUPT | + * +-----------+ __/ | 1 | | + * | HWE(n) | __ | | CTX SWITCH | + * +-----------+ \ | | WAIT SEMAPHORE | + * | | \____ | 15 | | + * | | +----+----------------+ + * | | + * 0x0400 +===========+ <== Interrupt Source Report Page + * | HWE(0) | + * | HWE(1) | + * | | + * | HWE(x) | + * 0x0440 +===========+ <== Interrupt Enable Mask + * | | + * | | + * +-----------+ + */ + +static void __release_xe_bo(struct drm_device *drm, void *arg) +{ + struct xe_bo *bo = arg; + + xe_bo_unpin_map_no_vm(bo); +} + +static int memirq_alloc_pages(struct xe_memirq *memirq) +{ + struct xe_device *xe = memirq_to_xe(memirq); + struct xe_tile *tile = memirq_to_tile(memirq); + struct xe_bo *bo; + int err; + + BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_SOURCE_OFFSET, SZ_64)); + BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_STATUS_OFFSET, SZ_4K)); + + /* XXX: convert to managed bo */ + bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, + ttm_bo_type_kernel, + XE_BO_CREATE_SYSTEM_BIT | + XE_BO_CREATE_GGTT_BIT | + XE_BO_NEEDS_UC | + XE_BO_NEEDS_CPU_ACCESS); + if (IS_ERR(bo)) { + err = PTR_ERR(bo); + goto out; + } + + memirq_assert(memirq, !xe_bo_is_vram(bo)); + memirq_assert(memirq, !memirq->bo); + + iosys_map_memset(&bo->vmap, 0, 0, SZ_4K); + + memirq->bo = bo; + memirq->source = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_SOURCE_OFFSET); + memirq->status = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_STATUS_OFFSET); + memirq->mask = IOSYS_MAP_INIT_OFFSET(&bo->vmap, XE_MEMIRQ_ENABLE_OFFSET); + + memirq_assert(memirq, !memirq->source.is_iomem); + memirq_assert(memirq, !memirq->status.is_iomem); + memirq_assert(memirq, !memirq->mask.is_iomem); + + memirq_debug(memirq, "page offsets: source %#x status %#x\n", + xe_memirq_source_ptr(memirq), xe_memirq_status_ptr(memirq)); + + return drmm_add_action_or_reset(&xe->drm, __release_xe_bo, memirq->bo); + +out: + xe_sriov_err(memirq_to_xe(memirq), + "Failed to allocate memirq page (%pe)\n", ERR_PTR(err)); + return err; +} + +static void memirq_set_enable(struct xe_memirq *memirq, bool enable) +{ + iosys_map_wr(&memirq->mask, 0, u32, enable ? GENMASK(15, 0) : 0); + + memirq->enabled = enable; +} + +/** + * xe_memirq_init - Initialize data used by `Memory Based Interrupts`_. + * @memirq: the &xe_memirq to initialize + * + * Allocate `Interrupt Source Report Page`_ and `Interrupt Status Report Page`_ + * used by `Memory Based Interrupts`_. + * + * These allocations are managed and will be implicitly released on unload. + * + * Note: This function shall be called only by the VF driver. + * + * If this function fails then VF driver won't be able to operate correctly. + * If `Memory Based Interrupts`_ are not used this function will return 0. + * + * Return: 0 on success or a negative error code on failure. 
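The offsets in the layout described above follow from simple arithmetic, restated here as a standalone check (the EX_* names are illustrative; only the 0x0/0x400/0x440 offsets come from the patch): 64 interrupt-source bits, each widened to a byte, need a 0x40-byte source page, and each bit also gets a 16-byte status vector, giving a 0x400-byte status page. Hence status at 0x0, source at 0x400, and the enable mask at 0x440 all fit in one 4 KiB object:

	#include <linux/build_bug.h>

	#define EX_NUM_SOURCE_BITS	64
	#define EX_STATUS_BYTES		(EX_NUM_SOURCE_BITS * 16)	/* 0x400 */
	#define EX_SOURCE_BYTES		(EX_NUM_SOURCE_BITS * 1)	/* 0x40 */

	/* the source page starts where the status page ends */
	static_assert(EX_STATUS_BYTES == 0x400);
	/* the enable mask starts where the source page ends */
	static_assert(EX_STATUS_BYTES + EX_SOURCE_BYTES == 0x440);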
+ */ +int xe_memirq_init(struct xe_memirq *memirq) +{ + struct xe_device *xe = memirq_to_xe(memirq); + int err; + + memirq_assert(memirq, IS_SRIOV_VF(xe)); + + if (!xe_device_has_memirq(xe)) + return 0; + + err = memirq_alloc_pages(memirq); + if (unlikely(err)) + return err; + + /* we need to start with all irqs enabled */ + memirq_set_enable(memirq, true); + + return 0; +} + +/** + * xe_memirq_source_ptr - Get GGTT's offset of the `Interrupt Source Report Page`_. + * @memirq: the &xe_memirq to query + * + * Shall be called only on VF driver when `Memory Based Interrupts`_ are used + * and xe_memirq_init() didn't fail. + * + * Return: GGTT's offset of the `Interrupt Source Report Page`_. + */ +u32 xe_memirq_source_ptr(struct xe_memirq *memirq) +{ + memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq))); + memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq))); + memirq_assert(memirq, memirq->bo); + + return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_SOURCE_OFFSET; +} + +/** + * xe_memirq_status_ptr - Get GGTT's offset of the `Interrupt Status Report Page`_. + * @memirq: the &xe_memirq to query + * + * Shall be called only on VF driver when `Memory Based Interrupts`_ are used + * and xe_memirq_init() didn't fail. + * + * Return: GGTT's offset of the `Interrupt Status Report Page`_. + */ +u32 xe_memirq_status_ptr(struct xe_memirq *memirq) +{ + memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq))); + memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq))); + memirq_assert(memirq, memirq->bo); + + return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_STATUS_OFFSET; +} + +/** + * xe_memirq_enable_ptr - Get GGTT's offset of the Interrupt Enable Mask. + * @memirq: the &xe_memirq to query + * + * Shall be called only on VF driver when `Memory Based Interrupts`_ are used + * and xe_memirq_init() didn't fail. + * + * Return: GGTT's offset of the Interrupt Enable Mask. + */ +u32 xe_memirq_enable_ptr(struct xe_memirq *memirq) +{ + memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq))); + memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq))); + memirq_assert(memirq, memirq->bo); + + return xe_bo_ggtt_addr(memirq->bo) + XE_MEMIRQ_ENABLE_OFFSET; +} + +/** + * xe_memirq_init_guc - Prepare GuC for `Memory Based Interrupts`_. + * @memirq: the &xe_memirq + * @guc: the &xe_guc to setup + * + * Register `Interrupt Source Report Page`_ and `Interrupt Status Report Page`_ + * to be used by the GuC when `Memory Based Interrupts`_ are required. + * + * Shall be called only on VF driver when `Memory Based Interrupts`_ are used + * and xe_memirq_init() didn't fail. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc) +{ + bool is_media = xe_gt_is_media_type(guc_to_gt(guc)); + u32 offset = is_media ? 
ilog2(INTR_MGUC) : ilog2(INTR_GUC); + u32 source, status; + int err; + + memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq))); + memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq))); + memirq_assert(memirq, memirq->bo); + + source = xe_memirq_source_ptr(memirq) + offset; + status = xe_memirq_status_ptr(memirq) + offset * SZ_16; + + err = xe_guc_self_cfg64(guc, GUC_KLV_SELF_CFG_MEMIRQ_SOURCE_ADDR_KEY, + source); + if (unlikely(err)) + goto failed; + + err = xe_guc_self_cfg64(guc, GUC_KLV_SELF_CFG_MEMIRQ_STATUS_ADDR_KEY, + status); + if (unlikely(err)) + goto failed; + + return 0; + +failed: + xe_sriov_err(memirq_to_xe(memirq), + "Failed to setup report pages in %s (%pe)\n", + guc_name(guc), ERR_PTR(err)); + return err; +} + +/** + * xe_memirq_reset - Disable processing of `Memory Based Interrupts`_. + * @memirq: struct xe_memirq + * + * This is part of the driver IRQ setup flow. + * + * This function shall only be used by the VF driver on platforms that use + * `Memory Based Interrupts`_. + */ +void xe_memirq_reset(struct xe_memirq *memirq) +{ + memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq))); + memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq))); + + if (memirq->bo) + memirq_set_enable(memirq, false); +} + +/** + * xe_memirq_postinstall - Enable processing of `Memory Based Interrupts`_. + * @memirq: the &xe_memirq + * + * This is part of the driver IRQ setup flow. + * + * This function shall only be used by the VF driver on platforms that use + * `Memory Based Interrupts`_. + */ +void xe_memirq_postinstall(struct xe_memirq *memirq) +{ + memirq_assert(memirq, IS_SRIOV_VF(memirq_to_xe(memirq))); + memirq_assert(memirq, xe_device_has_memirq(memirq_to_xe(memirq))); + + if (memirq->bo) + memirq_set_enable(memirq, true); +} + +static bool memirq_received(struct xe_memirq *memirq, struct iosys_map *vector, + u16 offset, const char *name) +{ + u8 value; + + value = iosys_map_rd(vector, offset, u8); + if (value) { + if (value != 0xff) + xe_sriov_err_ratelimited(memirq_to_xe(memirq), + "Unexpected memirq value %#x from %s at %u\n", + value, name, offset); + iosys_map_wr(vector, offset, u8, 0x00); + } + + return value; +} + +static void memirq_dispatch_engine(struct xe_memirq *memirq, struct iosys_map *status, + struct xe_hw_engine *hwe) +{ + memirq_debug(memirq, "STATUS %s %*ph\n", hwe->name, 16, status->vaddr); + + if (memirq_received(memirq, status, ilog2(GT_RENDER_USER_INTERRUPT), hwe->name)) + xe_hw_engine_handle_irq(hwe, GT_RENDER_USER_INTERRUPT); +} + +static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *status, + struct xe_guc *guc) +{ + const char *name = guc_name(guc); + + memirq_debug(memirq, "STATUS %s %*ph\n", name, 16, status->vaddr); + + if (memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name)) + xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST); +} + +/** + * xe_memirq_handler - The `Memory Based Interrupts`_ Handler. + * @memirq: the &xe_memirq + * + * This function reads and dispatches `Memory Based Interrupts`. 
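The memirq_received() and dispatch helpers above implement a small protocol: a pending interrupt shows up as a source byte set to 0xff at the unit's irq_offset, with per-event detail in a 16-byte status vector at irq_offset * 16 in the status page, again one byte per interrupt bit. A self-contained sketch of that check-and-clear sequence (plain C with invented names, not driver code; it assumes the user interrupt is bit 0 of the status vector, matching ilog2(GT_RENDER_USER_INTERRUPT) in the driver):

	#include <stdbool.h>
	#include <stdint.h>

	static bool example_user_irq_pending(uint8_t *source, uint8_t *status,
					     uint16_t irq_offset)
	{
		if (!source[irq_offset])
			return false;
		source[irq_offset] = 0;			/* ack the source byte */

		if (status[irq_offset * 16] != 0xff)
			return false;
		status[irq_offset * 16] = 0;		/* ack the status byte */
		return true;
	}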
+ */ +void xe_memirq_handler(struct xe_memirq *memirq) +{ + struct xe_device *xe = memirq_to_xe(memirq); + struct xe_tile *tile = memirq_to_tile(memirq); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + struct iosys_map map; + unsigned int gtid; + struct xe_gt *gt; + + if (!memirq->bo) + return; + + memirq_assert(memirq, !memirq->source.is_iomem); + memirq_debug(memirq, "SOURCE %*ph\n", 32, memirq->source.vaddr); + memirq_debug(memirq, "SOURCE %*ph\n", 32, memirq->source.vaddr + 32); + + for_each_gt(gt, xe, gtid) { + if (gt->tile != tile) + continue; + + for_each_hw_engine(hwe, gt, id) { + if (memirq_received(memirq, &memirq->source, hwe->irq_offset, "SRC")) { + map = IOSYS_MAP_INIT_OFFSET(&memirq->status, + hwe->irq_offset * SZ_16); + memirq_dispatch_engine(memirq, &map, hwe); + } + } + } + + /* GuC and media GuC (if present) must be checked separately */ + + if (memirq_received(memirq, &memirq->source, ilog2(INTR_GUC), "SRC")) { + map = IOSYS_MAP_INIT_OFFSET(&memirq->status, ilog2(INTR_GUC) * SZ_16); + memirq_dispatch_guc(memirq, &map, &tile->primary_gt->uc.guc); + } + + if (!tile->media_gt) + return; + + if (memirq_received(memirq, &memirq->source, ilog2(INTR_MGUC), "SRC")) { + map = IOSYS_MAP_INIT_OFFSET(&memirq->status, ilog2(INTR_MGUC) * SZ_16); + memirq_dispatch_guc(memirq, &map, &tile->media_gt->uc.guc); + } +} diff --git a/drivers/gpu/drm/xe/xe_memirq.h b/drivers/gpu/drm/xe/xe_memirq.h new file mode 100644 index 0000000000..2d40d03c30 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_memirq.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_MEMIRQ_H_ +#define _XE_MEMIRQ_H_ + +#include <linux/types.h> + +struct xe_guc; +struct xe_memirq; + +int xe_memirq_init(struct xe_memirq *memirq); + +u32 xe_memirq_source_ptr(struct xe_memirq *memirq); +u32 xe_memirq_status_ptr(struct xe_memirq *memirq); +u32 xe_memirq_enable_ptr(struct xe_memirq *memirq); + +void xe_memirq_reset(struct xe_memirq *memirq); +void xe_memirq_postinstall(struct xe_memirq *memirq); +void xe_memirq_handler(struct xe_memirq *memirq); + +int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc); + +#endif diff --git a/drivers/gpu/drm/xe/xe_memirq_types.h b/drivers/gpu/drm/xe/xe_memirq_types.h new file mode 100644 index 0000000000..625b6b8736 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_memirq_types.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_MEMIRQ_TYPES_H_ +#define _XE_MEMIRQ_TYPES_H_ + +#include <linux/iosys-map.h> + +struct xe_bo; + +/* ISR */ +#define XE_MEMIRQ_STATUS_OFFSET 0x0 +/* IIR */ +#define XE_MEMIRQ_SOURCE_OFFSET 0x400 +/* IMR */ +#define XE_MEMIRQ_ENABLE_OFFSET 0x440 + +/** + * struct xe_memirq - Data used by the `Memory Based Interrupts`_. + * + * @bo: buffer object with `Memory Based Interrupts Page Layout`_. + * @source: iosys pointer to `Interrupt Source Report Page`_. + * @status: iosys pointer to `Interrupt Status Report Page`_. + * @mask: iosys pointer to Interrupt Enable Mask. + * @enabled: internal flag used to control processing of the interrupts. 
+ */ +struct xe_memirq { + struct xe_bo *bo; + struct iosys_map source; + struct iosys_map status; + struct iosys_map mask; + bool enabled; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 7a6ad3469d..aca519f5b8 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -12,7 +12,8 @@ #include <drm/ttm/ttm_tt.h> #include <drm/xe_drm.h> -#include "generated/xe_wa_oob.h" +#include <generated/xe_wa_oob.h> + #include "instructions/xe_mi_commands.h" #include "regs/xe_gpu_commands.h" #include "tests/xe_test.h" @@ -32,7 +33,6 @@ #include "xe_sync.h" #include "xe_trace.h" #include "xe_vm.h" -#include "xe_wa.h" /** * struct xe_migrate - migrate context. @@ -71,6 +71,16 @@ struct xe_migrate { #define NUM_KERNEL_PDE 17 #define NUM_PT_SLOTS 32 #define LEVEL0_PAGE_TABLE_ENCODE_SIZE SZ_2M +#define MAX_NUM_PTE 512 + +/* + * Although MI_STORE_DATA_IMM's "length" field is 10-bits, 0x3FE is the largest + * legal value accepted. Since that instruction field is always stored in + * (val-2) format, this translates to 0x400 dwords for the true maximum length + * of the instruction. Subtracting the instruction header (1 dword) and + * address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values. + */ +#define MAX_PTE_PER_SDI 0x1FE /** * xe_tile_migrate_engine() - Get this tile's migrate engine. @@ -288,10 +298,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, } /* - * Due to workaround 16017236439, odd instance hardware copy engines are - * faster than even instance ones. - * This function returns the mask involving all fast copy engines and the - * reserved copy engine to be used as logical mask for migrate engine. * Including the reserved copy engine is required to avoid deadlocks due to * migrate jobs servicing the faults gets stuck behind the job that faulted. */ @@ -305,8 +311,7 @@ static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt) if (hwe->class != XE_ENGINE_CLASS_COPY) continue; - if (!XE_WA(gt, 16017236439) || - xe_gt_is_usm_hwe(gt, hwe) || hwe->instance & 1) + if (xe_gt_is_usm_hwe(gt, hwe)) logical_mask |= BIT(hwe->logical_instance); } @@ -357,10 +362,14 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) if (!hwe || !logical_mask) return ERR_PTR(-EINVAL); + /* + * XXX: Currently only reserving 1 (likely slow) BCS instance on + * PVC, may want to revisit if performance is needed. 
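A quick arithmetic check of the MAX_PTE_PER_SDI bound derived in the xe_migrate.c comment above (the EX_* names are illustrative; only MAX_PTE_PER_SDI and 0x3FE come from the patch): the 10-bit length field stores the dword count in (val-2) form and tops out at 0x3FE, so a packet holds 0x400 dwords in total; dropping one header dword and two address dwords leaves 0x3FD payload dwords, which is 0x1FE whole qwords of PTEs:

	#include <linux/build_bug.h>

	#define EX_SDI_LEN_FIELD_MAX	0x3FE				/* largest legal (val-2) encoding */
	#define EX_SDI_TOTAL_DWORDS	(EX_SDI_LEN_FIELD_MAX + 2)	/* 0x400 dwords in the packet */
	#define EX_SDI_PAYLOAD_DWORDS	(EX_SDI_TOTAL_DWORDS - 1 - 2)	/* minus header and address */

	static_assert(EX_SDI_PAYLOAD_DWORDS == 0x3FD);
	static_assert(EX_SDI_PAYLOAD_DWORDS / 2 == 0x1FE);		/* == MAX_PTE_PER_SDI */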
+ */ m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe, EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_PERMANENT | - EXEC_QUEUE_FLAG_HIGH_PRIORITY); + EXEC_QUEUE_FLAG_HIGH_PRIORITY, 0); } else { m->q = xe_exec_queue_create_class(xe, primary_gt, vm, XE_ENGINE_CLASS_COPY, @@ -451,13 +460,13 @@ static u32 pte_update_size(struct xe_migrate *m, } else { /* Clip L0 to available size */ u64 size = min(*L0, (u64)avail_pts * SZ_2M); - u64 num_4k_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE); + u32 num_4k_pages = (size + XE_PAGE_SIZE - 1) >> XE_PTE_SHIFT; *L0 = size; *L0_ofs = xe_migrate_vm_addr(pt_ofs, 0); /* MI_STORE_DATA_IMM */ - cmds += 3 * DIV_ROUND_UP(num_4k_pages, 0x1ff); + cmds += 3 * DIV_ROUND_UP(num_4k_pages, MAX_PTE_PER_SDI); /* PDE qwords */ cmds += num_4k_pages * 2; @@ -492,7 +501,7 @@ static void emit_pte(struct xe_migrate *m, ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE); while (ptes) { - u32 chunk = min(0x1ffU, ptes); + u32 chunk = min(MAX_PTE_PER_SDI, ptes); bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); bb->cs[bb->len++] = ofs; @@ -1111,7 +1120,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs, * This shouldn't be possible in practice.. might change when 16K * pages are used. Hence the assert. */ - xe_tile_assert(tile, update->qwords <= 0x1ff); + xe_tile_assert(tile, update->qwords < MAX_NUM_PTE); if (!ppgtt_ofs) ppgtt_ofs = xe_migrate_vram_ofs(tile_to_xe(tile), xe_bo_addr(update->pt_bo, 0, @@ -1120,7 +1129,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs, do { u64 addr = ppgtt_ofs + ofs * 8; - chunk = min(update->qwords, 0x1ffU); + chunk = min(size, MAX_PTE_PER_SDI); /* Ensure populatefn can do memset64 by aligning bb->cs */ if (!(bb->len & 1)) @@ -1299,7 +1308,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, batch_size = 6 + num_updates * 2; for (i = 0; i < num_updates; i++) { - u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, 0x1ff); + u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, MAX_PTE_PER_SDI); /* align noop + MI_STORE_DATA_IMM cmd prefix */ batch_size += 4 * num_cmds + updates[i].qwords * 2; diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 02f7808f28..7ba2477452 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -20,6 +20,7 @@ #include "xe_gt_mcr.h" #include "xe_macros.h" #include "xe_module.h" +#include "xe_sriov.h" #include "xe_tile.h" #define XEHP_MTCFG_ADDR XE_REG(0x101800) @@ -363,13 +364,19 @@ static int xe_verify_lmem_ready(struct xe_device *xe) { struct xe_gt *gt = xe_root_mmio_gt(xe); + if (!IS_DGFX(xe)) + return 0; + + if (IS_SRIOV_VF(xe)) + return 0; + /* * The boot firmware initializes local memory and assesses its health. * If memory training fails, the punit will have been instructed to * keep the GT powered down; we won't be able to communicate with it * and we should not continue with driver initialization. 
*/ - if (IS_DGFX(xe) && !(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) { + if (!(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) { drm_err(&xe->drm, "VRAM not initialized by firmware\n"); return -ENODEV; } diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index ef79552e4f..609d997b3e 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -13,6 +13,7 @@ #include "xe_gt_mcr.h" #include "xe_mmio.h" #include "xe_platform_types.h" +#include "xe_sriov.h" #include "xe_step_types.h" #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) @@ -290,18 +291,6 @@ static const struct xe_mocs_entry dg2_mocs_desc[] = { MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), }; -static const struct xe_mocs_entry dg2_mocs_desc_g10_ax[] = { - /* Wa_14011441408: Set Go to Memory for MOCS#0 */ - MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), - /* UC - Coherent; GO:Memory */ - MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), - /* UC - Non-Coherent; GO:Memory */ - MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), - - /* WB - LC */ - MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), -}; - static const struct xe_mocs_entry pvc_mocs_desc[] = { /* Error */ MOCS_ENTRY(0, 0, L3_3_WB), @@ -409,15 +398,8 @@ static unsigned int get_mocs_settings(struct xe_device *xe, info->unused_entries_index = 1; break; case XE_DG2: - if (xe->info.subplatform == XE_SUBPLATFORM_DG2_G10 && - xe->info.step.graphics >= STEP_A0 && - xe->info.step.graphics <= STEP_B0) { - info->size = ARRAY_SIZE(dg2_mocs_desc_g10_ax); - info->table = dg2_mocs_desc_g10_ax; - } else { - info->size = ARRAY_SIZE(dg2_mocs_desc); - info->table = dg2_mocs_desc; - } + info->size = ARRAY_SIZE(dg2_mocs_desc); + info->table = dg2_mocs_desc; info->uc_index = 1; info->n_entries = XELP_NUM_MOCS_ENTRIES; info->unused_entries_index = 3; @@ -558,6 +540,9 @@ void xe_mocs_init(struct xe_gt *gt) struct xe_mocs_info table; unsigned int flags; + if (IS_SRIOV_VF(gt_to_xe(gt))) + return; + /* * MOCS settings are split between "GLOB_MOCS" and/or "LNCFCMOCS" * registers depending on platform. 
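The xe_mocs.c hunk above, and the xe_pat.c hunk that follows, apply the same SR-IOV rule that recurs through this series: MOCS and PAT are PF-owned global settings, so a VF must neither program nor dump them. Reduced to its skeleton (illustrative function, not from the patch):

	void example_init_pf_only_regs(struct xe_gt *gt)
	{
		/* VFs run on top of PF-owned settings; nothing to program here */
		if (IS_SRIOV_VF(gt_to_xe(gt)))
			return;

		/* ... PF-only MMIO programming ... */
	}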
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 1ff6bc79e7..e148934d55 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -13,6 +13,7 @@ #include "xe_gt.h" #include "xe_gt_mcr.h" #include "xe_mmio.h" +#include "xe_sriov.h" #define _PAT_ATS 0x47fc #define _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \ @@ -433,6 +434,10 @@ void xe_pat_init_early(struct xe_device *xe) drm_err(&xe->drm, "Missing PAT table for platform with graphics version %d.%02d!\n", GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); } + + /* VFs can't program nor dump PAT settings */ + if (IS_SRIOV_VF(xe)) + xe->pat.ops = NULL; } void xe_pat_init(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index dcc5ded155..557f2d88a8 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -15,9 +15,9 @@ #include <drm/drm_drv.h> #include <drm/xe_pciids.h> +#include "display/xe_display.h" #include "regs/xe_gt_regs.h" #include "xe_device.h" -#include "xe_display.h" #include "xe_drv.h" #include "xe_gt.h" #include "xe_macros.h" @@ -165,7 +165,7 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_asid = 1, \ .has_flat_ccs = 1, \ .has_range_tlb_invalidation = 1, \ - .has_usm = 0 /* FIXME: implementation missing */, \ + .has_usm = 1, \ .va_bits = 48, \ .vm_max_level = 4, \ .hw_engine_mask = \ @@ -340,14 +340,14 @@ static const struct xe_device_desc lnl_desc = { __diag_pop(); /* Map of GMD_ID values to graphics IP */ -static struct gmdid_map graphics_ip_map[] = { +static const struct gmdid_map graphics_ip_map[] = { { 1270, &graphics_xelpg }, { 1271, &graphics_xelpg }, { 2004, &graphics_xe2 }, }; /* Map of GMD_ID values to media IP */ -static struct gmdid_map media_ip_map[] = { +static const struct gmdid_map media_ip_map[] = { { 1300, &media_xelpmp }, { 2000, &media_xe2 }, }; @@ -774,6 +774,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) str_yes_no(xe_device_has_sriov(xe)), xe_sriov_mode_to_string(xe_device_sriov_mode(xe))); + xe_pm_init_early(xe); + err = xe_device_probe(xe); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index b324dc2a5d..81f4ae2ea0 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -10,6 +10,7 @@ #include <drm/drm_managed.h> +#include "xe_device.h" #include "xe_gt.h" #include "xe_mmio.h" #include "xe_pcode_api.h" @@ -43,8 +44,6 @@ static int pcode_mailbox_status(struct xe_gt *gt) [PCODE_ERROR_MASK] = {-EPROTO, "Unknown"}, }; - lockdep_assert_held(&gt->pcode.lock); - err = xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_ERROR_MASK; if (err) { drm_err(&gt_to_xe(gt)->drm, "PCODE Mailbox failed: %d %s", err, @@ -55,17 +54,15 @@ static int pcode_mailbox_status(struct xe_gt *gt) return 0; } -static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, - unsigned int timeout_ms, bool return_data, - bool atomic) +static int __pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, + unsigned int timeout_ms, bool return_data, + bool atomic) { int err; if (gt_to_xe(gt)->info.skip_pcode) return 0; - lockdep_assert_held(&gt->pcode.lock); - if ((xe_mmio_read32(gt, PCODE_MAILBOX) & PCODE_READY) != 0) return -EAGAIN; @@ -87,6 +84,18 @@ static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, return pcode_mailbox_status(gt); } +static int pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1, + unsigned int timeout_ms, bool return_data, + bool
atomic) +{ + if (gt_to_xe(gt)->info.skip_pcode) + return 0; + + lockdep_assert_held(&gt->pcode.lock); + + return __pcode_mailbox_rw(gt, mbox, data0, data1, timeout_ms, return_data, atomic); +} + int xe_pcode_write_timeout(struct xe_gt *gt, u32 mbox, u32 data, int timeout) { int err; @@ -109,15 +118,19 @@ int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1) return err; } -static int xe_pcode_try_request(struct xe_gt *gt, u32 mbox, - u32 request, u32 reply_mask, u32 reply, - u32 *status, bool atomic, int timeout_us) +static int pcode_try_request(struct xe_gt *gt, u32 mbox, + u32 request, u32 reply_mask, u32 reply, + u32 *status, bool atomic, int timeout_us, bool locked) { int slept, wait = 10; for (slept = 0; slept < timeout_us; slept += wait) { - *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, - atomic); + if (locked) + *status = pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, + atomic); + else + *status = __pcode_mailbox_rw(gt, mbox, &request, NULL, 1, true, + atomic); if ((*status == 0) && ((request & reply_mask) == reply)) return 0; @@ -158,8 +171,8 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, mutex_lock(&gt->pcode.lock); - ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status, - false, timeout_base_ms * 1000); + ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status, - false, timeout_base_ms * 1000, true); if (!ret) goto out; @@ -177,8 +190,8 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request, "PCODE timeout, retrying with preemption disabled\n"); drm_WARN_ON_ONCE(&gt_to_xe(gt)->drm, timeout_base_ms > 1); preempt_disable(); - ret = xe_pcode_try_request(gt, mbox, request, reply_mask, reply, &status, - true, timeout_base_ms * 1000); + ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status, + true, 50 * 1000, true); preempt_enable(); out: @@ -238,59 +251,71 @@ unlock: } /** - * xe_pcode_init - Ensure PCODE is initialized - * @gt: gt instance + * xe_pcode_ready - Ensure PCODE is initialized + * @xe: xe instance + * @locked: true if lock held, false otherwise * - * This function ensures that PCODE is properly initialized. To be called during - * probe and resume paths. + * PCODE init mailbox is polled only on root gt of root tile + * as the root tile provides the initialization is complete only + * after all the tiles have completed the initialization. + * Called only on early probe without locks and with locks in + * resume path. * - * It returns 0 on success, and -error number on failure. + * Returns 0 on success, and -error number on failure.
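The xe_pcode.c split above is a common kernel refactoring shape: the double-underscore helper does the raw mailbox access, and the original name becomes a thin wrapper that keeps the lockdep annotation, so the new unlocked early-probe path (pcode_try_request() with locked == false) can reuse the same core. The skeleton, with invented names:

	static int __example_mbox_rw(struct xe_gt *gt, u32 *data)
	{
		/* raw register access; the caller chooses how it is serialized */
		return 0;
	}

	static int example_mbox_rw(struct xe_gt *gt, u32 *data)
	{
		lockdep_assert_held(&gt->pcode.lock);	/* locked path only */
		return __example_mbox_rw(gt, data);
	}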
*/ -int xe_pcode_init(struct xe_gt *gt) +int xe_pcode_ready(struct xe_device *xe, bool locked) { u32 status, request = DGFX_GET_INIT_STATUS; + struct xe_gt *gt = xe_root_mmio_gt(xe); int timeout_us = 180000000; /* 3 min */ int ret; - if (gt_to_xe(gt)->info.skip_pcode) + if (xe->info.skip_pcode) return 0; - if (!IS_DGFX(gt_to_xe(gt))) + if (!IS_DGFX(xe)) return 0; - mutex_lock(&gt->pcode.lock); - ret = xe_pcode_try_request(gt, DGFX_PCODE_STATUS, request, - DGFX_INIT_STATUS_COMPLETE, - DGFX_INIT_STATUS_COMPLETE, - &status, false, timeout_us); - mutex_unlock(&gt->pcode.lock); + if (locked) + mutex_lock(&gt->pcode.lock); + + ret = pcode_try_request(gt, DGFX_PCODE_STATUS, request, + DGFX_INIT_STATUS_COMPLETE, + DGFX_INIT_STATUS_COMPLETE, + &status, false, timeout_us, locked); + + if (locked) + mutex_unlock(&gt->pcode.lock); if (ret) - drm_err(&gt_to_xe(gt)->drm, + drm_err(&xe->drm, "PCODE initialization timedout after: 3 min\n"); return ret; } /** - * xe_pcode_probe - Prepare xe_pcode and also ensure PCODE is initialized. + * xe_pcode_init: initialize components of PCODE * @gt: gt instance * - * This function initializes the xe_pcode component, and when needed, it ensures - * that PCODE has properly performed its initialization and it is really ready - * to go. To be called once only during probe. - * - * It returns 0 on success, and -error number on failure. + * This function initializes the xe_pcode component. + * To be called once only during probe. */ -int xe_pcode_probe(struct xe_gt *gt) +void xe_pcode_init(struct xe_gt *gt) { drmm_mutex_init(&gt_to_xe(gt)->drm, &gt->pcode.lock); +} - if (gt_to_xe(gt)->info.skip_pcode) - return 0; - - if (!IS_DGFX(gt_to_xe(gt))) - return 0; - - return xe_pcode_init(gt) +/** + * xe_pcode_probe_early: initializes PCODE + * @xe: xe instance + * + * This function checks the initialization status of PCODE + * To be called once only during early probe without locks.
+ * + * Returns 0 on success, error code otherwise + */ +int xe_pcode_probe_early(struct xe_device *xe) +{ + return xe_pcode_ready(xe, false); } diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h index 08cb1d047c..3f54c6d2a5 100644 --- a/drivers/gpu/drm/xe/xe_pcode.h +++ b/drivers/gpu/drm/xe/xe_pcode.h @@ -8,9 +8,11 @@ #include <linux/types.h> struct xe_gt; +struct xe_device; -int xe_pcode_probe(struct xe_gt *gt); -int xe_pcode_init(struct xe_gt *gt); +void xe_pcode_init(struct xe_gt *gt); +int xe_pcode_probe_early(struct xe_device *xe); +int xe_pcode_ready(struct xe_device *xe, bool locked); int xe_pcode_init_min_freq_table(struct xe_gt *gt, u32 min_gt_freq, u32 max_gt_freq); int xe_pcode_read(struct xe_gt *gt, u32 mbox, u32 *val, u32 *val1); diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h index 5935cfe302..f153ce96f6 100644 --- a/drivers/gpu/drm/xe/xe_pcode_api.h +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -42,6 +42,13 @@ #define POWER_SETUP_I1_SHIFT 6 /* 10.6 fixed point format */ #define POWER_SETUP_I1_DATA_MASK REG_GENMASK(15, 0) +#define PCODE_FREQUENCY_CONFIG 0x6e +/* Frequency Config Sub Commands (param1) */ +#define PCODE_MBOX_FC_SC_READ_FUSED_P0 0x0 +#define PCODE_MBOX_FC_SC_READ_FUSED_PN 0x1 +/* Domain IDs (param2) */ +#define PCODE_MBOX_DOMAIN_HBM 0x2 + struct pcode_err_decode { int errno; const char *str; diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index b429c2876a..944cf4d760 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -10,11 +10,11 @@ #include <drm/drm_managed.h> #include <drm/ttm/ttm_placement.h> +#include "display/xe_display.h" #include "xe_bo.h" #include "xe_bo_evict.h" #include "xe_device.h" #include "xe_device_sysfs.h" -#include "xe_display.h" #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_guc.h" @@ -54,13 +54,15 @@ int xe_pm_suspend(struct xe_device *xe) u8 id; int err; + drm_dbg(&xe->drm, "Suspending device\n"); + for_each_gt(gt, xe, id) xe_gt_suspend_prepare(gt); /* FIXME: Super racey... 
*/ err = xe_bo_evict_all(xe); if (err) - return err; + goto err; xe_display_pm_suspend(xe); @@ -68,7 +70,7 @@ int xe_pm_suspend(struct xe_device *xe) err = xe_gt_suspend(gt); if (err) { xe_display_pm_resume(xe); - return err; + goto err; } } @@ -76,7 +78,11 @@ int xe_pm_suspend(struct xe_device *xe) xe_display_pm_suspend_late(xe); + drm_dbg(&xe->drm, "Device suspended\n"); return 0; +err: + drm_dbg(&xe->drm, "Device suspend failed %d\n", err); + return err; } /** @@ -92,14 +98,14 @@ int xe_pm_resume(struct xe_device *xe) u8 id; int err; + drm_dbg(&xe->drm, "Resuming device\n"); + for_each_tile(tile, xe, id) xe_wa_apply_tile_workarounds(tile); - for_each_gt(gt, xe, id) { - err = xe_pcode_init(gt); - if (err) - return err; - } + err = xe_pcode_ready(xe, true); + if (err) + return err; xe_display_pm_resume_early(xe); @@ -109,7 +115,7 @@ int xe_pm_resume(struct xe_device *xe) */ err = xe_bo_restore_kernel(xe); if (err) - return err; + goto err; xe_irq_resume(xe); @@ -120,22 +126,35 @@ int xe_pm_resume(struct xe_device *xe) err = xe_bo_restore_user(xe); if (err) - return err; + goto err; + drm_dbg(&xe->drm, "Device resumed\n"); return 0; +err: + drm_dbg(&xe->drm, "Device resume failed %d\n", err); + return err; } -static bool xe_pm_pci_d3cold_capable(struct pci_dev *pdev) +static bool xe_pm_pci_d3cold_capable(struct xe_device *xe) { + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); struct pci_dev *root_pdev; root_pdev = pcie_find_root_port(pdev); if (!root_pdev) return false; - /* D3Cold requires PME capability and _PR3 power resource */ - if (!pci_pme_capable(root_pdev, PCI_D3cold) || !pci_pr3_present(root_pdev)) + /* D3Cold requires PME capability */ + if (!pci_pme_capable(root_pdev, PCI_D3cold)) { + drm_dbg(&xe->drm, "d3cold: PME# not supported\n"); + return false; + } + + /* D3Cold requires _PR3 power resource */ + if (!pci_pr3_present(root_pdev)) { + drm_dbg(&xe->drm, "d3cold: ACPI _PR3 not present\n"); return false; + } return true; } @@ -163,17 +182,21 @@ static void xe_pm_runtime_init(struct xe_device *xe) pm_runtime_put(dev); } -void xe_pm_init(struct xe_device *xe) +void xe_pm_init_early(struct xe_device *xe) { - struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list); + drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock); +} +void xe_pm_init(struct xe_device *xe) +{ /* For now suspend/resume is only allowed with GuC */ if (!xe_device_uc_enabled(xe)) return; drmm_mutex_init(&xe->drm, &xe->d3cold.lock); - xe->d3cold.capable = xe_pm_pci_d3cold_capable(pdev); + xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe); if (xe->d3cold.capable) { xe_device_sysfs_init(xe); @@ -214,6 +237,7 @@ struct task_struct *xe_pm_read_callback_task(struct xe_device *xe) int xe_pm_runtime_suspend(struct xe_device *xe) { + struct xe_bo *bo, *on; struct xe_gt *gt; u8 id; int err = 0; @@ -247,6 +271,16 @@ int xe_pm_runtime_suspend(struct xe_device *xe) */ lock_map_acquire(&xe_device_mem_access_lockdep_map); + /* + * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify + * also checks and delets bo entry from user fault list. 
+ */ + mutex_lock(&xe->mem_access.vram_userfault.lock); + list_for_each_entry_safe(bo, on, + &xe->mem_access.vram_userfault.list, vram_userfault_link) + xe_bo_runtime_pm_release_mmap_offset(bo); + mutex_unlock(&xe->mem_access.vram_userfault.lock); + if (xe->d3cold.allowed) { err = xe_bo_evict_all(xe); if (err) @@ -286,11 +320,9 @@ int xe_pm_runtime_resume(struct xe_device *xe) xe->d3cold.power_lost = xe_guc_in_reset(&gt->uc.guc); if (xe->d3cold.allowed && xe->d3cold.power_lost) { - for_each_gt(gt, xe, id) { - err = xe_pcode_init(gt); - if (err) - goto out; - } + err = xe_pcode_ready(xe, true); + if (err) + goto out; /* * This only restores pinned memory which is the memory @@ -330,7 +362,7 @@ int xe_pm_runtime_put(struct xe_device *xe) int xe_pm_runtime_get_if_active(struct xe_device *xe) { - return pm_runtime_get_if_active(xe->drm.dev, true); + return pm_runtime_get_if_active(xe->drm.dev); } void xe_pm_assert_unbounded_bridge(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 6b9031f7af..64a97c6726 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -20,6 +20,7 @@ struct xe_device; int xe_pm_suspend(struct xe_device *xe); int xe_pm_resume(struct xe_device *xe); +void xe_pm_init_early(struct xe_device *xe); void xe_pm_init(struct xe_device *xe); void xe_pm_runtime_fini(struct xe_device *xe); int xe_pm_runtime_suspend(struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index 7bce2a3326..7d50c6e89d 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -49,7 +49,7 @@ static bool preempt_fence_enable_signaling(struct dma_fence *fence) struct xe_exec_queue *q = pfence->q; pfence->error = q->ops->suspend(q); - queue_work(system_unbound_wq, &pfence->preempt_work); + queue_work(q->vm->xe->preempt_fence_wq, &pfence->preempt_work); return true; } diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index c1a833b1cb..4efc8c1a3d 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -877,8 +877,7 @@ static void xe_pt_commit_bind(struct xe_vma *vma, static int xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries, - bool rebind) + struct xe_vm_pgtable_update *entries, u32 *num_entries) { int err; @@ -1136,8 +1135,7 @@ static int invalidation_fence_init(struct xe_gt *gt, spin_lock_irq(&gt->tlb_invalidation.lock); dma_fence_init(&ifence->base.base, &invalidation_fence_ops, &gt->tlb_invalidation.lock, - gt->tlb_invalidation.fence_context, - ++gt->tlb_invalidation.fence_seqno); + dma_fence_context_alloc(1), 1); spin_unlock_irq(&gt->tlb_invalidation.lock); INIT_LIST_HEAD(&ifence->base.link); @@ -1234,9 +1232,16 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue "Preparing bind, with range [%llx...%llx) engine %p.\n", xe_vma_start(vma), xe_vma_end(vma), q); - err = xe_pt_prepare_bind(tile, vma, entries, &num_entries, rebind); + err = xe_pt_prepare_bind(tile, vma, entries, &num_entries); if (err) goto err; + + err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); + if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) + err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); + if (err) + goto err; + xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); @@ -1579,6 +1584,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma,
struct xe_exec_queu struct dma_fence *fence = NULL; struct invalidation_fence *ifence; struct xe_range_fence *rfence; + int err; LLIST_HEAD(deferred); @@ -1596,6 +1602,12 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries, num_entries); + err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); + if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) + err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); + if (err) + return ERR_PTR(err); + ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); if (!ifence) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 1a6f2f51ba..075f9eaef0 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -198,7 +198,7 @@ static int query_engines(struct xe_device *xe, return -EINVAL; } - engines = kmalloc(size, GFP_KERNEL); + engines = kzalloc(size, GFP_KERNEL); if (!engines) return -ENOMEM; @@ -212,14 +212,10 @@ static int query_engines(struct xe_device *xe, engines->engines[i].instance.engine_instance = hwe->logical_instance; engines->engines[i].instance.gt_id = gt->info.id; - engines->engines[i].instance.pad = 0; - memset(engines->engines[i].reserved, 0, - sizeof(engines->engines[i].reserved)); i++; } - engines->pad = 0; engines->num_engines = i; if (copy_to_user(query_ptr, engines, size)) { @@ -520,6 +516,49 @@ static int query_gt_topology(struct xe_device *xe, return 0; } +static int +query_uc_fw_version(struct xe_device *xe, struct drm_xe_device_query *query) +{ + struct drm_xe_query_uc_fw_version __user *query_ptr = u64_to_user_ptr(query->data); + size_t size = sizeof(struct drm_xe_query_uc_fw_version); + struct drm_xe_query_uc_fw_version resp; + struct xe_uc_fw_version *version = NULL; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_DBG(xe, query->size != size)) { + return -EINVAL; + } + + if (copy_from_user(&resp, query_ptr, size)) + return -EFAULT; + + if (XE_IOCTL_DBG(xe, resp.pad || resp.pad2 || resp.reserved)) + return -EINVAL; + + switch (resp.uc_type) { + case XE_QUERY_UC_TYPE_GUC_SUBMISSION: { + struct xe_guc *guc = &xe->tiles[0].primary_gt->uc.guc; + + version = &guc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]; + break; + } + default: + return -EINVAL; + } + + resp.branch_ver = 0; + resp.major_ver = version->major; + resp.minor_ver = version->minor; + resp.patch_ver = version->patch; + + if (copy_to_user(query_ptr, &resp, size)) + return -EFAULT; + + return 0; +} + static int (* const xe_query_funcs[])(struct xe_device *xe, struct drm_xe_device_query *query) = { query_engines, @@ -529,6 +568,7 @@ static int (* const xe_query_funcs[])(struct xe_device *xe, query_hwconfig, query_gt_topology, query_engine_cycles, + query_uc_fw_version, }; int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index 87adefb560..440ac572f6 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -231,7 +231,7 @@ void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe) if (err) goto err_force_wake; - p = drm_debug_printer(KBUILD_MODNAME); + p = drm_dbg_printer(&xe->drm, DRM_UT_DRIVER, NULL); xa_for_each(&sr->xa, reg, entry) { if (slot == RING_MAX_NONPRIV_SLOTS) { xe_gt_err(gt, diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index e66ae1bdaf..3fa2ece7d2 100644 --- 
a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -7,9 +7,11 @@ #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" +#include "regs/xe_regs.h" #include "xe_gt_types.h" #include "xe_platform_types.h" #include "xe_rtp.h" +#include "xe_step.h" #undef XE_REG_MCR #define XE_REG_MCR(...) XE_REG(__VA_ARGS__, .mcr = 1) @@ -56,6 +58,12 @@ static const struct xe_rtp_entry_sr register_whitelist[] = { RING_FORCE_TO_NONPRIV_DENY, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, + { XE_RTP_NAME("16020183090"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(WHITELIST(CSBE_DEBUG_STATUS(RENDER_RING_BASE), 0)) + }, + {} }; diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 68495f8858..820c8d73c4 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -5,7 +5,8 @@ #include "xe_ring_ops.h" -#include "generated/xe_wa_oob.h" +#include <generated/xe_wa_oob.h> + #include "instructions/xe_mi_commands.h" #include "regs/xe_engine_regs.h" #include "regs/xe_gpu_commands.h" @@ -78,6 +79,16 @@ static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i) return i; } +static int emit_flush_dw(u32 *dw, int i) +{ + dw[i++] = MI_FLUSH_DW | MI_FLUSH_IMM_DW; + dw[i++] = 0; + dw[i++] = 0; + dw[i++] = 0; + + return i; +} + static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb, u32 *dw, int i) { @@ -113,6 +124,19 @@ static int emit_flush_invalidate(u32 flag, u32 *dw, int i) return i; } +static int +emit_pipe_control(u32 *dw, int i, u32 bit_group_0, u32 bit_group_1, u32 offset, u32 value) +{ + dw[i++] = GFX_OP_PIPE_CONTROL(6) | bit_group_0; + dw[i++] = bit_group_1; + dw[i++] = offset; + dw[i++] = 0; + dw[i++] = value; + dw[i++] = 0; + + return i; +} + static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw, int i) { @@ -131,14 +155,7 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw, flags &= ~mask_flags; - dw[i++] = GFX_OP_PIPE_CONTROL(6); - dw[i++] = flags; - dw[i++] = LRC_PPHWSP_SCRATCH_ADDR; - dw[i++] = 0; - dw[i++] = 0; - dw[i++] = 0; - - return i; + return emit_pipe_control(dw, i, 0, flags, LRC_PPHWSP_SCRATCH_ADDR, 0); } static int emit_store_imm_ppgtt_posted(u64 addr, u64 value, @@ -174,14 +191,7 @@ static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i) else if (job->q->class == XE_ENGINE_CLASS_COMPUTE) flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS; - dw[i++] = GFX_OP_PIPE_CONTROL(6) | PIPE_CONTROL0_HDC_PIPELINE_FLUSH; - dw[i++] = flags; - dw[i++] = 0; - dw[i++] = 0; - dw[i++] = 0; - dw[i++] = 0; - - return i; + return emit_pipe_control(dw, i, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0, 0); } static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int i) @@ -189,14 +199,9 @@ static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int if (hwe->class != XE_ENGINE_CLASS_RENDER) return i; - if (XE_WA(hwe->gt, 16020292621)) { - dw[i++] = GFX_OP_PIPE_CONTROL(6); - dw[i++] = PIPE_CONTROL_LRI_POST_SYNC; - dw[i++] = RING_NOPID(hwe->mmio_base).addr; - dw[i++] = 0; - dw[i++] = 0; - dw[i++] = 0; - } + if (XE_WA(hwe->gt, 16020292621)) + i = emit_pipe_control(dw, i, 0, PIPE_CONTROL_LRI_POST_SYNC, + RING_NOPID(hwe->mmio_base).addr, 0); return i; } @@ -204,16 +209,13 @@ static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw, int i) { - dw[i++] = 
GFX_OP_PIPE_CONTROL(6); - dw[i++] = (stall_only ? PIPE_CONTROL_CS_STALL : - PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL) | - PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE; - dw[i++] = addr; - dw[i++] = 0; - dw[i++] = value; - dw[i++] = 0; /* We're thrashing one extra dword. */ + u32 flags = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_QW_WRITE; - return i; + if (!stall_only) + flags |= PIPE_CONTROL_FLUSH_ENABLE; + + return emit_pipe_control(dw, i, 0, flags, addr, value); } static u32 get_ppgtt_flag(struct xe_sched_job *job) @@ -241,10 +243,12 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); - if (job->user_fence.used) + if (job->user_fence.used) { + i = emit_flush_dw(dw, i); i = emit_store_imm_ppgtt_posted(job->user_fence.addr, job->user_fence.value, dw, i); + } i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i); @@ -300,10 +304,12 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, i = emit_bb_start(batch_addr, ppgtt_flag, dw, i); - if (job->user_fence.used) + if (job->user_fence.used) { + i = emit_flush_dw(dw, i); i = emit_store_imm_ppgtt_posted(job->user_fence.addr, job->user_fence.value, dw, i); + } i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i); diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index d07b9ee0ea..b0c7fa4693 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -287,3 +287,41 @@ int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm) return drm_sched_job_add_dependency(&job->drm, fence); } + +struct xe_sched_job_snapshot * +xe_sched_job_snapshot_capture(struct xe_sched_job *job) +{ + struct xe_exec_queue *q = job->q; + struct xe_device *xe = q->gt->tile->xe; + struct xe_sched_job_snapshot *snapshot; + size_t len = sizeof(*snapshot) + (sizeof(u64) * q->width); + u16 i; + + snapshot = kzalloc(len, GFP_ATOMIC); + if (!snapshot) + return NULL; + + snapshot->batch_addr_len = q->width; + for (i = 0; i < q->width; i++) + snapshot->batch_addr[i] = xe_device_uncanonicalize_addr(xe, job->batch_addr[i]); + + return snapshot; +} + +void xe_sched_job_snapshot_free(struct xe_sched_job_snapshot *snapshot) +{ + kfree(snapshot); +} + +void +xe_sched_job_snapshot_print(struct xe_sched_job_snapshot *snapshot, + struct drm_printer *p) +{ + u16 i; + + if (!snapshot) + return; + + for (i = 0; i < snapshot->batch_addr_len; i++) + drm_printf(p, "batch_addr[%u]: 0x%016llx\n", i, snapshot->batch_addr[i]); +} diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h index 34f475ba7f..f1a660648c 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.h +++ b/drivers/gpu/drm/xe/xe_sched_job.h @@ -8,6 +8,7 @@ #include "xe_sched_job_types.h" +struct drm_printer; struct xe_vm; #define XE_SCHED_HANG_LIMIT 1 @@ -77,4 +78,8 @@ xe_sched_job_add_migrate_flush(struct xe_sched_job *job, u32 flags) bool xe_sched_job_is_migration(struct xe_exec_queue *q); +struct xe_sched_job_snapshot *xe_sched_job_snapshot_capture(struct xe_sched_job *job); +void xe_sched_job_snapshot_free(struct xe_sched_job_snapshot *snapshot); +void xe_sched_job_snapshot_print(struct xe_sched_job_snapshot *snapshot, struct drm_printer *p); + #endif diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h index 46ead63459..5e12724219 100644 --- a/drivers/gpu/drm/xe/xe_sched_job_types.h +++ 
b/drivers/gpu/drm/xe/xe_sched_job_types.h @@ -30,11 +30,11 @@ struct xe_sched_job { struct dma_fence *fence; /** @user_fence: write back value when BB is complete */ struct { - /** @used: user fence is used */ + /** @user_fence.used: user fence is used */ bool used; - /** @addr: address to write to */ + /** @user_fence.addr: address to write to */ u64 addr; - /** @value: write back value */ + /** @user_fence.value: write back value */ u64 value; } user_fence; /** @migrate_flush_flags: Additional flush flags for migration jobs */ @@ -45,4 +45,9 @@ struct xe_sched_job { u64 batch_addr[]; }; +struct xe_sched_job_snapshot { + u16 batch_addr_len; + u64 batch_addr[]; +}; + #endif diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index 42a0e0c917..f295d91886 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -3,6 +3,8 @@ * Copyright © 2023 Intel Corporation */ +#include <drm/drm_managed.h> + #include "xe_assert.h" #include "xe_sriov.h" @@ -53,3 +55,33 @@ void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov) drm_info(&xe->drm, "Running in %s mode\n", xe_sriov_mode_to_string(xe_device_sriov_mode(xe))); } + +static void fini_sriov(struct drm_device *drm, void *arg) +{ + struct xe_device *xe = arg; + + destroy_workqueue(xe->sriov.wq); + xe->sriov.wq = NULL; +} + +/** + * xe_sriov_init - Initialize SR-IOV specific data. + * @xe: the &xe_device to initialize + * + * In this function we create a dedicated workqueue that will be used + * by the SR-IOV specific workers. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_init(struct xe_device *xe) +{ + if (!IS_SRIOV(xe)) + return 0; + + xe_assert(xe, !xe->sriov.wq); + xe->sriov.wq = alloc_workqueue("xe-sriov-wq", 0, 0); + if (!xe->sriov.wq) + return -ENOMEM; + + return drmm_add_action_or_reset(&xe->drm, fini_sriov, xe); +} diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h index 5af73a3172..1545552162 100644 --- a/drivers/gpu/drm/xe/xe_sriov.h +++ b/drivers/gpu/drm/xe/xe_sriov.h @@ -13,6 +13,7 @@ const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode); void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov); +int xe_sriov_init(struct xe_device *xe); static inline enum xe_sriov_mode xe_device_sriov_mode(struct xe_device *xe) { diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h index 999a4311b9..1a138108d1 100644 --- a/drivers/gpu/drm/xe/xe_sriov_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_types.h @@ -9,6 +9,18 @@ #include <linux/build_bug.h> /** + * VFID - Virtual Function Identifier + * @n: VF number + * + * Helper macro to represent Virtual Function (VF) Identifier. + * VFID(0) is used as an alias for the PFID that represents the Physical Function. + * + * Note: According to the PCI spec, SR-IOV VF numbers are 1-based (VF1, VF2, ...).
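As an aside on the xe_sriov_init() hunk above: the drmm release action is registered right after the workqueue is created, so DRM-managed cleanup tears it down together with the drm_device and probe error paths need no explicit fini call. A hedged sketch of that pairing (names illustrative):

#include <drm/drm_managed.h>
#include <linux/workqueue.h>

static void fini_example_wq(struct drm_device *drm, void *arg)
{
        destroy_workqueue(arg);
}

static int init_example_wq(struct drm_device *drm)
{
        struct workqueue_struct *wq = alloc_workqueue("example-wq", 0, 0);

        if (!wq)
                return -ENOMEM;

        /* on failure, drmm runs fini_example_wq() immediately */
        return drmm_add_action_or_reset(drm, fini_example_wq, wq);
}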
+ */ +#define VFID(n) (n) +#define PFID VFID(0) + +/** * enum xe_sriov_mode - SR-IOV mode * @XE_SRIOV_MODE_NONE: bare-metal mode (non-virtualized) * @XE_SRIOV_MODE_PF: SR-IOV Physical Function (PF) mode diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c index 0f8d3e7fce..0662968d7b 100644 --- a/drivers/gpu/drm/xe/xe_tile_sysfs.c +++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c @@ -9,6 +9,7 @@ #include "xe_tile.h" #include "xe_tile_sysfs.h" +#include "xe_vram_freq.h" static void xe_tile_sysfs_kobj_release(struct kobject *kobj) { @@ -50,6 +51,8 @@ void xe_tile_sysfs_init(struct xe_tile *tile) tile->sysfs = &kt->base; + xe_vram_freq_sysfs_init(tile); + err = drmm_add_action_or_reset(&xe->drm, tile_sysfs_fini, tile); if (err) drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index e5d7d5e2be..3107d2a124 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -11,7 +11,8 @@ #include <drm/ttm/ttm_placement.h> #include <drm/ttm/ttm_range_manager.h> -#include "generated/xe_wa_oob.h" +#include <generated/xe_wa_oob.h> + #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" #include "xe_bo.h" @@ -19,6 +20,7 @@ #include "xe_gt.h" #include "xe_mmio.h" #include "xe_res_cursor.h" +#include "xe_sriov.h" #include "xe_ttm_stolen_mgr.h" #include "xe_ttm_vram_mgr.h" #include "xe_wa.h" @@ -205,7 +207,9 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) u64 stolen_size, io_size, pgsize; int err; - if (IS_DGFX(xe)) + if (IS_SRIOV_VF(xe)) + stolen_size = 0; + else if (IS_DGFX(xe)) stolen_size = detect_bar2_dgfx(xe, mgr); else if (GRAPHICS_VERx100(xe) >= 1270) stolen_size = detect_bar2_integrated(xe, mgr); diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 53ccd338fd..5c83c75bc4 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -37,7 +37,14 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { XE_RTP_ACTIONS(FIELD_SET(XE2LPM_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) }, - + { XE_RTP_NAME("Tuning: Compression Overfetch"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2004, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX)), + }, + { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2004, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN)) + }, {} }; diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 25e1ddfd2f..7033f8c1b4 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -7,8 +7,10 @@ #include "xe_device.h" #include "xe_gsc.h" +#include "xe_gsc_proxy.h" #include "xe_gt.h" #include "xe_guc.h" +#include "xe_guc_db_mgr.h" #include "xe_guc_pc.h" #include "xe_guc_submit.h" #include "xe_huc.h" @@ -30,13 +32,15 @@ uc_to_xe(struct xe_uc *uc) /* Should be called once at driver load only */ int xe_uc_init(struct xe_uc *uc) { + struct xe_device *xe = uc_to_xe(uc); int ret; + xe_device_mem_access_get(xe); + /* * We call the GuC/HuC/GSC init functions even if GuC submission is off * to correctly move our tracking of the FW state to "disabled". 
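The xe_uc_init() change above brackets the whole init sequence with a mem-access reference and funnels every exit, successful or not, through one point that drops it. Reduced to a sketch, with hypothetical do_step_*() helpers standing in for the real init calls:

/* hypothetical step helpers, standing in for the real init calls */
int do_step_one(struct xe_device *xe);
int do_step_two(struct xe_device *xe);

int init_with_ref(struct xe_device *xe)
{
        int ret;

        xe_device_mem_access_get(xe);

        ret = do_step_one(xe);
        if (ret)
                goto out;

        ret = do_step_two(xe);
out:
        xe_device_mem_access_put(xe); /* dropped on success and failure alike */
        return ret;
}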
*/ - ret = xe_guc_init(&uc->guc); if (ret) goto err; @@ -50,7 +54,7 @@ int xe_uc_init(struct xe_uc *uc) goto err; if (!xe_device_uc_enabled(uc_to_xe(uc))) - return 0; + goto err; ret = xe_wopcm_init(&uc->wopcm); if (ret) @@ -60,9 +64,17 @@ int xe_uc_init(struct xe_uc *uc) if (ret) goto err; + ret = xe_guc_db_mgr_init(&uc->guc.dbm, ~0); + if (ret) + goto err; + + xe_device_mem_access_put(xe); + return 0; err: + xe_device_mem_access_put(xe); + return ret; } @@ -88,6 +100,10 @@ int xe_uc_init_post_hwconfig(struct xe_uc *uc) if (err) return err; + err = xe_huc_init_post_hwconfig(&uc->huc); + if (err) + return err; + return xe_gsc_init_post_hwconfig(&uc->gsc); } @@ -256,3 +272,16 @@ int xe_uc_suspend(struct xe_uc *uc) return xe_guc_suspend(&uc->guc); } + +/** + * xe_uc_remove() - Clean up the UC structures before driver removal + * @uc: the UC object + * + * This function should only act on objects/structures that must be cleaned + * before the driver removal callback is complete and therefore can't be + * deferred to a drmm action. + */ +void xe_uc_remove(struct xe_uc *uc) +{ + xe_gsc_remove(&uc->gsc); +} diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index 5d5110c0c8..e4d4e3c99f 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -20,5 +20,6 @@ int xe_uc_stop(struct xe_uc *uc); int xe_uc_start(struct xe_uc *uc); int xe_uc_suspend(struct xe_uc *uc); int xe_uc_sanitize_reset(struct xe_uc *uc); +void xe_uc_remove(struct xe_uc *uc); #endif diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 9dff96dfe4..a9d25b3fa6 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -92,6 +92,7 @@ struct uc_fw_entry { const char *path; u16 major; u16 minor; + u16 patch; bool full_ver_required; }; }; @@ -102,14 +103,15 @@ struct fw_blobs_by_type { }; #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ - fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 7)) \ - fw_def(DG2, major_ver(i915, guc, dg2, 70, 5)) \ - fw_def(DG1, major_ver(i915, guc, dg1, 70, 5)) \ - fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 5)) \ - fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 5)) \ - fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 5)) \ - fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 5)) \ - fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 5)) + fw_def(LUNARLAKE, major_ver(xe, guc, lnl, 70, 19, 2)) \ + fw_def(METEORLAKE, major_ver(i915, guc, mtl, 70, 19, 2)) \ + fw_def(DG2, major_ver(i915, guc, dg2, 70, 19, 2)) \ + fw_def(DG1, major_ver(i915, guc, dg1, 70, 19, 2)) \ + fw_def(ALDERLAKE_N, major_ver(i915, guc, tgl, 70, 19, 2)) \ + fw_def(ALDERLAKE_P, major_ver(i915, guc, adlp, 70, 19, 2)) \ + fw_def(ALDERLAKE_S, major_ver(i915, guc, tgl, 70, 19, 2)) \ + fw_def(ROCKETLAKE, major_ver(i915, guc, tgl, 70, 19, 2)) \ + fw_def(TIGERLAKE, major_ver(i915, guc, tgl, 70, 19, 2)) #define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \ fw_def(METEORLAKE, no_ver(i915, huc_gsc, mtl)) \ @@ -121,24 +123,24 @@ struct fw_blobs_by_type { /* for the GSC FW we match the compatibility version and not the release one */ #define XE_GSC_FIRMWARE_DEFS(fw_def, major_ver) \ - fw_def(METEORLAKE, major_ver(i915, gsc, mtl, 1, 0)) + fw_def(METEORLAKE, major_ver(i915, gsc, mtl, 1, 0, 0)) #define MAKE_FW_PATH(dir__, uc__, shortname__, version__) \ __stringify(dir__) "/" __stringify(shortname__) "_" __stringify(uc__) version__ ".bin" #define fw_filename_mmp_ver(dir_, uc_, shortname_, a, b, c) \ MAKE_FW_PATH(dir_, uc_, shortname_, "_" __stringify(a ## 
. ## b ## . ## c)) -#define fw_filename_major_ver(dir_, uc_, shortname_, a, b) \ +#define fw_filename_major_ver(dir_, uc_, shortname_, a, b, c) \ MAKE_FW_PATH(dir_, uc_, shortname_, "_" __stringify(a)) #define fw_filename_no_ver(dir_, uc_, shortname_) \ MAKE_FW_PATH(dir_, uc_, shortname_, "") #define uc_fw_entry_mmp_ver(dir_, uc_, shortname_, a, b, c) \ { fw_filename_mmp_ver(dir_, uc_, shortname_, a, b, c), \ - a, b, true } -#define uc_fw_entry_major_ver(dir_, uc_, shortname_, a, b) \ - { fw_filename_major_ver(dir_, uc_, shortname_, a, b), \ - a, b } + a, b, c, true } +#define uc_fw_entry_major_ver(dir_, uc_, shortname_, a, b, c) \ + { fw_filename_major_ver(dir_, uc_, shortname_, a, b, c), \ + a, b, c } #define uc_fw_entry_no_ver(dir_, uc_, shortname_) \ { fw_filename_no_ver(dir_, uc_, shortname_), \ 0, 0 } @@ -221,6 +223,7 @@ uc_fw_auto_select(struct xe_device *xe, struct xe_uc_fw *uc_fw) uc_fw->path = entries[i].path; uc_fw->versions.wanted.major = entries[i].major; uc_fw->versions.wanted.minor = entries[i].minor; + uc_fw->versions.wanted.patch = entries[i].patch; uc_fw->full_ver_required = entries[i].full_ver_required; if (uc_fw->type == XE_UC_FW_TYPE_GSC) @@ -340,19 +343,22 @@ int xe_uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw) * Otherwise, at least the major version. */ if (wanted->major != found->major || - (uc_fw->full_ver_required && wanted->minor != found->minor)) { - drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u != %u.%u\n", + (uc_fw->full_ver_required && + ((wanted->minor != found->minor) || + (wanted->patch != found->patch)))) { + drm_notice(&xe->drm, "%s firmware %s: unexpected version: %u.%u.%u != %u.%u.%u\n", xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, - found->major, found->minor, - wanted->major, wanted->minor); + found->major, found->minor, found->patch, + wanted->major, wanted->minor, wanted->patch); goto fail; } - if (wanted->minor > found->minor) { - drm_notice(&xe->drm, "%s firmware (%u.%u) is recommended, but only (%u.%u) was found in %s\n", + if (wanted->minor > found->minor || + (wanted->minor == found->minor && wanted->patch > found->patch)) { + drm_notice(&xe->drm, "%s firmware (%u.%u.%u) is recommended, but only (%u.%u.%u) was found in %s\n", xe_uc_fw_type_repr(uc_fw->type), - wanted->major, wanted->minor, - found->major, found->minor, + wanted->major, wanted->minor, wanted->patch, + found->major, found->minor, found->patch, uc_fw->path); drm_info(&xe->drm, "Consider updating your linux-firmware pkg or downloading from %s\n", XE_UC_FIRMWARE_URL); @@ -652,14 +658,18 @@ static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmwar xe_assert(xe, !uc_fw->path); uc_fw_auto_select(xe, uc_fw); + uc_fw_override(uc_fw); xe_uc_fw_change_status(uc_fw, uc_fw->path ? 
XE_UC_FIRMWARE_SELECTED : XE_UC_FIRMWARE_NOT_SUPPORTED); - if (!xe_uc_fw_is_supported(uc_fw)) + if (!xe_uc_fw_is_supported(uc_fw)) { + if (uc_fw->type == XE_UC_FW_TYPE_GUC) { + drm_err(&xe->drm, "No GuC firmware defined for platform\n"); + return -ENOENT; + } return 0; - - uc_fw_override(uc_fw); + } /* an empty path means the firmware is disabled */ if (!xe_device_uc_enabled(xe) || !(*uc_fw->path)) { diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h index ee914a5d85..bc800b6968 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_types.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h @@ -124,11 +124,14 @@ struct xe_uc_fw { /** @versions: FW versions wanted and found */ struct { - /** @wanted: firmware version wanted by platform */ + /** @versions.wanted: firmware version wanted by platform */ struct xe_uc_fw_version wanted; - /** @wanted_type: type of firmware version wanted (release vs compatibility) */ + /** + * @versions.wanted_type: type of firmware version wanted + * (release vs compatibility) + */ enum xe_uc_fw_version_types wanted_type; - /** @found: fw versions found in firmware blob */ + /** @versions.found: fw versions found in firmware blob */ struct xe_uc_fw_version found[XE_UC_FW_VER_TYPE_COUNT]; } versions; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index a2024247be..32cd0c978a 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -13,11 +13,14 @@ #include <drm/ttm/ttm_execbuf_util.h> #include <drm/ttm/ttm_tt.h> #include <drm/xe_drm.h> +#include <linux/ascii85.h> #include <linux/delay.h> #include <linux/kthread.h> #include <linux/mm.h> #include <linux/swap.h> +#include <generated/xe_wa_oob.h> + #include "xe_assert.h" #include "xe_bo.h" #include "xe_device.h" @@ -34,7 +37,6 @@ #include "xe_res_cursor.h" #include "xe_sync.h" #include "xe_trace.h" -#include "generated/xe_wa_oob.h" #include "xe_wa.h" static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) @@ -480,17 +482,53 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) return 0; } +/** + * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas + * @vm: The vm for which we are rebinding. + * @exec: The struct drm_exec with the locked GEM objects. + * @num_fences: The number of fences to reserve for the operation, not + * including rebinds and validations. + * + * Validates all evicted gem objects and rebinds their vmas. Note that + * rebindings may cause evictions and hence the validation-rebind + * sequence is rerun until there are no more objects to validate. + * + * Return: 0 on success, negative error code on error. In particular, + * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if + * the drm_exec transaction needs to be restarted. 
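Because the transaction may need to be restarted, a caller would typically drive this helper from inside a drm_exec locking loop, roughly as in this trimmed sketch:

struct drm_exec exec;
int err;

drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
drm_exec_until_all_locked(&exec) {
        err = drm_gpuvm_prepare_vm(&vm->gpuvm, &exec, 0);
        drm_exec_retry_on_contention(&exec);
        if (err)
                break;

        /* revalidate and rebind, reserving one extra fence slot per object */
        err = xe_vm_validate_rebind(vm, &exec, 1);
        drm_exec_retry_on_contention(&exec);
        if (err)
                break;
}
drm_exec_fini(&exec);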
+ */ +int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, + unsigned int num_fences) +{ + struct drm_gem_object *obj; + unsigned long index; + int ret; + + do { + ret = drm_gpuvm_validate(&vm->gpuvm, exec); + if (ret) + return ret; + + ret = xe_vm_rebind(vm, false); + if (ret) + return ret; + } while (!list_empty(&vm->gpuvm.evict.list)); + + drm_exec_for_each_locked_object(exec, index, obj) { + ret = dma_resv_reserve_fences(obj->resv, num_fences); + if (ret) + return ret; + } + + return 0; +} + static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, bool *done) { int err; - /* - * 1 fence for each preempt fence plus a fence for each tile from a - * possible rebind - */ - err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues + - vm->xe->info.tile_count); + err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0); if (err) return err; @@ -505,7 +543,7 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, return 0; } - err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues); + err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0); if (err) return err; @@ -513,7 +551,13 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, if (err) return err; - return drm_gpuvm_validate(&vm->gpuvm, exec); + /* + * Add validation and rebinding to the locking loop since both can + * cause evictions which may require blocking dma_resv locks. + * The fence reservation here is intended for the new preempt fences + * we attach at the end of the rebind work. + */ + return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues); } static void preempt_rebind_work_func(struct work_struct *w) @@ -801,6 +845,7 @@ static void xe_vma_free(struct xe_vma *vma) #define VMA_CREATE_FLAG_READ_ONLY BIT(0) #define VMA_CREATE_FLAG_IS_NULL BIT(1) +#define VMA_CREATE_FLAG_DUMPABLE BIT(2) static struct xe_vma *xe_vma_create(struct xe_vm *vm, struct xe_bo *bo, @@ -813,6 +858,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, u8 id; bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY); bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL); + bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE); xe_assert(vm->xe, start < end); xe_assert(vm->xe, end < vm->size); @@ -847,6 +893,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, vma->gpuva.va.range = end - start + 1; if (read_only) vma->gpuva.flags |= XE_VMA_READ_ONLY; + if (dumpable) + vma->gpuva.flags |= XE_VMA_DUMPABLE; for_each_tile(tile, vm->xe, id) vma->tile_mask |= 0x1 << id; @@ -990,35 +1038,26 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) } /** - * xe_vm_prepare_vma() - drm_exec utility to lock a vma + * xe_vm_lock_vma() - drm_exec utility to lock a vma * @exec: The drm_exec object we're currently locking for. * @vma: The vma for which we want to lock the vm resv and any attached * object's resv. - * @num_shared: The number of dma-fence slots to pre-allocate in the - * objects' reservation objects. * * Return: 0 on success, negative error code on error. In particular * may return -EDEADLK on WW transaction contention and -EINTR if * an interruptible wait is terminated by a signal.
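With the num_shared argument gone, locking and fence-slot reservation become separate steps: the helper only locks, and callers reserve slots explicitly before adding fences. A short sketch of the split, in the spirit of this patch's callers:

err = xe_vm_lock_vma(exec, vma); /* locks the vm resv plus any bo resv */
if (err)
        return err;

/* reservation is the caller's job now, one slot per fence to be added */
err = dma_resv_reserve_fences(xe_vm_resv(xe_vma_vm(vma)), 1);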
*/ -int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, - unsigned int num_shared) +int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma) { struct xe_vm *vm = xe_vma_vm(vma); struct xe_bo *bo = xe_vma_bo(vma); int err; XE_WARN_ON(!vm); - if (num_shared) - err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared); - else - err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); - if (!err && bo && !bo->vm) { - if (num_shared) - err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared); - else - err = drm_exec_lock_obj(exec, &bo->ttm.base); - } + + err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); + if (!err && bo && !bo->vm) + err = drm_exec_lock_obj(exec, &bo->ttm.base); return err; } @@ -1030,7 +1069,7 @@ static void xe_vma_destroy_unlocked(struct xe_vma *vma) drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { - err = xe_vm_prepare_vma(&exec, vma, 0); + err = xe_vm_lock_vma(&exec, vma); drm_exec_retry_on_contention(&exec); if (XE_WARN_ON(err)) break; @@ -1065,7 +1104,9 @@ static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) xe_assert(vm->xe, xe_vma_vm(vma) == vm); lockdep_assert_held(&vm->lock); + mutex_lock(&vm->snap_mutex); err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); + mutex_unlock(&vm->snap_mutex); XE_WARN_ON(err); /* Shouldn't be possible */ return err; @@ -1076,7 +1117,9 @@ static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) xe_assert(vm->xe, xe_vma_vm(vma) == vm); lockdep_assert_held(&vm->lock); + mutex_lock(&vm->snap_mutex); drm_gpuva_remove(&vma->gpuva); + mutex_unlock(&vm->snap_mutex); if (vm->usm.last_fault_vma == vma) vm->usm.last_fault_vma = NULL; } @@ -1095,7 +1138,7 @@ static struct drm_gpuva_op *xe_vm_op_alloc(void) static void xe_vm_free(struct drm_gpuvm *gpuvm); -static struct drm_gpuvm_ops gpuvm_ops = { +static const struct drm_gpuvm_ops gpuvm_ops = { .op_alloc = xe_vm_op_alloc, .vm_bo_validate = xe_gpuvm_validate, .vm_free = xe_vm_free, @@ -1303,6 +1346,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) vm->flags = flags; init_rwsem(&vm->lock); + mutex_init(&vm->snap_mutex); INIT_LIST_HEAD(&vm->rebind_list); @@ -1428,6 +1472,7 @@ err_close: return ERR_PTR(err); err_no_resv: + mutex_destroy(&vm->snap_mutex); for_each_tile(tile, xe, id) xe_range_fence_tree_fini(&vm->rftree[id]); kfree(vm); @@ -1532,6 +1577,16 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe->usm.num_vm_in_fault_mode--; else if (!(vm->flags & XE_VM_FLAG_MIGRATION)) xe->usm.num_vm_in_non_fault_mode--; + + if (vm->usm.asid) { + void *lookup; + + xe_assert(xe, xe->info.has_asid); + xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); + + lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); + xe_assert(xe, lookup == vm); + } mutex_unlock(&xe->usm.lock); for_each_tile(tile, xe, id) @@ -1547,21 +1602,17 @@ static void vm_destroy_work_func(struct work_struct *w) struct xe_device *xe = vm->xe; struct xe_tile *tile; u8 id; - void *lookup; /* xe_vm_close_and_put was not called? 
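The destroy worker that follows also flushes the preempt rebind work before freeing anything, per the usual rule that deferred work which can dereference an object must be flushed (or canceled) before the object is freed. In sketch form, with illustrative types:

struct obj {
        struct work_struct work;
        /* ... state the worker dereferences ... */
};

static void obj_destroy(struct obj *o)
{
        flush_work(&o->work); /* wait out a possibly-running worker */
        kfree(o);
}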
*/ xe_assert(xe, !vm->size); - if (!(vm->flags & XE_VM_FLAG_MIGRATION)) { - xe_device_mem_access_put(xe); + if (xe_vm_in_preempt_fence_mode(vm)) + flush_work(&vm->preempt.rebind_work); - if (xe->info.has_asid && vm->usm.asid) { - mutex_lock(&xe->usm.lock); - lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); - xe_assert(xe, lookup == vm); - mutex_unlock(&xe->usm.lock); - } - } + mutex_destroy(&vm->snap_mutex); + + if (!(vm->flags & XE_VM_FLAG_MIGRATION)) + xe_device_mem_access_put(xe); for_each_tile(tile, xe, id) XE_WARN_ON(vm->pt_root[id]); @@ -2163,6 +2214,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, if (__op->op == DRM_GPUVA_OP_MAP) { op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; + op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; op->map.pat_index = pat_index; } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { op->prefetch.region = prefetch_region; @@ -2328,6 +2380,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, struct xe_sync_entry *syncs, u32 num_syncs, struct list_head *ops_list, bool last) { + struct xe_device *xe = vm->xe; struct xe_vma_op *last_op = NULL; struct drm_gpuva_op *__op; int err = 0; @@ -2356,6 +2409,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, { flags |= op->map.is_null ? VMA_CREATE_FLAG_IS_NULL : 0; + flags |= op->map.dumpable ? + VMA_CREATE_FLAG_DUMPABLE : 0; vma = new_vma(vm, &op->base.map, op->map.pat_index, flags); @@ -2380,6 +2435,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, flags |= op->base.remap.unmap->va->flags & DRM_GPUVA_SPARSE ? VMA_CREATE_FLAG_IS_NULL : 0; + flags |= op->base.remap.unmap->va->flags & + XE_VMA_DUMPABLE ? + VMA_CREATE_FLAG_DUMPABLE : 0; vma = new_vma(vm, op->base.remap.prev, old->pat_index, flags); @@ -2401,6 +2459,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, xe_vma_end(vma) - xe_vma_start(old); op->remap.start = xe_vma_end(vma); + vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", + (ULL)op->remap.start, + (ULL)op->remap.range); } } @@ -2411,6 +2472,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, flags |= op->base.remap.unmap->va->flags & DRM_GPUVA_SPARSE ? VMA_CREATE_FLAG_IS_NULL : 0; + flags |= op->base.remap.unmap->va->flags & + XE_VMA_DUMPABLE ? 
+ VMA_CREATE_FLAG_DUMPABLE : 0; vma = new_vma(vm, op->base.remap.next, old->pat_index, flags); @@ -2431,6 +2495,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, op->remap.range -= xe_vma_end(old) - xe_vma_start(vma); + vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", + (ULL)op->remap.start, + (ULL)op->remap.range); } } break; @@ -2473,7 +2540,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, lockdep_assert_held_write(&vm->lock); - err = xe_vm_prepare_vma(exec, vma, 1); + err = xe_vm_lock_vma(exec, vma); if (err) return err; @@ -3284,3 +3351,168 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) return 0; } + +struct xe_vm_snapshot { + unsigned long num_snaps; + struct { + u64 ofs, bo_ofs; + unsigned long len; + struct xe_bo *bo; + void *data; + struct mm_struct *mm; + } snap[]; +}; + +struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) +{ + unsigned long num_snaps = 0, i; + struct xe_vm_snapshot *snap = NULL; + struct drm_gpuva *gpuva; + + if (!vm) + return NULL; + + mutex_lock(&vm->snap_mutex); + drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { + if (gpuva->flags & XE_VMA_DUMPABLE) + num_snaps++; + } + + if (num_snaps) + snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); + if (!snap) + goto out_unlock; + + snap->num_snaps = num_snaps; + i = 0; + drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { + struct xe_vma *vma = gpuva_to_vma(gpuva); + struct xe_bo *bo = vma->gpuva.gem.obj ? + gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; + + if (!(gpuva->flags & XE_VMA_DUMPABLE)) + continue; + + snap->snap[i].ofs = xe_vma_start(vma); + snap->snap[i].len = xe_vma_size(vma); + if (bo) { + snap->snap[i].bo = xe_bo_get(bo); + snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); + } else if (xe_vma_is_userptr(vma)) { + struct mm_struct *mm = + to_userptr_vma(vma)->userptr.notifier.mm; + + if (mmget_not_zero(mm)) + snap->snap[i].mm = mm; + else + snap->snap[i].data = ERR_PTR(-EFAULT); + + snap->snap[i].bo_ofs = xe_vma_userptr(vma); + } else { + snap->snap[i].data = ERR_PTR(-ENOENT); + } + i++; + } + +out_unlock: + mutex_unlock(&vm->snap_mutex); + return snap; +} + +void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) +{ + for (int i = 0; i < snap->num_snaps; i++) { + struct xe_bo *bo = snap->snap[i].bo; + struct iosys_map src; + int err; + + if (IS_ERR(snap->snap[i].data)) + continue; + + snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); + if (!snap->snap[i].data) { + snap->snap[i].data = ERR_PTR(-ENOMEM); + goto cleanup_bo; + } + + if (bo) { + dma_resv_lock(bo->ttm.base.resv, NULL); + err = ttm_bo_vmap(&bo->ttm, &src); + if (!err) { + xe_map_memcpy_from(xe_bo_device(bo), + snap->snap[i].data, + &src, snap->snap[i].bo_ofs, + snap->snap[i].len); + ttm_bo_vunmap(&bo->ttm, &src); + } + dma_resv_unlock(bo->ttm.base.resv); + } else { + void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; + + kthread_use_mm(snap->snap[i].mm); + if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len)) + err = 0; + else + err = -EFAULT; + kthread_unuse_mm(snap->snap[i].mm); + + mmput(snap->snap[i].mm); + snap->snap[i].mm = NULL; + } + + if (err) { + kvfree(snap->snap[i].data); + snap->snap[i].data = ERR_PTR(err); + } + +cleanup_bo: + xe_bo_put(bo); + snap->snap[i].bo = NULL; + } +} + +void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) +{ + unsigned long i, j; + + for (i = 0; i < snap->num_snaps; i++) { + if (IS_ERR(snap->snap[i].data)) + goto 
uncaptured; + drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); + drm_printf(p, "[%llx].data: ", + snap->snap[i].ofs); + + for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { + u32 *val = snap->snap[i].data + j; + char dumped[ASCII85_BUFSZ]; + + drm_puts(p, ascii85_encode(*val, dumped)); + } + + drm_puts(p, "\n"); + continue; + +uncaptured: + drm_printf(p, "Unable to capture range [%llx-%llx]: %li\n", + snap->snap[i].ofs, snap->snap[i].ofs + snap->snap[i].len - 1, + PTR_ERR(snap->snap[i].data)); + } +} + +void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) +{ + unsigned long i; + + if (!snap) + return; + + for (i = 0; i < snap->num_snaps; i++) { + if (!IS_ERR(snap->snap[i].data)) + kvfree(snap->snap[i].data); + xe_bo_put(snap->snap[i].bo); + if (snap->snap[i].mm) + mmput(snap->snap[i].mm); + } + kvfree(snap); +} diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index 40188a28c5..306cd0934a 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -211,8 +211,6 @@ int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker); int xe_vm_invalidate_vma(struct xe_vma *vma); -extern struct ttm_device_funcs xe_ttm_funcs; - static inline void xe_vm_queue_rebind_worker(struct xe_vm *vm) { xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); @@ -244,8 +242,10 @@ bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end); int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id); -int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma, - unsigned int num_shared); +int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma); + +int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, + unsigned int num_fences); /** * xe_vm_resv() - Returns the vm's reservation object @@ -273,3 +273,8 @@ static inline void vm_dbg(const struct drm_device *dev, { /* noop */ } #endif #endif + +struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm); +void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap); +void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p); +void xe_vm_snapshot_free(struct xe_vm_snapshot *snap); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 4027da3a10..badf394508 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -32,6 +32,7 @@ struct xe_vm; #define XE_VMA_PTE_1G (DRM_GPUVA_USERBITS << 7) #define XE_VMA_PTE_64K (DRM_GPUVA_USERBITS << 8) #define XE_VMA_PTE_COMPACT (DRM_GPUVA_USERBITS << 9) +#define XE_VMA_DUMPABLE (DRM_GPUVA_USERBITS << 10) /** struct xe_userptr - User pointer */ struct xe_userptr { @@ -163,6 +164,11 @@ struct xe_vm { * VM */ struct rw_semaphore lock; + /** + * @snap_mutex: Mutex used to guard insertions and removals from gpuva, + * so we can take a snapshot safely from devcoredump. + */ + struct mutex snap_mutex; /** * @rebind_list: list of VMAs that need rebinding.
Protected by the @@ -184,30 +190,6 @@ struct xe_vm { */ struct xe_range_fence_tree rftree[XE_MAX_TILES_PER_DEVICE]; - /** @async_ops: async VM operations (bind / unbinds) */ - struct { - /** @list: list of pending async VM ops */ - struct list_head pending; - /** @work: worker to execute async VM ops */ - struct work_struct work; - /** @lock: protects list of pending async VM ops and fences */ - spinlock_t lock; - /** @fence: fence state */ - struct { - /** @context: context of async fence */ - u64 context; - /** @seqno: seqno of async fence */ - u32 seqno; - } fence; - /** @error: error state for async VM ops */ - int error; - /** - * @munmap_rebind_inflight: an munmap style VM bind is in the - * middle of a set of ops which requires a rebind at the end. - */ - bool munmap_rebind_inflight; - } async_ops; - const struct xe_pt_ops *pt_ops; /** @userptr: user pointer state */ @@ -296,6 +278,8 @@ struct xe_vma_op_map { struct xe_vma *vma; /** @is_null: is NULL binding */ bool is_null; + /** @dumpable: whether BO is dumped on GPU hang */ + bool dumpable; /** @pat_index: The pat index to use for this operation. */ u16 pat_index; }; diff --git a/drivers/gpu/drm/xe/xe_vram_freq.c b/drivers/gpu/drm/xe/xe_vram_freq.c new file mode 100644 index 0000000000..c5f6b5a5d1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vram_freq.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ +#include <linux/sysfs.h> +#include <drm/drm_managed.h> + +#include "xe_gt_types.h" +#include "xe_pcode.h" +#include "xe_pcode_api.h" +#include "xe_tile.h" +#include "xe_tile_sysfs.h" +#include "xe_vram_freq.h" + +/** + * DOC: Xe VRAM freq + * + * Provides sysfs entries for vram frequency in tile + * + * device/tile#/memory/freq0/max_freq - This is maximum frequency. This value is read-only as it + * is the fixed fuse point P0. It is not the system + * configuration. + * device/tile#/memory/freq0/min_freq - This is minimum frequency. This value is read-only as it + * is the fixed fuse point PN. It is not the system + * configuration. 
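These read-only entries use the stock device-attribute pattern, sketched here with a made-up attribute name and a placeholder value:

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t example_freq_show(struct device *dev,
                                 struct device_attribute *attr, char *buf)
{
        return sysfs_emit(buf, "%u\n", 1850); /* placeholder value */
}
static DEVICE_ATTR_RO(example_freq); /* pairs with example_freq_show() */

static struct attribute *example_attrs[] = {
        &dev_attr_example_freq.attr,
        NULL
};

static const struct attribute_group example_group = {
        .name = "freq0", /* becomes the subdirectory name */
        .attrs = example_attrs,
};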
+ */ + +static struct xe_tile *dev_to_tile(struct device *dev) +{ + return kobj_to_tile(dev->kobj.parent); +} + +static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct xe_tile *tile = dev_to_tile(dev); + struct xe_gt *gt = tile->primary_gt; + u32 val, mbox; + int err; + + mbox = REG_FIELD_PREP(PCODE_MB_COMMAND, PCODE_FREQUENCY_CONFIG) + | REG_FIELD_PREP(PCODE_MB_PARAM1, PCODE_MBOX_FC_SC_READ_FUSED_P0) + | REG_FIELD_PREP(PCODE_MB_PARAM2, PCODE_MBOX_DOMAIN_HBM); + + err = xe_pcode_read(gt, mbox, &val, NULL); + if (err) + return err; + + /* data_out - Fused P0 for domain ID in units of 50 MHz */ + val *= 50; + + return sysfs_emit(buf, "%u\n", val); +} +static DEVICE_ATTR_RO(max_freq); + +static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct xe_tile *tile = dev_to_tile(dev); + struct xe_gt *gt = tile->primary_gt; + u32 val, mbox; + int err; + + mbox = REG_FIELD_PREP(PCODE_MB_COMMAND, PCODE_FREQUENCY_CONFIG) + | REG_FIELD_PREP(PCODE_MB_PARAM1, PCODE_MBOX_FC_SC_READ_FUSED_PN) + | REG_FIELD_PREP(PCODE_MB_PARAM2, PCODE_MBOX_DOMAIN_HBM); + + err = xe_pcode_read(gt, mbox, &val, NULL); + if (err) + return err; + + /* data_out - Fused Pn for domain ID in units of 50 MHz */ + val *= 50; + + return sysfs_emit(buf, "%u\n", val); +} +static DEVICE_ATTR_RO(min_freq); + +static struct attribute *freq_attrs[] = { + &dev_attr_max_freq.attr, + &dev_attr_min_freq.attr, + NULL +}; + +static const struct attribute_group freq_group_attrs = { + .name = "freq0", + .attrs = freq_attrs, +}; + +static void vram_freq_sysfs_fini(struct drm_device *drm, void *arg) +{ + struct kobject *kobj = arg; + + sysfs_remove_group(kobj, &freq_group_attrs); + kobject_put(kobj); +} + +/** + * xe_vram_freq_sysfs_init - Initialize vram frequency sysfs component + * @tile: Xe Tile object + * + * It needs to be initialized after the main tile component is ready + */ +void xe_vram_freq_sysfs_init(struct xe_tile *tile) +{ + struct xe_device *xe = tile_to_xe(tile); + struct kobject *kobj; + int err; + + if (xe->info.platform != XE_PVC) + return; + + kobj = kobject_create_and_add("memory", tile->sysfs); + if (!kobj) { + drm_warn(&xe->drm, "failed to add memory directory, err: %d\n", -ENOMEM); + return; + } + + err = sysfs_create_group(kobj, &freq_group_attrs); + if (err) { + kobject_put(kobj); + drm_warn(&xe->drm, "failed to register vram freq sysfs, err: %d\n", err); + return; + } + + err = drmm_add_action_or_reset(&xe->drm, vram_freq_sysfs_fini, kobj); + if (err) + drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", + __func__, err); +} diff --git a/drivers/gpu/drm/xe/xe_vram_freq.h b/drivers/gpu/drm/xe/xe_vram_freq.h new file mode 100644 index 0000000000..cbe8c12fbd --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vram_freq.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_VRAM_FREQ_H_ +#define _XE_VRAM_FREQ_H_ + +struct xe_tile; + +void xe_vram_freq_sysfs_init(struct xe_tile *tile); + +#endif /* _XE_VRAM_FREQ_H_ */ diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 5f61dd87c5..a0264eedd4 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -9,7 +9,8 @@ #include <kunit/visibility.h> #include <linux/compiler_types.h> -#include "generated/xe_wa_oob.h" +#include <generated/xe_wa_oob.h> + #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" @@ -125,13 +126,6 @@ static const struct 
xe_rtp_entry_sr gt_was[] = { /* DG2 */ - { XE_RTP_NAME("16010515920"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), - GRAPHICS_STEP(A0, B0), - ENGINE_CLASS(VIDEO_DECODE)), - XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F18(0), ALNUNIT_CLKGATE_DIS)), - XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), - }, { XE_RTP_NAME("22010523718"), XE_RTP_RULES(SUBPLATFORM(DG2, G10)), XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE, CG3DDISCFEG_CLKGATE_DIS)) @@ -140,61 +134,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_RULES(SUBPLATFORM(DG2, G10)), XE_RTP_ACTIONS(SET(SUBSLICE_UNIT_LEVEL_CLKGATE, DSS_ROUTER_CLKGATE_DIS)) }, - { XE_RTP_NAME("14012362059"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB)) - }, - { XE_RTP_NAME("14012362059"), - XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB)) - }, - { XE_RTP_NAME("14010948348"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS)) - }, - { XE_RTP_NAME("14011037102"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(UNSLCGCTL9444, LTCDD_CLKGATE_DIS)) - }, - { XE_RTP_NAME("14011371254"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS)) - }, - { XE_RTP_NAME("14011431319"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(UNSLCGCTL9440, - GAMTLBOACS_CLKGATE_DIS | - GAMTLBVDBOX7_CLKGATE_DIS | GAMTLBVDBOX6_CLKGATE_DIS | - GAMTLBVDBOX5_CLKGATE_DIS | GAMTLBVDBOX4_CLKGATE_DIS | - GAMTLBVDBOX3_CLKGATE_DIS | GAMTLBVDBOX2_CLKGATE_DIS | - GAMTLBVDBOX1_CLKGATE_DIS | GAMTLBVDBOX0_CLKGATE_DIS | - GAMTLBKCR_CLKGATE_DIS | GAMTLBGUC_CLKGATE_DIS | - GAMTLBBLT_CLKGATE_DIS), - SET(UNSLCGCTL9444, - GAMTLBGFXA0_CLKGATE_DIS | GAMTLBGFXA1_CLKGATE_DIS | - GAMTLBCOMPA0_CLKGATE_DIS | GAMTLBCOMPA1_CLKGATE_DIS | - GAMTLBCOMPB0_CLKGATE_DIS | GAMTLBCOMPB1_CLKGATE_DIS | - GAMTLBCOMPC0_CLKGATE_DIS | GAMTLBCOMPC1_CLKGATE_DIS | - GAMTLBCOMPD0_CLKGATE_DIS | GAMTLBCOMPD1_CLKGATE_DIS | - GAMTLBMERT_CLKGATE_DIS | - GAMTLBVEBOX3_CLKGATE_DIS | GAMTLBVEBOX2_CLKGATE_DIS | - GAMTLBVEBOX1_CLKGATE_DIS | GAMTLBVEBOX0_CLKGATE_DIS)) - }, - { XE_RTP_NAME("14010569222"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(UNSLICE_UNIT_LEVEL_CLKGATE, GAMEDIA_CLKGATE_DIS)) - }, - { XE_RTP_NAME("14011028019"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(SSMCGCTL9530, RTFUNIT_CLKGATE_DIS)) - }, - { XE_RTP_NAME("14010680813"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(XEHP_GAMSTLB_CTRL, - CONTROL_BLOCK_CLKGATE_DIS | - EGRESS_BLOCK_CLKGATE_DIS | - TAG_BLOCK_CLKGATE_DIS)) - }, { XE_RTP_NAME("14014830051"), XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(CLR(SARB_CHICKEN1, COMP_CKN_IN)) @@ -212,10 +151,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE)) }, - { XE_RTP_NAME("14010648519"), - XE_RTP_RULES(PLATFORM(DG2)), - XE_RTP_ACTIONS(SET(XEHP_L3NODEARBCFG, XEHP_LNESPARE)) - }, /* PVC */ @@ -377,13 +312,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { POLYGON_TRIFAN_LINELOOP_DISABLE)) }, { XE_RTP_NAME("22012826095, 22013059131"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(FIELD_SET(LSC_CHICKEN_BIT_0_UDW, - MAXREQS_PER_BANK, - 
REG_FIELD_PREP(MAXREQS_PER_BANK, 2))) - }, - { XE_RTP_NAME("22012826095, 22013059131"), XE_RTP_RULES(SUBPLATFORM(DG2, G11), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(FIELD_SET(LSC_CHICKEN_BIT_0_UDW, @@ -391,27 +319,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { REG_FIELD_PREP(MAXREQS_PER_BANK, 2))) }, { XE_RTP_NAME("22013059131"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT)) - }, - { XE_RTP_NAME("22013059131"), XE_RTP_RULES(SUBPLATFORM(DG2, G11), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT)) }, - { XE_RTP_NAME("14010918519"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, - FORCE_SLM_FENCE_SCOPE_TO_TILE | - FORCE_UGM_FENCE_SCOPE_TO_TILE, - /* - * Ignore read back as it always returns 0 in these - * steps - */ - .read_mask = 0)) - }, { XE_RTP_NAME("14015227452"), XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), @@ -428,22 +339,12 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3)) }, - { XE_RTP_NAME("16011620976, 22015475538"), + { XE_RTP_NAME("22015475538"), XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8)) }, { XE_RTP_NAME("22012654132"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC, - /* - * Register can't be read back for verification on - * DG2 due to Wa_14012342262 - */ - .read_mask = 0)) - }, - { XE_RTP_NAME("22012654132"), XE_RTP_RULES(SUBPLATFORM(DG2, G11), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(CACHE_MODE_SS, ENABLE_PREFETCH_INTO_IC, @@ -461,68 +362,11 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_READ_SUPPRESSION)) }, - { XE_RTP_NAME("14013392000"), - XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0), - ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN2, ENABLE_LARGE_GRF_MODE)) - }, - { XE_RTP_NAME("14012419201"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0), - ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN4, - DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX)) - }, - { XE_RTP_NAME("14012419201"), - XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0), - ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN4, - DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX)) - }, - { XE_RTP_NAME("1308578152"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0), - ENGINE_CLASS(RENDER), - FUNC(xe_rtp_match_first_gslice_fused_off)), - XE_RTP_ACTIONS(CLR(CS_DEBUG_MODE1(RENDER_RING_BASE), - REPLAY_MODE_GRANULARITY)) - }, { XE_RTP_NAME("22010960976, 14013347512"), XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(CLR(XEHP_HDC_CHICKEN0, LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK)) }, - { XE_RTP_NAME("1608949956, 14010198302"), - XE_RTP_RULES(PLATFORM(DG2), ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN, - MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE)) - }, - { XE_RTP_NAME("22010430635"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, 
B0), - ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(ROW_CHICKEN4, - DISABLE_GRF_CLEAR)) - }, - { XE_RTP_NAME("14013202645"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(B0, C0), - ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY)) - }, - { XE_RTP_NAME("14013202645"), - XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0), - ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY)) - }, - { XE_RTP_NAME("22012532006"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0), - ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, - DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA)) - }, - { XE_RTP_NAME("22012532006"), - XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0), - ENGINE_CLASS(RENDER)), - XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, - DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA)) - }, { XE_RTP_NAME("14015150844"), XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(XEHP_HDC_CHICKEN0, DIS_ATOMIC_CHAINING_TYPED_WRITES, @@ -612,7 +456,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, - + { XE_RTP_NAME("16018610683"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, SLM_WMTP_RESTORE)) + }, {} }; @@ -652,21 +499,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { /* DG2 */ - { XE_RTP_NAME("16011186671"), - XE_RTP_RULES(SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(CLR(VFLSKPD, DIS_MULT_MISS_RD_SQUASH), - SET(VFLSKPD, DIS_OVER_FETCH_CACHE)) - }, - { XE_RTP_NAME("14010469329"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3, - XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE)) - }, - { XE_RTP_NAME("14010698770, 22010613112, 22010465075"), - XE_RTP_RULES(SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0)), - XE_RTP_ACTIONS(SET(XEHP_COMMON_SLICE_CHICKEN3, - DISABLE_CPS_AWARE_COLOR_PIPE)) - }, { XE_RTP_NAME("16013271637"), XE_RTP_RULES(PLATFORM(DG2)), XE_RTP_ACTIONS(SET(XEHP_SLICE_COMMON_ECO_CHICKEN1, @@ -708,6 +540,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)), XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE)) }, + { XE_RTP_NAME("14019877138"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) + }, /* Xe2_LPG */ @@ -739,6 +575,11 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) }, + { XE_RTP_NAME("16020183090"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(INSTPM(RENDER_RING_BASE), ENABLE_SEMAPHORE_POLL_BIT)) + }, {} }; diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 727bdc4292..b138cbd51b 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -1,13 +1,8 @@ 22012773006 GRAPHICS_VERSION_RANGE(1200, 1250) -16011759253 SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, B0) 14014475959 GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0) PLATFORM(DG2) 22011391025 PLATFORM(DG2) -14012197797 PLATFORM(DG2), GRAPHICS_STEP(A0, B0) -16011777198 SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0) - SUBPLATFORM(DG2, G11), GRAPHICS_STEP(A0, B0) -22012727170 SUBPLATFORM(DG2, G10), GRAPHICS_STEP(A0, C0) - 
SUBPLATFORM(DG2, G11) +22012727170 SUBPLATFORM(DG2, G11) 22012727685 SUBPLATFORM(DG2, G11) 16015675438 PLATFORM(PVC) SUBPLATFORM(DG2, G10) @@ -22,3 +17,8 @@ 14019821291 MEDIA_VERSION_RANGE(1300, 2000) 14015076503 MEDIA_VERSION(1300) 16020292621 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0) +14018913170 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0) + MEDIA_VERSION(2000), GRAPHICS_STEP(A0, A1) + GRAPHICS_VERSION_RANGE(1270, 1274) + MEDIA_VERSION(1300) + PLATFORM(DG2) diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c index a75eeba7bf..f697213392 100644 --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -148,7 +148,7 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, if (q) { if (q->ops->reset_status(q)) { - drm_info(&xe->drm, "exec gueue reset detected\n"); + drm_info(&xe->drm, "exec queue reset detected\n"); err = -EIO; break; } diff --git a/drivers/gpu/drm/xe/xe_wopcm_types.h b/drivers/gpu/drm/xe/xe_wopcm_types.h index 486d850c40..99d34837c4 100644 --- a/drivers/gpu/drm/xe/xe_wopcm_types.h +++ b/drivers/gpu/drm/xe/xe_wopcm_types.h @@ -16,9 +16,9 @@ struct xe_wopcm { u32 size; /** @guc: GuC WOPCM Region info */ struct { - /** @base: GuC WOPCM base which is offset from WOPCM base */ + /** @guc.base: GuC WOPCM base which is offset from WOPCM base */ u32 base; - /** @size: Size of the GuC WOPCM region */ + /** @guc.size: Size of the GuC WOPCM region */ u32 size; } guc; }; |
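The closing xe_wopcm_types.h hunk applies the kernel-doc rule that members of nested structs are labelled with their full path, which keeps scripts/kernel-doc from warning about unmatched member names. A minimal illustration of the accepted form:

/**
 * struct example - container with a nested region
 * @size: total size in bytes
 */
struct example {
        u32 size;
        /** @region: sub-region info */
        struct {
                /** @region.base: offset of the sub-region */
                u32 base;
                /** @region.size: size of the sub-region */
                u32 size;
        } region;
};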