diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-08-07 13:18:06 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-08-07 13:18:06 +0000 |
commit | 638a9e433ecd61e64761352dbec1fa4f5874c941 (patch) | |
tree | fdbff74a238d7a5a7d1cef071b7230bc064b9f25 /drivers/gpu/drm/xe | |
parent | Releasing progress-linux version 6.9.12-1~progress7.99u1. (diff) | |
download | linux-638a9e433ecd61e64761352dbec1fa4f5874c941.tar.xz linux-638a9e433ecd61e64761352dbec1fa4f5874c941.zip |
Merging upstream version 6.10.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/gpu/drm/xe')
178 files changed, 7721 insertions, 1992 deletions
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 1a556d087e..63f1e2d164 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -26,6 +26,7 @@ config DRM_XE select INPUT if ACPI select ACPI_VIDEO if X86 && ACPI select ACPI_BUTTON if ACPI + select X86_PLATFORM_DEVICES if X86 && ACPI select ACPI_WMI if X86 && ACPI select SYNC_FILE select IOSF_MBI @@ -41,6 +42,7 @@ config DRM_XE select MMU_NOTIFIER select WANT_DEV_COREDUMP select AUXILIARY_BUS + select HMM_MIRROR help Experimental driver for Intel Xe series GPUs diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index c29a850859..b165bbf52a 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -32,7 +32,7 @@ endif # Enable -Werror in CI and development subdir-ccflags-$(CONFIG_DRM_XE_WERROR) += -Werror -subdir-ccflags-y += -I$(obj) -I$(srctree)/$(src) +subdir-ccflags-y += -I$(obj) -I$(src) # generated sources hostprogs := xe_gen_wa_oob @@ -43,12 +43,13 @@ quiet_cmd_wa_oob = GEN $(notdir $(generated_oob)) cmd_wa_oob = mkdir -p $(@D); $^ $(generated_oob) $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \ - $(srctree)/$(src)/xe_wa_oob.rules + $(src)/xe_wa_oob.rules $(call cmd,wa_oob) uses_generated_oob := \ $(obj)/xe_gsc.o \ $(obj)/xe_guc.o \ + $(obj)/xe_guc_ads.o \ $(obj)/xe_migrate.o \ $(obj)/xe_ring_ops.o \ $(obj)/xe_vm.o \ @@ -97,6 +98,8 @@ xe-y += xe_bb.o \ xe_guc_db_mgr.o \ xe_guc_debugfs.o \ xe_guc_hwconfig.o \ + xe_guc_id_mgr.o \ + xe_guc_klv_helpers.o \ xe_guc_log.o \ xe_guc_pc.o \ xe_guc_submit.o \ @@ -145,6 +148,8 @@ xe-y += xe_bb.o \ xe_wa.o \ xe_wopcm.o +xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o + # graphics hardware monitoring (HWMON) support xe-$(CONFIG_HWMON) += xe_hwmon.o @@ -155,9 +160,14 @@ xe-y += \ xe_sriov.o xe-$(CONFIG_PCI_IOV) += \ + xe_gt_sriov_pf.o \ + xe_gt_sriov_pf_config.o \ + xe_gt_sriov_pf_control.o \ + xe_gt_sriov_pf_policy.o \ xe_lmtt.o \ xe_lmtt_2l.o \ - xe_lmtt_ml.o + xe_lmtt_ml.o \ + xe_sriov_pf.o # include helpers for tests even when XE is built-in ifdef CONFIG_DRM_XE_KUNIT_TEST @@ -166,15 +176,12 @@ endif # i915 Display compat #defines and #includes subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \ - -I$(srctree)/$(src)/display/ext \ - -I$(srctree)/$(src)/compat-i915-headers \ + -I$(src)/display/ext \ + -I$(src)/compat-i915-headers \ -I$(srctree)/drivers/gpu/drm/i915/display/ \ -Ddrm_i915_gem_object=xe_bo \ -Ddrm_i915_private=xe_device -CFLAGS_i915-display/intel_fbdev.o = -Wno-override-init -CFLAGS_i915-display/intel_display_device.o = -Wno-override-init - # Rule to build SOC code shared with i915 $(obj)/i915-soc/%.o: $(srctree)/drivers/gpu/drm/i915/soc/%.c FORCE $(call cmd,force_checksrc) @@ -257,6 +264,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_global_state.o \ i915-display/intel_gmbus.o \ i915-display/intel_hdcp.o \ + i915-display/intel_hdcp_gsc_message.o \ i915-display/intel_hdmi.o \ i915-display/intel_hotplug.o \ i915-display/intel_hotplug_irq.o \ @@ -278,6 +286,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \ i915-display/intel_vdsc.o \ i915-display/intel_vga.o \ i915-display/intel_vrr.o \ + i915-display/intel_dmc_wl.o \ i915-display/intel_wm.o \ i915-display/skl_scaler.o \ i915-display/skl_universal_plane.o \ @@ -310,7 +319,7 @@ ifneq ($(CONFIG_DRM_XE_DISPLAY),y) endif always-$(CONFIG_DRM_XE_WERROR) += \ - $(patsubst %.h,%.hdrtest, $(shell cd $(srctree)/$(src) && find * -name '*.h' $(hdrtest_find_args))) + $(patsubst %.h,%.hdrtest, $(shell cd $(src) && find * -name '*.h' $(hdrtest_find_args))) quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@) cmd_hdrtest = $(CC) -DHDRTEST $(filter-out $(CFLAGS_GCOV), $(c_flags)) -S -o /dev/null -x c /dev/null -include $<; touch $@ diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h index 5496a58908..c1ad09b364 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h @@ -3,8 +3,8 @@ * Copyright © 2023 Intel Corporation */ -#ifndef _GUC_ACTIONS_PF_ABI_H -#define _GUC_ACTIONS_PF_ABI_H +#ifndef _ABI_GUC_ACTIONS_SRIOV_ABI_H +#define _ABI_GUC_ACTIONS_SRIOV_ABI_H #include "guc_communication_ctb_abi.h" @@ -171,4 +171,200 @@ #define VF2GUC_RELAY_TO_PF_REQUEST_MSG_n_RELAY_DATAx GUC_HXG_REQUEST_MSG_n_DATAn #define VF2GUC_RELAY_TO_PF_REQUEST_MSG_NUM_RELAY_DATA GUC_RELAY_MSG_MAX_LEN +/** + * DOC: GUC2PF_VF_STATE_NOTIFY + * + * The GUC2PF_VF_STATE_NOTIFY message is used by the GuC to notify PF about change + * of the VF state. + * + * This G2H message is sent as `CTB HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_EVENT_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_GUC2PF_VF_STATE_NOTIFY` = 0x5106 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | DATA1 = **VFID** - VF identifier | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | DATA2 = **EVENT** - notification event: | + * | | | | + * | | | - _`GUC_PF_NOTIFY_VF_ENABLE` = 1 (only if VFID = 0) | + * | | | - _`GUC_PF_NOTIFY_VF_FLR` = 1 | + * | | | - _`GUC_PF_NOTIFY_VF_FLR_DONE` = 2 | + * | | | - _`GUC_PF_NOTIFY_VF_PAUSE_DONE` = 3 | + * | | | - _`GUC_PF_NOTIFY_VF_FIXUP_DONE` = 4 | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_GUC2PF_VF_STATE_NOTIFY 0x5106u + +#define GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_LEN (GUC_HXG_EVENT_MSG_MIN_LEN + 2u) +#define GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_0_MBZ GUC_HXG_EVENT_MSG_0_DATA0 +#define GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID GUC_HXG_EVENT_MSG_n_DATAn +#define GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT GUC_HXG_EVENT_MSG_n_DATAn +#define GUC_PF_NOTIFY_VF_ENABLE 1u +#define GUC_PF_NOTIFY_VF_FLR 1u +#define GUC_PF_NOTIFY_VF_FLR_DONE 2u +#define GUC_PF_NOTIFY_VF_PAUSE_DONE 3u +#define GUC_PF_NOTIFY_VF_FIXUP_DONE 4u + +/** + * DOC: PF2GUC_UPDATE_VGT_POLICY + * + * This message is used by the PF to set `GuC VGT Policy KLVs`_. + * + * This message must be sent as `CTB HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_PF2GUC_UPDATE_VGT_POLICY` = 0x5502 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **CFG_ADDR_LO** - dword aligned GGTT offset that | + * | | | represents the start of `GuC VGT Policy KLVs`_ list. | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **CFG_ADDR_HI** - upper 32 bits of above offset. | + * +---+-------+--------------------------------------------------------------+ + * | 3 | 31:0 | **CFG_SIZE** - size (in dwords) of the config buffer | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **COUNT** - number of KLVs successfully applied | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_PF2GUC_UPDATE_VGT_POLICY 0x5502u + +#define PF2GUC_UPDATE_VGT_POLICY_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 3u) +#define PF2GUC_UPDATE_VGT_POLICY_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define PF2GUC_UPDATE_VGT_POLICY_REQUEST_MSG_1_CFG_ADDR_LO GUC_HXG_REQUEST_MSG_n_DATAn +#define PF2GUC_UPDATE_VGT_POLICY_REQUEST_MSG_2_CFG_ADDR_HI GUC_HXG_REQUEST_MSG_n_DATAn +#define PF2GUC_UPDATE_VGT_POLICY_REQUEST_MSG_3_CFG_SIZE GUC_HXG_REQUEST_MSG_n_DATAn + +#define PF2GUC_UPDATE_VGT_POLICY_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define PF2GUC_UPDATE_VGT_POLICY_RESPONSE_MSG_0_COUNT GUC_HXG_RESPONSE_MSG_0_DATA0 + +/** + * DOC: PF2GUC_UPDATE_VF_CFG + * + * The `PF2GUC_UPDATE_VF_CFG`_ message is used by PF to provision single VF in GuC. + * + * This message must be sent as `CTB HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_PF2GUC_UPDATE_VF_CFG` = 0x5503 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | **VFID** - identifier of the VF that the KLV | + * | | | configurations are being applied to | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | **CFG_ADDR_LO** - dword aligned GGTT offset that represents | + * | | | the start of a list of virtualization related KLV configs | + * | | | that are to be applied to the VF. | + * | | | If this parameter is zero, the list is not parsed. | + * | | | If full configs address parameter is zero and configs_size is| + * | | | zero associated VF config shall be reset to its default state| + * +---+-------+--------------------------------------------------------------+ + * | 3 | 31:0 | **CFG_ADDR_HI** - upper 32 bits of configs address. | + * +---+-------+--------------------------------------------------------------+ + * | 4 | 31:0 | **CFG_SIZE** - size (in dwords) of the config buffer | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | **COUNT** - number of KLVs successfully applied | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_PF2GUC_UPDATE_VF_CFG 0x5503u + +#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_LEN (GUC_HXG_REQUEST_MSG_MIN_LEN + 4u) +#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 +#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_1_VFID GUC_HXG_REQUEST_MSG_n_DATAn +#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_2_CFG_ADDR_LO GUC_HXG_REQUEST_MSG_n_DATAn +#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_3_CFG_ADDR_HI GUC_HXG_REQUEST_MSG_n_DATAn +#define PF2GUC_UPDATE_VF_CFG_REQUEST_MSG_4_CFG_SIZE GUC_HXG_REQUEST_MSG_n_DATAn + +#define PF2GUC_UPDATE_VF_CFG_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define PF2GUC_UPDATE_VF_CFG_RESPONSE_MSG_0_COUNT GUC_HXG_RESPONSE_MSG_0_DATA0 + +/** + * DOC: PF2GUC_VF_CONTROL + * + * The PF2GUC_VF_CONTROL message is used by the PF to trigger VF state change + * maintained by the GuC. + * + * This H2G message must be sent as `CTB HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_PF2GUC_VF_CONTROL_CMD` = 0x5506 | + * +---+-------+--------------------------------------------------------------+ + * | 1 | 31:0 | DATA1 = **VFID** - VF identifier | + * +---+-------+--------------------------------------------------------------+ + * | 2 | 31:0 | DATA2 = **COMMAND** - control command: | + * | | | | + * | | | - _`GUC_PF_TRIGGER_VF_PAUSE` = 1 | + * | | | - _`GUC_PF_TRIGGER_VF_RESUME` = 2 | + * | | | - _`GUC_PF_TRIGGER_VF_STOP` = 3 | + * | | | - _`GUC_PF_TRIGGER_VF_FLR_START` = 4 | + * | | | - _`GUC_PF_TRIGGER_VF_FLR_FINISH` = 5 | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_PF2GUC_VF_CONTROL 0x5506u + +#define PF2GUC_VF_CONTROL_REQUEST_MSG_LEN (GUC_HXG_EVENT_MSG_MIN_LEN + 2u) +#define PF2GUC_VF_CONTROL_REQUEST_MSG_0_MBZ GUC_HXG_EVENT_MSG_0_DATA0 +#define PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID GUC_HXG_EVENT_MSG_n_DATAn +#define PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND GUC_HXG_EVENT_MSG_n_DATAn +#define GUC_PF_TRIGGER_VF_PAUSE 1u +#define GUC_PF_TRIGGER_VF_RESUME 2u +#define GUC_PF_TRIGGER_VF_STOP 3u +#define GUC_PF_TRIGGER_VF_FLR_START 4u +#define GUC_PF_TRIGGER_VF_FLR_FINISH 5u + #endif diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 0400bc0fcc..511cf974d5 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -319,4 +319,14 @@ enum { #define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY 0x8a0b #define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_LEN 1u +/* + * Workaround keys: + */ +enum xe_guc_klv_ids { + GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED = 0x9002, + GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING = 0x9005, + GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE = 0x9007, + GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008, +}; + #endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index 854a7bb535..cd46322761 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -19,14 +19,12 @@ #include "xe_bo.h" #include "xe_pm.h" #include "xe_step.h" -#include "i915_gem.h" #include "i915_gem_stolen.h" #include "i915_gpu_error.h" #include "i915_reg_defs.h" #include "i915_utils.h" #include "intel_gt_types.h" #include "intel_step.h" -#include "intel_uc_fw.h" #include "intel_uncore.h" #include "intel_runtime_pm.h" #include <linux/pm_runtime.h> @@ -41,12 +39,8 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) return dev_get_drvdata(kdev); } - -#define INTEL_JASPERLAKE 0 -#define INTEL_ELKHARTLAKE 0 #define IS_PLATFORM(xe, x) ((xe)->info.platform == x) #define INTEL_INFO(dev_priv) (&((dev_priv)->info)) -#define INTEL_DEVID(dev_priv) ((dev_priv)->info.devid) #define IS_I830(dev_priv) (dev_priv && 0) #define IS_I845G(dev_priv) (dev_priv && 0) #define IS_I85X(dev_priv) (dev_priv && 0) @@ -86,11 +80,10 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) #define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_S) #define IS_ALDERLAKE_P(dev_priv) (IS_PLATFORM(dev_priv, XE_ALDERLAKE_P) || \ IS_PLATFORM(dev_priv, XE_ALDERLAKE_N)) -#define IS_XEHPSDV(dev_priv) (dev_priv && 0) #define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, XE_DG2) -#define IS_PONTEVECCHIO(dev_priv) IS_PLATFORM(dev_priv, XE_PVC) #define IS_METEORLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_METEORLAKE) #define IS_LUNARLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_LUNARLAKE) +#define IS_BATTLEMAGE(dev_priv) IS_PLATFORM(dev_priv, XE_BATTLEMAGE) #define IS_HASWELL_ULT(dev_priv) (dev_priv && 0) #define IS_BROADWELL_ULT(dev_priv) (dev_priv && 0) @@ -98,19 +91,12 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) #define IP_VER(ver, rel) ((ver) << 8 | (rel)) -#define INTEL_DISPLAY_ENABLED(xe) (HAS_DISPLAY((xe)) && !intel_opregion_headless_sku((xe))) - -#define IS_GRAPHICS_VER(xe, first, last) \ - ((xe)->info.graphics_verx100 >= first * 100 && \ - (xe)->info.graphics_verx100 <= (last*100 + 99)) #define IS_MOBILE(xe) (xe && 0) -#define HAS_LLC(xe) (!IS_DGFX((xe))) #define HAS_GMD_ID(xe) GRAPHICS_VERx100(xe) >= 1270 /* Workarounds not handled yet */ #define IS_DISPLAY_STEP(xe, first, last) ({u8 __step = (xe)->info.step.display; first <= __step && __step <= last; }) -#define IS_GRAPHICS_STEP(xe, first, last) ({u8 __step = (xe)->info.step.graphics; first <= __step && __step <= last; }) #define IS_LP(xe) (0) #define IS_GEN9_LP(xe) (0) @@ -127,27 +113,6 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) #define IS_KABYLAKE_ULT(xe) (xe && 0) #define IS_SKYLAKE_ULT(xe) (xe && 0) -#define IS_DG1_GRAPHICS_STEP(xe, first, last) (IS_DG1(xe) && IS_GRAPHICS_STEP(xe, first, last)) -#define IS_DG2_GRAPHICS_STEP(xe, variant, first, last) \ - ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_ ## variant && \ - IS_GRAPHICS_STEP(xe, first, last)) -#define IS_XEHPSDV_GRAPHICS_STEP(xe, first, last) (IS_XEHPSDV(xe) && IS_GRAPHICS_STEP(xe, first, last)) - -/* XXX: No basedie stepping support yet */ -#define IS_PVC_BD_STEP(xe, first, last) (!WARN_ON(1) && IS_PONTEVECCHIO(xe)) - -#define IS_TIGERLAKE_DISPLAY_STEP(xe, first, last) (IS_TIGERLAKE(xe) && IS_DISPLAY_STEP(xe, first, last)) -#define IS_ROCKETLAKE_DISPLAY_STEP(xe, first, last) (IS_ROCKETLAKE(xe) && IS_DISPLAY_STEP(xe, first, last)) -#define IS_DG1_DISPLAY_STEP(xe, first, last) (IS_DG1(xe) && IS_DISPLAY_STEP(xe, first, last)) -#define IS_DG2_DISPLAY_STEP(xe, first, last) (IS_DG2(xe) && IS_DISPLAY_STEP(xe, first, last)) -#define IS_ADLP_DISPLAY_STEP(xe, first, last) (IS_ALDERLAKE_P(xe) && IS_DISPLAY_STEP(xe, first, last)) -#define IS_ADLS_DISPLAY_STEP(xe, first, last) (IS_ALDERLAKE_S(xe) && IS_DISPLAY_STEP(xe, first, last)) -#define IS_JSL_EHL_DISPLAY_STEP(xe, first, last) (IS_JSL_EHL(xe) && IS_DISPLAY_STEP(xe, first, last)) -#define IS_MTL_DISPLAY_STEP(xe, first, last) (IS_METEORLAKE(xe) && IS_DISPLAY_STEP(xe, first, last)) - -/* FIXME: Add subplatform here */ -#define IS_MTL_GRAPHICS_STEP(xe, sub, first, last) (IS_METEORLAKE(xe) && IS_DISPLAY_STEP(xe, first, last)) - #define IS_DG2_G10(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G10) #define IS_DG2_G11(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G11) #define IS_DG2_G12(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G12) @@ -155,30 +120,31 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) #define IS_ICL_WITH_PORT_F(xe) (xe && 0) #define HAS_FLAT_CCS(xe) (xe_device_has_flat_ccs(xe)) #define to_intel_bo(x) gem_to_xe_bo((x)) -#define mkwrite_device_info(xe) (INTEL_INFO(xe)) #define HAS_128_BYTE_Y_TILING(xe) (xe || 1) -#define intel_has_gpu_reset(a) (a && 0) - #include "intel_wakeref.h" static inline intel_wakeref_t intel_runtime_pm_get(struct xe_runtime_pm *pm) { struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm); - if (xe_pm_runtime_get(xe) < 0) { - xe_pm_runtime_put(xe); - return 0; - } - return 1; + return xe_pm_runtime_resume_and_get(xe); } static inline intel_wakeref_t intel_runtime_pm_get_if_in_use(struct xe_runtime_pm *pm) { struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm); - return xe_pm_runtime_get_if_active(xe); + return xe_pm_runtime_get_if_in_use(xe); +} + +static inline intel_wakeref_t intel_runtime_pm_get_noresume(struct xe_runtime_pm *pm) +{ + struct xe_device *xe = container_of(pm, struct xe_device, runtime_pm); + + xe_pm_runtime_get_noresume(xe); + return true; } static inline void intel_runtime_pm_put_unchecked(struct xe_runtime_pm *pm) @@ -218,7 +184,6 @@ struct i915_sched_attr { #define RUNTIME_INFO(xe) (&(xe)->info.i915_runtime) #define FORCEWAKE_ALL XE_FORCEWAKE_ALL -#define HPD_STORM_DEFAULT_THRESHOLD 50 #ifdef CONFIG_ARM64 /* diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h deleted file mode 100644 index 12c671fd52..0000000000 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_fixed.h +++ /dev/null @@ -1,6 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#include "../../i915/i915_fixed.h" diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h deleted file mode 100644 index 06b723a479..0000000000 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef __I915_GEM_H__ -#define __I915_GEM_H__ -#define GEM_BUG_ON -#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h index bd233007c1..cb6c759882 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_gem_stolen.h @@ -17,10 +17,15 @@ static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe, { struct xe_bo *bo; int err; - u32 flags = XE_BO_CREATE_PINNED_BIT | XE_BO_CREATE_STOLEN_BIT; + u32 flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_STOLEN; - if (align) + if (start < SZ_4K) + start = SZ_4K; + + if (align) { size = ALIGN(size, align); + start = ALIGN(start, align); + } bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe), NULL, size, start, end, diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h index 80b024d435..4931c7198f 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vgpu.h @@ -9,36 +9,10 @@ #include <linux/types.h> struct drm_i915_private; -struct i915_ggtt; -static inline void intel_vgpu_detect(struct drm_i915_private *i915) -{ -} static inline bool intel_vgpu_active(struct drm_i915_private *i915) { return false; } -static inline void intel_vgpu_register(struct drm_i915_private *i915) -{ -} -static inline bool intel_vgpu_has_full_ppgtt(struct drm_i915_private *i915) -{ - return false; -} -static inline bool intel_vgpu_has_hwsp_emulation(struct drm_i915_private *i915) -{ - return false; -} -static inline bool intel_vgpu_has_huge_gtt(struct drm_i915_private *i915) -{ - return false; -} -static inline int intel_vgt_balloon(struct i915_ggtt *ggtt) -{ - return 0; -} -static inline void intel_vgt_deballoon(struct i915_ggtt *ggtt) -{ -} #endif /* _I915_VGPU_H_ */ diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h deleted file mode 100644 index 0097453289..0000000000 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uc_fw.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef _INTEL_UC_FW_H_ -#define _INTEL_UC_FW_H_ - -#define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git" - -#endif diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h index cd26ddc0f6..ef79793caa 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h @@ -25,15 +25,15 @@ static inline u32 intel_uncore_read(struct intel_uncore *uncore, return xe_mmio_read32(__compat_uncore_to_gt(uncore), reg); } -static inline u32 intel_uncore_read8(struct intel_uncore *uncore, - i915_reg_t i915_reg) +static inline u8 intel_uncore_read8(struct intel_uncore *uncore, + i915_reg_t i915_reg) { struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); return xe_mmio_read8(__compat_uncore_to_gt(uncore), reg); } -static inline u32 intel_uncore_read16(struct intel_uncore *uncore, +static inline u16 intel_uncore_read16(struct intel_uncore *uncore, i915_reg_t i915_reg) { struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg)); diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c index a9c1f9885c..e18521acc5 100644 --- a/drivers/gpu/drm/xe/display/intel_fb_bo.c +++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c @@ -11,7 +11,7 @@ void intel_fb_bo_framebuffer_fini(struct xe_bo *bo) { - if (bo->flags & XE_BO_CREATE_PINNED_BIT) { + if (bo->flags & XE_BO_FLAG_PINNED) { /* Unpin our kernel fb first */ xe_bo_lock(bo, false); xe_bo_unpin(bo); @@ -33,9 +33,9 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb, if (ret) goto err; - if (!(bo->flags & XE_BO_SCANOUT_BIT)) { + if (!(bo->flags & XE_BO_FLAG_SCANOUT)) { /* - * XE_BO_SCANOUT_BIT should ideally be set at creation, or is + * XE_BO_FLAG_SCANOUT should ideally be set at creation, or is * automatically set when creating FB. We cannot change caching * mode when the boect is VM_BINDed, so we can only set * coherency with display when unbound. @@ -45,7 +45,7 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb, ret = -EINVAL; goto err; } - bo->flags |= XE_BO_SCANOUT_BIT; + bo->flags |= XE_BO_FLAG_SCANOUT; } ttm_bo_unreserve(&bo->ttm); return 0; diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index 51ae3561fd..9e4bcfdbc7 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -42,9 +42,9 @@ struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, if (!IS_DGFX(dev_priv)) { obj = xe_bo_create_pin_map(dev_priv, xe_device_get_root_tile(dev_priv), NULL, size, - ttm_bo_type_kernel, XE_BO_SCANOUT_BIT | - XE_BO_CREATE_STOLEN_BIT | - XE_BO_CREATE_PINNED_BIT); + ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | + XE_BO_FLAG_STOLEN | + XE_BO_FLAG_PINNED); if (!IS_ERR(obj)) drm_info(&dev_priv->drm, "Allocated fbdev into stolen\n"); else @@ -52,9 +52,9 @@ struct drm_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, } if (IS_ERR(obj)) { obj = xe_bo_create_pin_map(dev_priv, xe_device_get_root_tile(dev_priv), NULL, size, - ttm_bo_type_kernel, XE_BO_SCANOUT_BIT | - XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(dev_priv)) | - XE_BO_CREATE_PINNED_BIT); + ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT | + XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(dev_priv)) | + XE_BO_FLAG_PINNED); } if (IS_ERR(obj)) { @@ -81,8 +81,8 @@ int intel_fbdev_fb_fill_info(struct drm_i915_private *i915, struct fb_info *info { struct pci_dev *pdev = to_pci_dev(i915->drm.dev); - if (!(obj->flags & XE_BO_CREATE_SYSTEM_BIT)) { - if (obj->flags & XE_BO_CREATE_STOLEN_BIT) + if (!(obj->flags & XE_BO_FLAG_SYSTEM)) { + if (obj->flags & XE_BO_FLAG_STOLEN) info->fix.smem_start = xe_ttm_stolen_io_offset(obj, 0); else info->fix.smem_start = diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 6ec375c1c4..0de0566e5b 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -51,14 +51,6 @@ bool xe_display_driver_probe_defer(struct pci_dev *pdev) return intel_display_driver_probe_defer(pdev); } -static void xe_display_last_close(struct drm_device *dev) -{ - struct xe_device *xe = to_xe_device(dev); - - if (xe->info.enable_display) - intel_fbdev_restore_mode(to_xe_device(dev)); -} - /** * xe_display_driver_set_hooks - Add driver flags and hooks for display * @driver: DRM device driver @@ -73,7 +65,6 @@ void xe_display_driver_set_hooks(struct drm_driver *driver) return; driver->driver_features |= DRIVER_MODESET | DRIVER_ATOMIC; - driver->lastclose = xe_display_last_close; } static void unset_display_features(struct xe_device *xe) @@ -101,8 +92,6 @@ static void display_destroy(struct drm_device *dev, void *dummy) */ int xe_display_create(struct xe_device *xe) { - int err; - spin_lock_init(&xe->display.fb_tracking.lock); xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0); @@ -110,11 +99,7 @@ int xe_display_create(struct xe_device *xe) drmm_mutex_init(&xe->drm, &xe->sb_lock); xe->enabled_irq_mask = ~0; - err = drmm_add_action_or_reset(&xe->drm, display_destroy, NULL); - if (err) - return err; - - return 0; + return drmm_add_action_or_reset(&xe->drm, display_destroy, NULL); } static void xe_display_fini_nommio(struct drm_device *dev, void *dummy) @@ -218,9 +203,7 @@ void xe_display_fini(struct xe_device *xe) if (!xe->info.enable_display) return; - /* poll work can call into fbdev, hence clean that up afterwards */ intel_hpd_poll_fini(xe); - intel_fbdev_fini(xe); intel_hdcp_component_fini(xe); intel_audio_deinit(xe); diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c index 27c2fb1c00..44c9fd2143 100644 --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -45,8 +45,8 @@ bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d obj = xe_bo_create_pin_map(i915, xe_device_get_root_tile(i915), NULL, PAGE_ALIGN(size), ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(i915)) | - XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(i915)) | + XE_BO_FLAG_GGTT); if (IS_ERR(obj)) { kfree(vma); return false; diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 722c84a566..3e1ae37c4c 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -10,6 +10,7 @@ #include "intel_fb_pin.h" #include "xe_ggtt.h" #include "xe_gt.h" +#include "xe_pm.h" #include <drm/ttm/ttm_bo.h> @@ -30,7 +31,7 @@ write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ for (row = 0; row < height; row++) { u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, - xe->pat.idx[XE_CACHE_WB]); + xe->pat.idx[XE_CACHE_NONE]); iosys_map_wr(map, *dpt_ofs, u64, pte); *dpt_ofs += 8; @@ -62,7 +63,7 @@ write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, for (column = 0; column < width; column++) { iosys_map_wr(map, *dpt_ofs, u64, pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, - xe->pat.idx[XE_CACHE_WB])); + xe->pat.idx[XE_CACHE_NONE])); *dpt_ofs += 8; src_idx++; @@ -99,18 +100,21 @@ static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb, if (IS_DGFX(xe)) dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM0_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_VRAM0 | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_PAGETABLE); else dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size, ttm_bo_type_kernel, - XE_BO_CREATE_STOLEN_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_STOLEN | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_PAGETABLE); if (IS_ERR(dpt)) dpt = xe_bo_create_pin_map(xe, tile0, NULL, dpt_size, ttm_bo_type_kernel, - XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_PAGETABLE); if (IS_ERR(dpt)) return PTR_ERR(dpt); @@ -119,7 +123,7 @@ static int __xe_pin_fb_vma_dpt(struct intel_framebuffer *fb, for (x = 0; x < size / XE_PAGE_SIZE; x++) { u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x * XE_PAGE_SIZE, - xe->pat.idx[XE_CACHE_WB]); + xe->pat.idx[XE_CACHE_NONE]); iosys_map_wr(&dpt->vmap, x * 8, u64, pte); } @@ -165,7 +169,7 @@ write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo for (row = 0; row < height; row++) { u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE, - xe->pat.idx[XE_CACHE_WB]); + xe->pat.idx[XE_CACHE_NONE]); xe_ggtt_set_pte(ggtt, *ggtt_ofs, pte); *ggtt_ofs += XE_PAGE_SIZE; @@ -190,7 +194,7 @@ static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb, /* TODO: Consider sharing framebuffer mapping? * embed i915_vma inside intel_framebuffer */ - xe_device_mem_access_get(tile_to_xe(ggtt->tile)); + xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); ret = mutex_lock_interruptible(&ggtt->lock); if (ret) goto out; @@ -211,7 +215,7 @@ static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb, for (x = 0; x < size; x += XE_PAGE_SIZE) { u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x, - xe->pat.idx[XE_CACHE_WB]); + xe->pat.idx[XE_CACHE_NONE]); xe_ggtt_set_pte(ggtt, vma->node.start + x, pte); } @@ -238,11 +242,10 @@ static int __xe_pin_fb_vma_ggtt(struct intel_framebuffer *fb, rot_info->plane[i].dst_stride); } - xe_ggtt_invalidate(ggtt); out_unlock: mutex_unlock(&ggtt->lock); out: - xe_device_mem_access_put(tile_to_xe(ggtt->tile)); + xe_pm_runtime_put(tile_to_xe(ggtt->tile)); return ret; } @@ -260,7 +263,7 @@ static struct i915_vma *__xe_pin_fb_vma(struct intel_framebuffer *fb, if (IS_DGFX(to_xe_device(bo->ttm.base.dev)) && intel_fb_rc_ccs_cc_plane(&fb->base) >= 0 && - !(bo->flags & XE_BO_NEEDS_CPU_ACCESS)) { + !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS)) { struct xe_tile *tile = xe_device_get_root_tile(xe); /* @@ -321,7 +324,7 @@ static void __xe_unpin_fb_vma(struct i915_vma *vma) xe_bo_unpin_map_no_vm(vma->dpt); else if (!drm_mm_node_allocated(&vma->bo->ggtt_node) || vma->bo->ggtt_node.start != vma->node.start) - xe_ggtt_remove_node(ggtt, &vma->node); + xe_ggtt_remove_node(ggtt, &vma->node, false); ttm_bo_reserve(&vma->bo->ttm, false, false, NULL); ttm_bo_unpin(&vma->bo->ttm); @@ -353,7 +356,7 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state) struct i915_vma *vma; /* We reject creating !SCANOUT fb's, so this is weird.. */ - drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_SCANOUT_BIT)); + drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_FLAG_SCANOUT)); vma = __xe_pin_fb_vma(to_intel_framebuffer(fb), &plane_state->view.gtt); if (IS_ERR(vma)) @@ -381,4 +384,4 @@ struct i915_address_space *intel_dpt_create(struct intel_framebuffer *fb) void intel_dpt_destroy(struct i915_address_space *vm) { return; -}
\ No newline at end of file +} diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index 0f11a39333..b3d3c065dd 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -3,32 +3,254 @@ * Copyright 2023, Intel Corporation. */ -#include "i915_drv.h" +#include <drm/drm_print.h> +#include <drm/i915_hdcp_interface.h> +#include <linux/delay.h> + +#include "abi/gsc_command_header_abi.h" #include "intel_hdcp_gsc.h" +#include "intel_hdcp_gsc_message.h" +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_gsc_proxy.h" +#include "xe_gsc_submit.h" +#include "xe_gt.h" +#include "xe_map.h" +#include "xe_pm.h" +#include "xe_uc_fw.h" + +#define HECI_MEADDRESS_HDCP 18 + +struct intel_hdcp_gsc_message { + struct xe_bo *hdcp_bo; + u64 hdcp_cmd_in; + u64 hdcp_cmd_out; +}; -bool intel_hdcp_gsc_cs_required(struct drm_i915_private *i915) +#define HDCP_GSC_HEADER_SIZE sizeof(struct intel_gsc_mtl_header) + +bool intel_hdcp_gsc_cs_required(struct xe_device *xe) { - return true; + return DISPLAY_VER(xe) >= 14; } -bool intel_hdcp_gsc_check_status(struct drm_i915_private *i915) +bool intel_hdcp_gsc_check_status(struct xe_device *xe) { - return false; + struct xe_tile *tile = xe_device_get_root_tile(xe); + struct xe_gt *gt = tile->media_gt; + bool ret = true; + + if (!xe_uc_fw_is_enabled(>->uc.gsc.fw)) + return false; + + xe_pm_runtime_get(xe); + if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC)) { + drm_dbg_kms(&xe->drm, + "failed to get forcewake to check proxy status\n"); + ret = false; + goto out; + } + + if (!xe_gsc_proxy_init_done(>->uc.gsc)) + ret = false; + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); +out: + xe_pm_runtime_put(xe); + return ret; } -int intel_hdcp_gsc_init(struct drm_i915_private *i915) +/*This function helps allocate memory for the command that we will send to gsc cs */ +static int intel_hdcp_gsc_initialize_message(struct xe_device *xe, + struct intel_hdcp_gsc_message *hdcp_message) { - drm_info(&i915->drm, "HDCP support not yet implemented\n"); - return -ENODEV; + struct xe_bo *bo = NULL; + u64 cmd_in, cmd_out; + int ret = 0; + + /* allocate object of two page for HDCP command memory and store it */ + bo = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, PAGE_SIZE * 2, + ttm_bo_type_kernel, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT); + + if (IS_ERR(bo)) { + drm_err(&xe->drm, "Failed to allocate bo for HDCP streaming command!\n"); + ret = PTR_ERR(bo); + goto out; + } + + cmd_in = xe_bo_ggtt_addr(bo); + cmd_out = cmd_in + PAGE_SIZE; + xe_map_memset(xe, &bo->vmap, 0, 0, bo->size); + + hdcp_message->hdcp_bo = bo; + hdcp_message->hdcp_cmd_in = cmd_in; + hdcp_message->hdcp_cmd_out = cmd_out; +out: + return ret; } -void intel_hdcp_gsc_fini(struct drm_i915_private *i915) +static int intel_hdcp_gsc_hdcp2_init(struct xe_device *xe) { + struct intel_hdcp_gsc_message *hdcp_message; + int ret; + + hdcp_message = kzalloc(sizeof(*hdcp_message), GFP_KERNEL); + + if (!hdcp_message) + return -ENOMEM; + + /* + * NOTE: No need to lock the comp mutex here as it is already + * going to be taken before this function called + */ + ret = intel_hdcp_gsc_initialize_message(xe, hdcp_message); + if (ret) { + drm_err(&xe->drm, "Could not initialize hdcp_message\n"); + kfree(hdcp_message); + return ret; + } + + xe->display.hdcp.hdcp_message = hdcp_message; + return ret; } -ssize_t intel_hdcp_gsc_msg_send(struct drm_i915_private *i915, u8 *msg_in, +static const struct i915_hdcp_ops gsc_hdcp_ops = { + .initiate_hdcp2_session = intel_hdcp_gsc_initiate_session, + .verify_receiver_cert_prepare_km = + intel_hdcp_gsc_verify_receiver_cert_prepare_km, + .verify_hprime = intel_hdcp_gsc_verify_hprime, + .store_pairing_info = intel_hdcp_gsc_store_pairing_info, + .initiate_locality_check = intel_hdcp_gsc_initiate_locality_check, + .verify_lprime = intel_hdcp_gsc_verify_lprime, + .get_session_key = intel_hdcp_gsc_get_session_key, + .repeater_check_flow_prepare_ack = + intel_hdcp_gsc_repeater_check_flow_prepare_ack, + .verify_mprime = intel_hdcp_gsc_verify_mprime, + .enable_hdcp_authentication = intel_hdcp_gsc_enable_authentication, + .close_hdcp_session = intel_hdcp_gsc_close_session, +}; + +int intel_hdcp_gsc_init(struct xe_device *xe) +{ + struct i915_hdcp_arbiter *data; + int ret; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + mutex_lock(&xe->display.hdcp.hdcp_mutex); + xe->display.hdcp.arbiter = data; + xe->display.hdcp.arbiter->hdcp_dev = xe->drm.dev; + xe->display.hdcp.arbiter->ops = &gsc_hdcp_ops; + ret = intel_hdcp_gsc_hdcp2_init(xe); + if (ret) + kfree(data); + + mutex_unlock(&xe->display.hdcp.hdcp_mutex); + + return ret; +} + +void intel_hdcp_gsc_fini(struct xe_device *xe) +{ + struct intel_hdcp_gsc_message *hdcp_message = + xe->display.hdcp.hdcp_message; + struct i915_hdcp_arbiter *arb = xe->display.hdcp.arbiter; + + if (hdcp_message) { + xe_bo_unpin_map_no_vm(hdcp_message->hdcp_bo); + kfree(hdcp_message); + xe->display.hdcp.hdcp_message = NULL; + } + + kfree(arb); + xe->display.hdcp.arbiter = NULL; +} + +static int xe_gsc_send_sync(struct xe_device *xe, + struct intel_hdcp_gsc_message *hdcp_message, + u32 msg_size_in, u32 msg_size_out, + u32 addr_out_off) +{ + struct xe_gt *gt = hdcp_message->hdcp_bo->tile->media_gt; + struct iosys_map *map = &hdcp_message->hdcp_bo->vmap; + struct xe_gsc *gsc = >->uc.gsc; + int ret; + + ret = xe_gsc_pkt_submit_kernel(gsc, hdcp_message->hdcp_cmd_in, msg_size_in, + hdcp_message->hdcp_cmd_out, msg_size_out); + if (ret) { + drm_err(&xe->drm, "failed to send gsc HDCP msg (%d)\n", ret); + return ret; + } + + if (xe_gsc_check_and_update_pending(xe, map, 0, map, addr_out_off)) + return -EAGAIN; + + ret = xe_gsc_read_out_header(xe, map, addr_out_off, + sizeof(struct hdcp_cmd_header), NULL); + + return ret; +} + +ssize_t intel_hdcp_gsc_msg_send(struct xe_device *xe, u8 *msg_in, size_t msg_in_len, u8 *msg_out, size_t msg_out_len) { - return -ENODEV; + const size_t max_msg_size = PAGE_SIZE - HDCP_GSC_HEADER_SIZE; + struct intel_hdcp_gsc_message *hdcp_message; + u64 host_session_id; + u32 msg_size_in, msg_size_out; + u32 addr_out_off, addr_in_wr_off = 0; + int ret, tries = 0; + + if (msg_in_len > max_msg_size || msg_out_len > max_msg_size) { + ret = -ENOSPC; + goto out; + } + + msg_size_in = msg_in_len + HDCP_GSC_HEADER_SIZE; + msg_size_out = msg_out_len + HDCP_GSC_HEADER_SIZE; + hdcp_message = xe->display.hdcp.hdcp_message; + addr_out_off = PAGE_SIZE; + + host_session_id = xe_gsc_create_host_session_id(); + xe_pm_runtime_get_noresume(xe); + addr_in_wr_off = xe_gsc_emit_header(xe, &hdcp_message->hdcp_bo->vmap, + addr_in_wr_off, HECI_MEADDRESS_HDCP, + host_session_id, msg_in_len); + xe_map_memcpy_to(xe, &hdcp_message->hdcp_bo->vmap, addr_in_wr_off, + msg_in, msg_in_len); + /* + * Keep sending request in case the pending bit is set no need to add + * message handle as we are using same address hence loc. of header is + * same and it will contain the message handle. we will send the message + * 20 times each message 50 ms apart + */ + do { + ret = xe_gsc_send_sync(xe, hdcp_message, msg_size_in, msg_size_out, + addr_out_off); + + /* Only try again if gsc says so */ + if (ret != -EAGAIN) + break; + + msleep(50); + + } while (++tries < 20); + + if (ret) + goto out; + + xe_map_memcpy_from(xe, msg_out, &hdcp_message->hdcp_bo->vmap, + addr_out_off + HDCP_GSC_HEADER_SIZE, + msg_out_len); + +out: + xe_pm_runtime_put(xe); + return ret; } diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c index 866d1dd6ee..9693c56d38 100644 --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -6,6 +6,7 @@ /* for ioread64 */ #include <linux/io-64-nonatomic-lo-hi.h> +#include "regs/xe_gtt_defs.h" #include "xe_ggtt.h" #include "i915_drv.h" @@ -62,7 +63,7 @@ initial_plane_bo(struct xe_device *xe, if (plane_config->size == 0) return NULL; - flags = XE_BO_CREATE_PINNED_BIT | XE_BO_SCANOUT_BIT | XE_BO_CREATE_GGTT_BIT; + flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT; base = round_down(plane_config->base, page_size); if (IS_DGFX(xe)) { @@ -79,7 +80,7 @@ initial_plane_bo(struct xe_device *xe, } phys_base = pte & ~(page_size - 1); - flags |= XE_BO_CREATE_VRAM0_BIT; + flags |= XE_BO_FLAG_VRAM0; /* * We don't currently expect this to ever be placed in the @@ -101,7 +102,7 @@ initial_plane_bo(struct xe_device *xe, if (!stolen) return NULL; phys_base = base; - flags |= XE_BO_CREATE_STOLEN_BIT; + flags |= XE_BO_FLAG_STOLEN; /* * If the FB is too big, just don't use it since fbdev is not very diff --git a/drivers/gpu/drm/xe/instructions/xe_gfx_state_commands.h b/drivers/gpu/drm/xe/instructions/xe_gfx_state_commands.h new file mode 100644 index 0000000000..dca62af5a5 --- /dev/null +++ b/drivers/gpu/drm/xe/instructions/xe_gfx_state_commands.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GFX_STATE_COMMANDS_H_ +#define _XE_GFX_STATE_COMMANDS_H_ + +#include "instructions/xe_instr_defs.h" + +#define GFX_STATE_OPCODE REG_GENMASK(28, 26) + +#define GFX_STATE_CMD(opcode) \ + (XE_INSTR_GFX_STATE | REG_FIELD_PREP(GFX_STATE_OPCODE, opcode)) + +#define STATE_WRITE_INLINE GFX_STATE_CMD(0x0) + +#endif diff --git a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h index 8e6dd061f2..31d28a67ef 100644 --- a/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_gfxpipe_commands.h @@ -47,6 +47,8 @@ #define GPGPU_CSR_BASE_ADDRESS GFXPIPE_COMMON_CMD(0x1, 0x4) #define STATE_COMPUTE_MODE GFXPIPE_COMMON_CMD(0x1, 0x5) #define CMD_3DSTATE_BTD GFXPIPE_COMMON_CMD(0x1, 0x6) +#define STATE_SYSTEM_MEM_FENCE_ADDRESS GFXPIPE_COMMON_CMD(0x1, 0x9) +#define STATE_CONTEXT_DATA_BASE_ADDRESS GFXPIPE_COMMON_CMD(0x1, 0xB) #define CMD_3DSTATE_VF_STATISTICS GFXPIPE_SINGLE_DW_CMD(0x0, 0xB) @@ -71,6 +73,7 @@ #define CMD_3DSTATE_WM GFXPIPE_3D_CMD(0x0, 0x14) #define CMD_3DSTATE_CONSTANT_VS GFXPIPE_3D_CMD(0x0, 0x15) #define CMD_3DSTATE_CONSTANT_GS GFXPIPE_3D_CMD(0x0, 0x16) +#define CMD_3DSTATE_CONSTANT_PS GFXPIPE_3D_CMD(0x0, 0x17) #define CMD_3DSTATE_SAMPLE_MASK GFXPIPE_3D_CMD(0x0, 0x18) #define CMD_3DSTATE_CONSTANT_HS GFXPIPE_3D_CMD(0x0, 0x19) #define CMD_3DSTATE_CONSTANT_DS GFXPIPE_3D_CMD(0x0, 0x1A) diff --git a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h index 04179b2a48..fd2ce7ace5 100644 --- a/drivers/gpu/drm/xe/instructions/xe_instr_defs.h +++ b/drivers/gpu/drm/xe/instructions/xe_instr_defs.h @@ -17,6 +17,7 @@ #define XE_INSTR_MI REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x0) #define XE_INSTR_GSC REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x2) #define XE_INSTR_GFXPIPE REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x3) +#define XE_INSTR_GFX_STATE REG_FIELD_PREP(XE_INSTR_CMD_TYPE, 0x4) /* * Most (but not all) instructions have a "length" field in the instruction diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index deddc8be48..af71b87d80 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -104,9 +104,6 @@ #define FF_SLICE_CS_CHICKEN1(base) XE_REG((base) + 0xe0, XE_REG_OPTION_MASKED) #define FFSC_PERCTX_PREEMPT_CTRL REG_BIT(14) -#define FF_SLICE_CS_CHICKEN2(base) XE_REG((base) + 0xe4, XE_REG_OPTION_MASKED) -#define PERF_FIX_BALANCING_CFE_DISABLE REG_BIT(15) - #define CS_DEBUG_MODE1(base) XE_REG((base) + 0xec, XE_REG_OPTION_MASKED) #define FF_DOP_CLOCK_GATE_DISABLE REG_BIT(1) #define REPLAY_MODE_GRANULARITY REG_BIT(0) diff --git a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h index 9886ec9cb0..e2a925be13 100644 --- a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h @@ -38,4 +38,11 @@ #define HECI_H_GS1(base) XE_REG((base) + 0xc4c) #define HECI_H_GS1_ER_PREP REG_BIT(0) +#define GSCI_TIMER_STATUS XE_REG(0x11ca28) +#define GSCI_TIMER_STATUS_VALUE REG_GENMASK(1, 0) +#define GSCI_TIMER_STATUS_RESET_IN_PROGRESS 0 +#define GSCI_TIMER_STATUS_TIMER_EXPIRED 1 +#define GSCI_TIMER_STATUS_RESET_COMPLETE 2 +#define GSCI_TIMER_STATUS_OUT_OF_RESET 3 + #endif diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 15ac2d284d..94445810cc 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -69,10 +69,14 @@ #define XEHP_TILE_ADDR_RANGE(_idx) XE_REG_MCR(0x4900 + (_idx) * 4) #define XEHP_FLAT_CCS_BASE_ADDR XE_REG_MCR(0x4910) +#define XEHP_FLAT_CCS_PTR REG_GENMASK(31, 8) #define WM_CHICKEN3 XE_REG_MCR(0x5588, XE_REG_OPTION_MASKED) #define HIZ_PLANE_COMPRESSION_DIS REG_BIT(10) +#define CHICKEN_RASTER_1 XE_REG_MCR(0x6204, XE_REG_OPTION_MASKED) +#define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) + #define CHICKEN_RASTER_2 XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED) #define TBIMR_FAST_CLIP REG_BIT(5) @@ -97,7 +101,8 @@ #define CACHE_MODE_1 XE_REG(0x7004, XE_REG_OPTION_MASKED) #define MSAA_OPTIMIZATION_REDUC_DISABLE REG_BIT(11) -#define COMMON_SLICE_CHICKEN1 XE_REG(0x7010) +#define COMMON_SLICE_CHICKEN1 XE_REG(0x7010, XE_REG_OPTION_MASKED) +#define DISABLE_BOTTOM_CLIP_RECTANGLE_TEST REG_BIT(14) #define HIZ_CHICKEN XE_REG(0x7018, XE_REG_OPTION_MASKED) #define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14) @@ -141,6 +146,10 @@ #define XE2_FLAT_CCS_BASE_RANGE_LOWER XE_REG_MCR(0x8800) #define XE2_FLAT_CCS_ENABLE REG_BIT(0) +#define XE2_FLAT_CCS_BASE_LOWER_ADDR_MASK REG_GENMASK(31, 6) + +#define XE2_FLAT_CCS_BASE_RANGE_UPPER XE_REG_MCR(0x8804) +#define XE2_FLAT_CCS_BASE_UPPER_ADDR_MASK REG_GENMASK(7, 0) #define GSCPSMI_BASE XE_REG(0x880c) @@ -156,7 +165,10 @@ #define MIRROR_FUSE3 XE_REG(0x9118) #define XE2_NODE_ENABLE_MASK REG_GENMASK(31, 16) #define L3BANK_PAIR_COUNT 4 +#define XEHPC_GT_L3_MODE_MASK REG_GENMASK(7, 4) +#define XE2_GT_L3_MODE_MASK REG_GENMASK(7, 4) #define L3BANK_MASK REG_GENMASK(3, 0) +#define XELP_GT_L3_MODE_MASK REG_GENMASK(7, 0) /* on Xe_HP the same fuses indicates mslices instead of L3 banks */ #define MAX_MSLICES 4 #define MEML3_EN_MASK REG_GENMASK(3, 0) @@ -271,6 +283,10 @@ #define FORCEWAKE_GT XE_REG(0xa188) #define PG_ENABLE XE_REG(0xa210) +#define VD2_MFXVDENC_POWERGATE_ENABLE REG_BIT(8) +#define VD2_HCP_POWERGATE_ENABLE REG_BIT(7) +#define VD0_MFXVDENC_POWERGATE_ENABLE REG_BIT(4) +#define VD0_HCP_POWERGATE_ENABLE REG_BIT(3) #define CTC_MODE XE_REG(0xa26c) #define CTC_SHIFT_PARAMETER_MASK REG_GENMASK(2, 1) @@ -349,6 +365,7 @@ #define THREAD_EX_ARB_MODE_RR_AFTER_DEP REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2) #define ROW_CHICKEN3 XE_REG_MCR(0xe49c, XE_REG_OPTION_MASKED) +#define XE2_EUPEND_CHK_FLUSH_DIS REG_BIT(14) #define DIS_FIX_EOT1_FLUSH REG_BIT(9) #define TDL_TSL_CHICKEN XE_REG_MCR(0xe4c4, XE_REG_OPTION_MASKED) @@ -364,17 +381,22 @@ #define DISABLE_EARLY_READ REG_BIT(14) #define ENABLE_LARGE_GRF_MODE REG_BIT(12) #define PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8) +#define DISABLE_TDL_SVHS_GATING REG_BIT(1) #define DISABLE_DOP_GATING REG_BIT(0) #define RT_CTRL XE_REG_MCR(0xe530) #define DIS_NULL_QUERY REG_BIT(10) +#define EU_SYSTOLIC_LIC_THROTTLE_CTL_WITH_LOCK XE_REG_MCR(0xe534) +#define EU_SYSTOLIC_LIC_THROTTLE_CTL_LOCK_BIT REG_BIT(31) + #define XEHP_HDC_CHICKEN0 XE_REG_MCR(0xe5f0, XE_REG_OPTION_MASKED) #define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11) #define DIS_ATOMIC_CHAINING_TYPED_WRITES REG_BIT(3) #define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8) #define DISABLE_D8_D16_COASLESCE REG_BIT(30) +#define WR_REQ_CHAINING_DIS REG_BIT(26) #define TGM_WRITE_EOM_FORCE REG_BIT(17) #define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15) #define SEQUENTIAL_ACCESS_UPGRADE_DISABLE REG_BIT(13) @@ -439,7 +461,13 @@ #define GT_PERF_STATUS XE_REG(0x1381b4) #define VOLTAGE_MASK REG_GENMASK(10, 0) -#define GT_INTR_DW(x) XE_REG(0x190018 + ((x) * 4)) +/* + * Note: Interrupt registers 1900xx are VF accessible only until version 12.50. + * On newer platforms, VFs are using memory-based interrupts instead. + * However, for simplicity we keep this XE_REG_OPTION_VF tag intact. + */ + +#define GT_INTR_DW(x) XE_REG(0x190018 + ((x) * 4), XE_REG_OPTION_VF) #define INTR_GSC REG_BIT(31) #define INTR_GUC REG_BIT(25) #define INTR_MGUC REG_BIT(24) @@ -450,16 +478,16 @@ #define INTR_VECS(x) REG_BIT(31 - (x)) #define INTR_VCS(x) REG_BIT(x) -#define RENDER_COPY_INTR_ENABLE XE_REG(0x190030) -#define VCS_VECS_INTR_ENABLE XE_REG(0x190034) -#define GUC_SG_INTR_ENABLE XE_REG(0x190038) +#define RENDER_COPY_INTR_ENABLE XE_REG(0x190030, XE_REG_OPTION_VF) +#define VCS_VECS_INTR_ENABLE XE_REG(0x190034, XE_REG_OPTION_VF) +#define GUC_SG_INTR_ENABLE XE_REG(0x190038, XE_REG_OPTION_VF) #define ENGINE1_MASK REG_GENMASK(31, 16) #define ENGINE0_MASK REG_GENMASK(15, 0) -#define GPM_WGBOXPERF_INTR_ENABLE XE_REG(0x19003c) -#define GUNIT_GSC_INTR_ENABLE XE_REG(0x190044) -#define CCS_RSVD_INTR_ENABLE XE_REG(0x190048) +#define GPM_WGBOXPERF_INTR_ENABLE XE_REG(0x19003c, XE_REG_OPTION_VF) +#define GUNIT_GSC_INTR_ENABLE XE_REG(0x190044, XE_REG_OPTION_VF) +#define CCS_RSVD_INTR_ENABLE XE_REG(0x190048, XE_REG_OPTION_VF) -#define INTR_IDENTITY_REG(x) XE_REG(0x190060 + ((x) * 4)) +#define INTR_IDENTITY_REG(x) XE_REG(0x190060 + ((x) * 4), XE_REG_OPTION_VF) #define INTR_DATA_VALID REG_BIT(31) #define INTR_ENGINE_INSTANCE(x) REG_FIELD_GET(GENMASK(25, 20), x) #define INTR_ENGINE_CLASS(x) REG_FIELD_GET(GENMASK(18, 16), x) @@ -468,16 +496,16 @@ #define OTHER_GSC_HECI2_INSTANCE 3 #define OTHER_GSC_INSTANCE 6 -#define IIR_REG_SELECTOR(x) XE_REG(0x190070 + ((x) * 4)) -#define RCS0_RSVD_INTR_MASK XE_REG(0x190090) -#define BCS_RSVD_INTR_MASK XE_REG(0x1900a0) -#define VCS0_VCS1_INTR_MASK XE_REG(0x1900a8) -#define VCS2_VCS3_INTR_MASK XE_REG(0x1900ac) -#define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0) +#define IIR_REG_SELECTOR(x) XE_REG(0x190070 + ((x) * 4), XE_REG_OPTION_VF) +#define RCS0_RSVD_INTR_MASK XE_REG(0x190090, XE_REG_OPTION_VF) +#define BCS_RSVD_INTR_MASK XE_REG(0x1900a0, XE_REG_OPTION_VF) +#define VCS0_VCS1_INTR_MASK XE_REG(0x1900a8, XE_REG_OPTION_VF) +#define VCS2_VCS3_INTR_MASK XE_REG(0x1900ac, XE_REG_OPTION_VF) +#define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0, XE_REG_OPTION_VF) #define HECI2_RSVD_INTR_MASK XE_REG(0x1900e4) -#define GUC_SG_INTR_MASK XE_REG(0x1900e8) -#define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec) -#define GUNIT_GSC_INTR_MASK XE_REG(0x1900f4) +#define GUC_SG_INTR_MASK XE_REG(0x1900e8, XE_REG_OPTION_VF) +#define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec, XE_REG_OPTION_VF) +#define GUNIT_GSC_INTR_MASK XE_REG(0x1900f4, XE_REG_OPTION_VF) #define CCS0_CCS1_INTR_MASK XE_REG(0x190100) #define CCS2_CCS3_INTR_MASK XE_REG(0x190104) #define XEHPC_BCS1_BCS2_INTR_MASK XE_REG(0x190110) @@ -486,6 +514,7 @@ #define XEHPC_BCS7_BCS8_INTR_MASK XE_REG(0x19011c) #define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) #define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8) +#define GSC_ER_COMPLETE REG_BIT(5) #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4) #define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3) #define GT_RENDER_USER_INTERRUPT REG_BIT(0) diff --git a/drivers/gpu/drm/xe/regs/xe_gtt_defs.h b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h new file mode 100644 index 0000000000..4389e5a76f --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_gtt_defs.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GTT_DEFS_H_ +#define _XE_GTT_DEFS_H_ + +#define XELPG_GGTT_PTE_PAT0 BIT_ULL(52) +#define XELPG_GGTT_PTE_PAT1 BIT_ULL(53) + +#define GGTT_PTE_VFID GENMASK_ULL(11, 2) + +#define GUC_GGTT_TOP 0xFEE00000 + +#define XELPG_PPGTT_PTE_PAT3 BIT_ULL(62) +#define XE2_PPGTT_PTE_PAT4 BIT_ULL(61) +#define XE_PPGTT_PDE_PDPE_PAT2 BIT_ULL(12) +#define XE_PPGTT_PTE_PAT2 BIT_ULL(7) +#define XE_PPGTT_PTE_PAT1 BIT_ULL(4) +#define XE_PPGTT_PTE_PAT0 BIT_ULL(3) + +#define XE_PDE_PS_2M BIT_ULL(7) +#define XE_PDPE_PS_1G BIT_ULL(7) +#define XE_PDE_IPS_64K BIT_ULL(11) + +#define XE_GGTT_PTE_DM BIT_ULL(1) +#define XE_USM_PPGTT_PTE_AE BIT_ULL(10) +#define XE_PPGTT_PTE_DM BIT_ULL(11) +#define XE_PDE_64K BIT_ULL(6) +#define XE_PTE_PS64 BIT_ULL(8) +#define XE_PTE_NULL BIT_ULL(9) + +#define XE_PAGE_PRESENT BIT_ULL(0) +#define XE_PAGE_RW BIT_ULL(1) + +#endif diff --git a/drivers/gpu/drm/xe/regs/xe_guc_regs.h b/drivers/gpu/drm/xe/regs/xe_guc_regs.h index 92320bbc9d..11682e675e 100644 --- a/drivers/gpu/drm/xe/regs/xe_guc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_guc_regs.h @@ -100,16 +100,23 @@ #define GT_PM_CONFIG XE_REG(0x13816c) #define GT_DOORBELL_ENABLE REG_BIT(0) -#define GUC_HOST_INTERRUPT XE_REG(0x1901f0) +#define GUC_HOST_INTERRUPT XE_REG(0x1901f0, XE_REG_OPTION_VF) -#define VF_SW_FLAG(n) XE_REG(0x190240 + (n) * 4) +#define VF_SW_FLAG(n) XE_REG(0x190240 + (n) * 4, XE_REG_OPTION_VF) #define VF_SW_FLAG_COUNT 4 -#define MED_GUC_HOST_INTERRUPT XE_REG(0x190304) +#define MED_GUC_HOST_INTERRUPT XE_REG(0x190304, XE_REG_OPTION_VF) -#define MED_VF_SW_FLAG(n) XE_REG(0x190310 + (n) * 4) +#define MED_VF_SW_FLAG(n) XE_REG(0x190310 + (n) * 4, XE_REG_OPTION_VF) #define MED_VF_SW_FLAG_COUNT 4 +#define GUC_TLB_INV_CR XE_REG(0xcee8) +#define GUC_TLB_INV_CR_INVALIDATE REG_BIT(0) +#define PVC_GUC_TLB_INV_DESC0 XE_REG(0xcf7c) +#define PVC_GUC_TLB_INV_DESC0_VALID REG_BIT(0) +#define PVC_GUC_TLB_INV_DESC1 XE_REG(0xcf80) +#define PVC_GUC_TLB_INV_DESC1_INVALIDATE REG_BIT(6) + /* GuC Interrupt Vector */ #define GUC_INTR_GUC2HOST REG_BIT(15) #define GUC_INTR_EXEC_ERROR REG_BIT(14) diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h b/drivers/gpu/drm/xe/regs/xe_reg_defs.h index c50e7650c0..23f7dc5bbe 100644 --- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h +++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h @@ -6,6 +6,8 @@ #ifndef _XE_REG_DEFS_H_ #define _XE_REG_DEFS_H_ +#include <linux/build_bug.h> + #include "compat-i915-headers/i915_reg_defs.h" /** @@ -36,6 +38,10 @@ struct xe_reg { */ u32 mcr:1; /** + * @vf: register is accessible from the Virtual Function. + */ + u32 vf:1; + /** * @ext: access MMIO extension space for current register. */ u32 ext:1; @@ -44,6 +50,7 @@ struct xe_reg { u32 raw; }; }; +static_assert(sizeof(struct xe_reg) == sizeof(u32)); /** * struct xe_reg_mcr - MCR register definition @@ -76,6 +83,13 @@ struct xe_reg_mcr { #define XE_REG_OPTION_MASKED .masked = 1 /** + * XE_REG_OPTION_VF - Register is "VF" accessible. + * + * To be used with XE_REG() and XE_REG_INITIALIZER(). + */ +#define XE_REG_OPTION_VF .vf = 1 + +/** * XE_REG_INITIALIZER - Initializer for xe_reg_t. * @r_: Register offset * @...: Additional options like access mode. See struct xe_reg for available @@ -117,4 +131,9 @@ struct xe_reg_mcr { .__reg = XE_REG_INITIALIZER(r_, ##__VA_ARGS__, .mcr = 1) \ }) +static inline bool xe_reg_is_valid(struct xe_reg r) +{ + return r.addr; +} + #endif diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 2c214bb9b6..722fb6dbb7 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -57,7 +57,7 @@ #define DG1_MSTR_IRQ REG_BIT(31) #define DG1_MSTR_TILE(t) REG_BIT(t) -#define GFX_MSTR_IRQ XE_REG(0x190010) +#define GFX_MSTR_IRQ XE_REG(0x190010, XE_REG_OPTION_VF) #define MASTER_IRQ REG_BIT(31) #define GU_MISC_IRQ REG_BIT(29) #define DISPLAY_IRQ REG_BIT(16) diff --git a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h index 58a4e0fad1..617ddb84b7 100644 --- a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h @@ -14,4 +14,7 @@ #define LMEM_EN REG_BIT(31) #define LMTT_DIR_PTR REG_GENMASK(30, 0) /* in multiples of 64KB */ +#define VF_CAP_REG XE_REG(0x1901f8, XE_REG_OPTION_VF) +#define VF_CAP REG_BIT(0) + #endif diff --git a/drivers/gpu/drm/xe/tests/Makefile b/drivers/gpu/drm/xe/tests/Makefile index 9d1d88af8b..8cf2367449 100644 --- a/drivers/gpu/drm/xe/tests/Makefile +++ b/drivers/gpu/drm/xe/tests/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 # "live" kunit tests -obj-$(CONFIG_DRM_XE_KUNIT_TEST) += \ +obj-$(CONFIG_DRM_XE_KUNIT_TEST) += xe_live_test.o +xe_live_test-y = xe_live_test_mod.o \ xe_bo_test.o \ xe_dma_buf_test.o \ xe_migrate_test.o \ diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 3436fd9cf2..9f3c028264 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -116,7 +116,7 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile, int ret; /* TODO: Sanity check */ - unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile); + unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile); if (IS_DGFX(xe)) kunit_info(test, "Testing vram id %u\n", tile->id); @@ -163,7 +163,7 @@ static int ccs_test_run_device(struct xe_device *xe) return 0; } - xe_device_mem_access_get(xe); + xe_pm_runtime_get(xe); for_each_tile(tile, xe, id) { /* For igfx run only for primary tile */ @@ -172,7 +172,7 @@ static int ccs_test_run_device(struct xe_device *xe) ccs_test_run_tile(xe, tile, test); } - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return 0; } @@ -186,7 +186,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_ccs_migrate_kunit); static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struct kunit *test) { struct xe_bo *bo, *external; - unsigned int bo_flags = XE_BO_CREATE_VRAM_IF_DGFX(tile); + unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile); struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate); struct xe_gt *__gt; int err, i, id; @@ -335,12 +335,12 @@ static int evict_test_run_device(struct xe_device *xe) return 0; } - xe_device_mem_access_get(xe); + xe_pm_runtime_get(xe); for_each_tile(tile, xe, id) evict_test_run_tile(xe, tile, test); - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return 0; } diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.c b/drivers/gpu/drm/xe/tests/xe_bo_test.c index f408f17f21..a324cde77d 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo_test.c +++ b/drivers/gpu/drm/xe/tests/xe_bo_test.c @@ -19,8 +19,3 @@ static struct kunit_suite xe_bo_test_suite = { }; kunit_test_suite(xe_bo_test_suite); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("xe_bo kunit test"); -MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c index a3d2dd42ad..e7f9b531c4 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c @@ -37,14 +37,14 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, xe_bo_assert_held(imported); mem_type = XE_PL_VRAM0; - if (!(params->mem_mask & XE_BO_CREATE_VRAM0_BIT)) + if (!(params->mem_mask & XE_BO_FLAG_VRAM0)) /* No VRAM allowed */ mem_type = XE_PL_TT; else if (params->force_different_devices && !p2p_enabled(params)) /* No P2P */ mem_type = XE_PL_TT; else if (params->force_different_devices && !is_dynamic(params) && - (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) + (params->mem_mask & XE_BO_FLAG_SYSTEM)) /* Pin migrated to TT */ mem_type = XE_PL_TT; @@ -94,7 +94,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported, * possible, saving a migration step as the transfer is just * likely as fast from system memory. */ - if (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT) + if (params->mem_mask & XE_BO_FLAG_SYSTEM) KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, XE_PL_TT)); else KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type)); @@ -116,17 +116,17 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) /* No VRAM on this device? */ if (!ttm_manager_type(&xe->ttm, XE_PL_VRAM0) && - (params->mem_mask & XE_BO_CREATE_VRAM0_BIT)) + (params->mem_mask & XE_BO_FLAG_VRAM0)) return; size = PAGE_SIZE; - if ((params->mem_mask & XE_BO_CREATE_VRAM0_BIT) && + if ((params->mem_mask & XE_BO_FLAG_VRAM0) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) size = SZ_64K; kunit_info(test, "running %s\n", __func__); bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC, - ttm_bo_type_device, XE_BO_CREATE_USER_BIT | params->mem_mask); + ttm_bo_type_device, params->mem_mask); if (IS_ERR(bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(bo)); @@ -149,7 +149,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) */ if (params->force_different_devices && !p2p_enabled(params) && - !(params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) { + !(params->mem_mask & XE_BO_FLAG_SYSTEM)) { KUNIT_FAIL(test, "xe_gem_prime_import() succeeded when it shouldn't have\n"); } else { @@ -162,7 +162,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) /* Pinning in VRAM is not allowed. */ if (!is_dynamic(params) && params->force_different_devices && - !(params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) + !(params->mem_mask & XE_BO_FLAG_SYSTEM)) KUNIT_EXPECT_EQ(test, err, -EINVAL); /* Otherwise only expect interrupts or success. */ else if (err && err != -EINTR && err != -ERESTARTSYS) @@ -181,7 +181,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe) PTR_ERR(import)); } else if (!params->force_different_devices || p2p_enabled(params) || - (params->mem_mask & XE_BO_CREATE_SYSTEM_BIT)) { + (params->mem_mask & XE_BO_FLAG_SYSTEM)) { /* Shouldn't fail if we can reuse same bo, use p2p or use system */ KUNIT_FAIL(test, "dynamic p2p attachment failed with err=%ld\n", PTR_ERR(import)); @@ -204,52 +204,52 @@ static const struct dma_buf_attach_ops nop2p_attach_ops = { * gem object. */ static const struct dma_buf_test_params test_params[] = { - {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + {.mem_mask = XE_BO_FLAG_VRAM0, .attach_ops = &xe_dma_buf_attach_ops}, - {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + {.mem_mask = XE_BO_FLAG_VRAM0, .attach_ops = &xe_dma_buf_attach_ops, .force_different_devices = true}, - {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + {.mem_mask = XE_BO_FLAG_VRAM0, .attach_ops = &nop2p_attach_ops}, - {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + {.mem_mask = XE_BO_FLAG_VRAM0, .attach_ops = &nop2p_attach_ops, .force_different_devices = true}, - {.mem_mask = XE_BO_CREATE_VRAM0_BIT}, - {.mem_mask = XE_BO_CREATE_VRAM0_BIT, + {.mem_mask = XE_BO_FLAG_VRAM0}, + {.mem_mask = XE_BO_FLAG_VRAM0, .force_different_devices = true}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + {.mem_mask = XE_BO_FLAG_SYSTEM, .attach_ops = &xe_dma_buf_attach_ops}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + {.mem_mask = XE_BO_FLAG_SYSTEM, .attach_ops = &xe_dma_buf_attach_ops, .force_different_devices = true}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + {.mem_mask = XE_BO_FLAG_SYSTEM, .attach_ops = &nop2p_attach_ops}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + {.mem_mask = XE_BO_FLAG_SYSTEM, .attach_ops = &nop2p_attach_ops, .force_different_devices = true}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT, + {.mem_mask = XE_BO_FLAG_SYSTEM}, + {.mem_mask = XE_BO_FLAG_SYSTEM, .force_different_devices = true}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0, .attach_ops = &xe_dma_buf_attach_ops}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0, .attach_ops = &xe_dma_buf_attach_ops, .force_different_devices = true}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0, .attach_ops = &nop2p_attach_ops}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0, .attach_ops = &nop2p_attach_ops, .force_different_devices = true}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT}, - {.mem_mask = XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_VRAM0_BIT, + {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0}, + {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0, .force_different_devices = true}, {} diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c index 9f5a9cda8c..99cdb718b6 100644 --- a/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf_test.c @@ -18,8 +18,3 @@ static struct kunit_suite xe_dma_buf_test_suite = { }; kunit_test_suite(xe_dma_buf_test_suite); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("xe_dma_buf kunit test"); -MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_guc_id_mgr_test.c b/drivers/gpu/drm/xe/tests/xe_guc_id_mgr_test.c new file mode 100644 index 0000000000..ee30a1939e --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_guc_id_mgr_test.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include <kunit/test.h> + +#include "xe_device.h" +#include "xe_kunit_helpers.h" + +static int guc_id_mgr_test_init(struct kunit *test) +{ + struct xe_guc_id_mgr *idm; + + xe_kunit_helper_xe_device_test_init(test); + idm = &xe_device_get_gt(test->priv, 0)->uc.guc.submission_state.idm; + + mutex_init(idm_mutex(idm)); + test->priv = idm; + return 0; +} + +static void bad_init(struct kunit *test) +{ + struct xe_guc_id_mgr *idm = test->priv; + + KUNIT_EXPECT_EQ(test, -EINVAL, xe_guc_id_mgr_init(idm, 0)); + KUNIT_EXPECT_EQ(test, -ERANGE, xe_guc_id_mgr_init(idm, GUC_ID_MAX + 1)); +} + +static void no_init(struct kunit *test) +{ + struct xe_guc_id_mgr *idm = test->priv; + + mutex_lock(idm_mutex(idm)); + KUNIT_EXPECT_EQ(test, -ENODATA, xe_guc_id_mgr_reserve_locked(idm, 0)); + mutex_unlock(idm_mutex(idm)); + + KUNIT_EXPECT_EQ(test, -ENODATA, xe_guc_id_mgr_reserve(idm, 1, 1)); +} + +static void init_fini(struct kunit *test) +{ + struct xe_guc_id_mgr *idm = test->priv; + + KUNIT_ASSERT_EQ(test, 0, xe_guc_id_mgr_init(idm, -1)); + KUNIT_EXPECT_NOT_NULL(test, idm->bitmap); + KUNIT_EXPECT_EQ(test, idm->total, GUC_ID_MAX); + __fini_idm(NULL, idm); + KUNIT_EXPECT_NULL(test, idm->bitmap); + KUNIT_EXPECT_EQ(test, idm->total, 0); +} + +static void check_used(struct kunit *test) +{ + struct xe_guc_id_mgr *idm = test->priv; + unsigned int n; + + KUNIT_ASSERT_EQ(test, 0, xe_guc_id_mgr_init(idm, 2)); + + mutex_lock(idm_mutex(idm)); + + for (n = 0; n < idm->total; n++) { + kunit_info(test, "n=%u", n); + KUNIT_EXPECT_EQ(test, idm->used, n); + KUNIT_EXPECT_GE(test, idm_reserve_chunk_locked(idm, 1, 0), 0); + KUNIT_EXPECT_EQ(test, idm->used, n + 1); + } + KUNIT_EXPECT_EQ(test, idm->used, idm->total); + idm_release_chunk_locked(idm, 0, idm->used); + KUNIT_EXPECT_EQ(test, idm->used, 0); + + mutex_unlock(idm_mutex(idm)); +} + +static void check_quota(struct kunit *test) +{ + struct xe_guc_id_mgr *idm = test->priv; + unsigned int n; + + KUNIT_ASSERT_EQ(test, 0, xe_guc_id_mgr_init(idm, 2)); + + mutex_lock(idm_mutex(idm)); + + for (n = 0; n < idm->total - 1; n++) { + kunit_info(test, "n=%u", n); + KUNIT_EXPECT_EQ(test, idm_reserve_chunk_locked(idm, 1, idm->total), -EDQUOT); + KUNIT_EXPECT_EQ(test, idm_reserve_chunk_locked(idm, 1, idm->total - n), -EDQUOT); + KUNIT_EXPECT_EQ(test, idm_reserve_chunk_locked(idm, idm->total - n, 1), -EDQUOT); + KUNIT_EXPECT_GE(test, idm_reserve_chunk_locked(idm, 1, 1), 0); + } + KUNIT_EXPECT_LE(test, 0, idm_reserve_chunk_locked(idm, 1, 0)); + KUNIT_EXPECT_EQ(test, idm->used, idm->total); + idm_release_chunk_locked(idm, 0, idm->total); + KUNIT_EXPECT_EQ(test, idm->used, 0); + + mutex_unlock(idm_mutex(idm)); +} + +static void check_all(struct kunit *test) +{ + struct xe_guc_id_mgr *idm = test->priv; + unsigned int n; + + KUNIT_ASSERT_EQ(test, 0, xe_guc_id_mgr_init(idm, -1)); + + mutex_lock(idm_mutex(idm)); + + for (n = 0; n < idm->total; n++) + KUNIT_EXPECT_LE(test, 0, idm_reserve_chunk_locked(idm, 1, 0)); + KUNIT_EXPECT_EQ(test, idm->used, idm->total); + for (n = 0; n < idm->total; n++) + idm_release_chunk_locked(idm, n, 1); + + mutex_unlock(idm_mutex(idm)); +} + +static struct kunit_case guc_id_mgr_test_cases[] = { + KUNIT_CASE(bad_init), + KUNIT_CASE(no_init), + KUNIT_CASE(init_fini), + KUNIT_CASE(check_used), + KUNIT_CASE(check_quota), + KUNIT_CASE_SLOW(check_all), + {} +}; + +static struct kunit_suite guc_id_mgr_suite = { + .name = "guc_idm", + .test_cases = guc_id_mgr_test_cases, + + .init = guc_id_mgr_test_init, + .exit = NULL, +}; + +kunit_test_suites(&guc_id_mgr_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c new file mode 100644 index 0000000000..eb1ea99a5a --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright © 2023 Intel Corporation + */ +#include <linux/module.h> + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("xe live kunit tests"); +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index c347e2c29f..977d5f4e44 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -10,6 +10,7 @@ #include "tests/xe_pci_test.h" #include "xe_pci.h" +#include "xe_pm.h" static bool sanity_fence_failed(struct xe_device *xe, struct dma_fence *fence, const char *str, struct kunit *test) @@ -112,7 +113,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, bo->size, ttm_bo_type_kernel, region | - XE_BO_NEEDS_CPU_ACCESS); + XE_BO_FLAG_NEEDS_CPU_ACCESS); if (IS_ERR(remote)) { KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %pe\n", str, remote); @@ -190,7 +191,7 @@ out_unlock: static void test_copy_sysmem(struct xe_migrate *m, struct xe_bo *bo, struct kunit *test) { - test_copy(m, bo, test, XE_BO_CREATE_SYSTEM_BIT); + test_copy(m, bo, test, XE_BO_FLAG_SYSTEM); } static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo, @@ -202,9 +203,9 @@ static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo, return; if (bo->ttm.resource->mem_type == XE_PL_VRAM0) - region = XE_BO_CREATE_VRAM1_BIT; + region = XE_BO_FLAG_VRAM1; else - region = XE_BO_CREATE_VRAM0_BIT; + region = XE_BO_FLAG_VRAM0; test_copy(m, bo, test, region); } @@ -280,8 +281,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) big = xe_bo_create_pin_map(xe, tile, m->q->vm, SZ_4M, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_PINNED_BIT); + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_PINNED); if (IS_ERR(big)) { KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big)); goto vunmap; @@ -289,8 +290,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) pt = xe_bo_create_pin_map(xe, tile, m->q->vm, XE_PAGE_SIZE, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_PINNED_BIT); + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_PINNED); if (IS_ERR(pt)) { KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", PTR_ERR(pt)); @@ -300,8 +301,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) tiny = xe_bo_create_pin_map(xe, tile, m->q->vm, 2 * SZ_4K, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_PINNED_BIT); + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_PINNED); if (IS_ERR(tiny)) { KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", PTR_ERR(pt)); @@ -423,17 +424,19 @@ static int migrate_test_run_device(struct xe_device *xe) struct xe_tile *tile; int id; + xe_pm_runtime_get(xe); + for_each_tile(tile, xe, id) { struct xe_migrate *m = tile->migrate; kunit_info(test, "Testing tile id %d.\n", id); xe_vm_lock(m->q->vm, true); - xe_device_mem_access_get(xe); xe_migrate_sanity_test(m, test); - xe_device_mem_access_put(xe); xe_vm_unlock(m->q->vm); } + xe_pm_runtime_put(xe); + return 0; } diff --git a/drivers/gpu/drm/xe/tests/xe_migrate_test.c b/drivers/gpu/drm/xe/tests/xe_migrate_test.c index cf0c173b94..eb0d896341 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate_test.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate_test.c @@ -18,8 +18,3 @@ static struct kunit_suite xe_migrate_test_suite = { }; kunit_test_suite(xe_migrate_test_suite); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("xe_migrate kunit test"); -MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index df5c36b70a..1b8617075b 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -10,10 +10,11 @@ #include "tests/xe_pci_test.h" #include "tests/xe_test.h" -#include "xe_pci.h" +#include "xe_device.h" #include "xe_gt.h" #include "xe_mocs.h" -#include "xe_device.h" +#include "xe_pci.h" +#include "xe_pm.h" struct live_mocs { struct xe_mocs_info table; @@ -28,6 +29,8 @@ static int live_mocs_init(struct live_mocs *arg, struct xe_gt *gt) flags = get_mocs_settings(gt_to_xe(gt), &arg->table); + kunit_info(test, "gt %d", gt->info.id); + kunit_info(test, "gt type %d", gt->info.type); kunit_info(test, "table size %d", arg->table.size); kunit_info(test, "table uc_index %d", arg->table.uc_index); kunit_info(test, "table n_entries %d", arg->table.n_entries); @@ -38,69 +41,72 @@ static int live_mocs_init(struct live_mocs *arg, struct xe_gt *gt) static void read_l3cc_table(struct xe_gt *gt, const struct xe_mocs_info *info) { + struct kunit *test = xe_cur_kunit(); + u32 l3cc, l3cc_expected; unsigned int i; - u32 l3cc; u32 reg_val; u32 ret; - struct kunit *test = xe_cur_kunit(); - - xe_device_mem_access_get(gt_to_xe(gt)); ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n"); - mocs_dbg(>_to_xe(gt)->drm, "L3CC entries:%d\n", info->n_entries); - for (i = 0; - i < (info->n_entries + 1) / 2 ? - (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i), - get_entry_l3cc(info, 2 * i + 1))), 1 : 0; - i++) { - if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) - reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i)); - else - reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i)); - mocs_dbg(>_to_xe(gt)->drm, "%d 0x%x 0x%x 0x%x\n", i, - XELP_LNCFCMOCS(i).addr, reg_val, l3cc); - if (reg_val != l3cc) - KUNIT_FAIL(test, "l3cc reg 0x%x has incorrect val.\n", - XELP_LNCFCMOCS(i).addr); + + for (i = 0; i < info->n_entries; i++) { + if (!(i & 1)) { + if (regs_are_mcr(gt)) + reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_LNCFCMOCS(i >> 1)); + else + reg_val = xe_mmio_read32(gt, XELP_LNCFCMOCS(i >> 1)); + + mocs_dbg(gt, "reg_val=0x%x\n", reg_val); + } else { + /* Just re-use value read on previous iteration */ + reg_val >>= 16; + } + + l3cc_expected = get_entry_l3cc(info, i); + l3cc = reg_val & 0xffff; + + mocs_dbg(gt, "[%u] expected=0x%x actual=0x%x\n", + i, l3cc_expected, l3cc); + + KUNIT_EXPECT_EQ_MSG(test, l3cc_expected, l3cc, + "l3cc idx=%u has incorrect val.\n", i); } xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); - xe_device_mem_access_put(gt_to_xe(gt)); } static void read_mocs_table(struct xe_gt *gt, const struct xe_mocs_info *info) { - struct xe_device *xe = gt_to_xe(gt); - + struct kunit *test = xe_cur_kunit(); + u32 mocs, mocs_expected; unsigned int i; - u32 mocs; u32 reg_val; u32 ret; - struct kunit *test = xe_cur_kunit(); + KUNIT_EXPECT_TRUE_MSG(test, info->unused_entries_index, + "Unused entries index should have been defined\n"); - xe_device_mem_access_get(gt_to_xe(gt)); ret = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); KUNIT_ASSERT_EQ_MSG(test, ret, 0, "Forcewake Failed.\n"); - mocs_dbg(>_to_xe(gt)->drm, "Global MOCS entries:%d\n", info->n_entries); - drm_WARN_ONCE(&xe->drm, !info->unused_entries_index, - "Unused entries index should have been defined\n"); - for (i = 0; - i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0; - i++) { - if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) + + for (i = 0; i < info->n_entries; i++) { + if (regs_are_mcr(gt)) reg_val = xe_gt_mcr_unicast_read_any(gt, XEHP_GLOBAL_MOCS(i)); else reg_val = xe_mmio_read32(gt, XELP_GLOBAL_MOCS(i)); - mocs_dbg(>_to_xe(gt)->drm, "%d 0x%x 0x%x 0x%x\n", i, - XELP_GLOBAL_MOCS(i).addr, reg_val, mocs); - if (reg_val != mocs) - KUNIT_FAIL(test, "mocs reg 0x%x has incorrect val.\n", - XELP_GLOBAL_MOCS(i).addr); + + mocs_expected = get_entry_control(info, i); + mocs = reg_val; + + mocs_dbg(gt, "[%u] expected=0x%x actual=0x%x\n", + i, mocs_expected, mocs); + + KUNIT_EXPECT_EQ_MSG(test, mocs_expected, mocs, + "mocs reg 0x%x has incorrect val.\n", i); } + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); - xe_device_mem_access_put(gt_to_xe(gt)); } static int mocs_kernel_test_run_device(struct xe_device *xe) @@ -113,6 +119,8 @@ static int mocs_kernel_test_run_device(struct xe_device *xe) unsigned int flags; int id; + xe_pm_runtime_get(xe); + for_each_gt(gt, xe, id) { flags = live_mocs_init(&mocs, gt); if (flags & HAS_GLOBAL_MOCS) @@ -120,6 +128,9 @@ static int mocs_kernel_test_run_device(struct xe_device *xe) if (flags & HAS_LNCF_MOCS) read_l3cc_table(gt, &mocs.table); } + + xe_pm_runtime_put(xe); + return 0; } @@ -139,6 +150,8 @@ static int mocs_reset_test_run_device(struct xe_device *xe) int id; struct kunit *test = xe_cur_kunit(); + xe_pm_runtime_get(xe); + for_each_gt(gt, xe, id) { flags = live_mocs_init(&mocs, gt); kunit_info(test, "mocs_reset_test before reset\n"); @@ -156,6 +169,9 @@ static int mocs_reset_test_run_device(struct xe_device *xe) if (flags & HAS_LNCF_MOCS) read_l3cc_table(gt, &mocs.table); } + + xe_pm_runtime_put(xe); + return 0; } diff --git a/drivers/gpu/drm/xe/tests/xe_mocs_test.c b/drivers/gpu/drm/xe/tests/xe_mocs_test.c index ee40f31e1e..6315886b65 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs_test.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs_test.c @@ -19,8 +19,3 @@ static struct kunit_suite xe_mocs_test_suite = { }; kunit_test_suite(xe_mocs_test_suite); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("xe_mocs kunit test"); -MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c index 44570d8883..9d0c715142 100644 --- a/drivers/gpu/drm/xe/tests/xe_wa_test.c +++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c @@ -71,6 +71,7 @@ static const struct platform_test_case cases[] = { SUBPLATFORM_CASE(DG2, G12, A1), GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0), GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0), + GMDID_CASE(METEORLAKE, 1274, A0, 1300, A0), GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0), GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0), }; diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index a35e0781b7..541361caff 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -86,7 +86,8 @@ struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q, }; xe_gt_assert(q->gt, second_idx <= bb->len); - xe_gt_assert(q->gt, q->vm->flags & XE_VM_FLAG_MIGRATION); + xe_gt_assert(q->gt, xe_sched_job_is_migration(q)); + xe_gt_assert(q->gt, q->width == 1); return __xe_bb_create_job(q, bb, addr); } diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 9c0837b6fd..b6f3a43d63 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -22,6 +22,7 @@ #include "xe_gt.h" #include "xe_map.h" #include "xe_migrate.h" +#include "xe_pm.h" #include "xe_preempt_fence.h" #include "xe_res_cursor.h" #include "xe_trace.h" @@ -111,7 +112,7 @@ bool xe_bo_is_stolen_devmem(struct xe_bo *bo) static bool xe_bo_is_user(struct xe_bo *bo) { - return bo->flags & XE_BO_CREATE_USER_BIT; + return bo->flags & XE_BO_FLAG_USER; } static struct xe_migrate * @@ -137,7 +138,7 @@ static struct xe_mem_region *res_to_mem_region(struct ttm_resource *res) static void try_add_system(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags, u32 *c) { - if (bo_flags & XE_BO_CREATE_SYSTEM_BIT) { + if (bo_flags & XE_BO_FLAG_SYSTEM) { xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); bo->placements[*c] = (struct ttm_place) { @@ -164,12 +165,12 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, * For eviction / restore on suspend / resume objects * pinned in VRAM must be contiguous */ - if (bo_flags & (XE_BO_CREATE_PINNED_BIT | - XE_BO_CREATE_GGTT_BIT)) + if (bo_flags & (XE_BO_FLAG_PINNED | + XE_BO_FLAG_GGTT)) place.flags |= TTM_PL_FLAG_CONTIGUOUS; if (io_size < vram->usable_size) { - if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) { + if (bo_flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) { place.fpfn = 0; place.lpfn = io_size >> PAGE_SHIFT; } else { @@ -183,22 +184,22 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, static void try_add_vram(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags, u32 *c) { - if (bo_flags & XE_BO_CREATE_VRAM0_BIT) + if (bo_flags & XE_BO_FLAG_VRAM0) add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c); - if (bo_flags & XE_BO_CREATE_VRAM1_BIT) + if (bo_flags & XE_BO_FLAG_VRAM1) add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c); } static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo, u32 bo_flags, u32 *c) { - if (bo_flags & XE_BO_CREATE_STOLEN_BIT) { + if (bo_flags & XE_BO_FLAG_STOLEN) { xe_assert(xe, *c < ARRAY_SIZE(bo->placements)); bo->placements[*c] = (struct ttm_place) { .mem_type = XE_PL_STOLEN, - .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT | - XE_BO_CREATE_GGTT_BIT) ? + .flags = bo_flags & (XE_BO_FLAG_PINNED | + XE_BO_FLAG_GGTT) ? TTM_PL_FLAG_CONTIGUOUS : 0, }; *c += 1; @@ -316,7 +317,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, struct xe_device *xe = xe_bo_device(bo); struct xe_ttm_tt *tt; unsigned long extra_pages; - enum ttm_caching caching; + enum ttm_caching caching = ttm_cached; int err; tt = kzalloc(sizeof(*tt), GFP_KERNEL); @@ -330,26 +331,35 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size), PAGE_SIZE); - switch (bo->cpu_caching) { - case DRM_XE_GEM_CPU_CACHING_WC: - caching = ttm_write_combined; - break; - default: - caching = ttm_cached; - break; - } - - WARN_ON((bo->flags & XE_BO_CREATE_USER_BIT) && !bo->cpu_caching); - /* - * Display scanout is always non-coherent with the CPU cache. - * - * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and - * require a CPU:WC mapping. + * DGFX system memory is always WB / ttm_cached, since + * other caching modes are only supported on x86. DGFX + * GPU system memory accesses are always coherent with the + * CPU. */ - if ((!bo->cpu_caching && bo->flags & XE_BO_SCANOUT_BIT) || - (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_PAGETABLE)) - caching = ttm_write_combined; + if (!IS_DGFX(xe)) { + switch (bo->cpu_caching) { + case DRM_XE_GEM_CPU_CACHING_WC: + caching = ttm_write_combined; + break; + default: + caching = ttm_cached; + break; + } + + WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching); + + /* + * Display scanout is always non-coherent with the CPU cache. + * + * For Xe_LPG and beyond, PPGTT PTE lookups are also + * non-coherent and require a CPU:WC mapping. + */ + if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) || + (xe->info.graphics_verx100 >= 1270 && + bo->flags & XE_BO_FLAG_PAGETABLE)) + caching = ttm_write_combined; + } err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages); if (err) { @@ -715,7 +725,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, xe_assert(xe, migrate); trace_xe_bo_move(bo, new_mem->mem_type, old_mem_type, move_lacks_source); - xe_device_mem_access_get(xe); + xe_pm_runtime_get_noresume(xe); if (xe_bo_is_pinned(bo) && !xe_bo_is_user(bo)) { /* @@ -739,7 +749,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, if (XE_WARN_ON(new_mem->start == XE_BO_INVALID_OFFSET)) { ret = -EINVAL; - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); goto out; } @@ -757,7 +767,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, new_mem, handle_system_ccs); if (IS_ERR(fence)) { ret = PTR_ERR(fence); - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); goto out; } if (!move_lacks_source) { @@ -782,7 +792,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, dma_fence_put(fence); } - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); out: return ret; @@ -794,7 +804,6 @@ out: * @bo: The buffer object to move. * * On successful completion, the object memory will be moved to sytem memory. - * This function blocks until the object has been fully moved. * * This is needed to for special handling of pinned VRAM object during * suspend-resume. @@ -851,9 +860,6 @@ int xe_bo_evict_pinned(struct xe_bo *bo) if (ret) goto err_res_free; - dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL, - false, MAX_SCHEDULE_TIMEOUT); - return 0; err_res_free: @@ -866,7 +872,6 @@ err_res_free: * @bo: The buffer object to move. * * On successful completion, the object memory will be moved back to VRAM. - * This function blocks until the object has been fully moved. * * This is needed to for special handling of pinned VRAM object during * suspend-resume. @@ -908,9 +913,6 @@ int xe_bo_restore_pinned(struct xe_bo *bo) if (ret) goto err_res_free; - dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL, - false, MAX_SCHEDULE_TIMEOUT); - return 0; err_res_free: @@ -1110,12 +1112,12 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf) struct drm_device *ddev = tbo->base.dev; struct xe_device *xe = to_xe_device(ddev); struct xe_bo *bo = ttm_to_xe_bo(tbo); - bool needs_rpm = bo->flags & XE_BO_CREATE_VRAM_MASK; + bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK; vm_fault_t ret; int idx; if (needs_rpm) - xe_device_mem_access_get(xe); + xe_pm_runtime_get(xe); ret = ttm_bo_vm_reserve(tbo, vmf); if (ret) @@ -1146,7 +1148,7 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf) dma_resv_unlock(tbo->base.resv); out: if (needs_rpm) - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return ret; } @@ -1223,18 +1225,19 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, return ERR_PTR(-EINVAL); } - if (flags & (XE_BO_CREATE_VRAM_MASK | XE_BO_CREATE_STOLEN_BIT) && - !(flags & XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT) && - xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) { + if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) && + !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) && + ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) || + (flags & XE_BO_NEEDS_64K))) { aligned_size = ALIGN(size, SZ_64K); if (type != ttm_bo_type_device) size = ALIGN(size, SZ_64K); - flags |= XE_BO_INTERNAL_64K; + flags |= XE_BO_FLAG_INTERNAL_64K; alignment = SZ_64K >> PAGE_SHIFT; } else { aligned_size = ALIGN(size, SZ_4K); - flags &= ~XE_BO_INTERNAL_64K; + flags &= ~XE_BO_FLAG_INTERNAL_64K; alignment = SZ_4K >> PAGE_SHIFT; } @@ -1263,11 +1266,11 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size); if (resv) { - ctx.allow_res_evict = !(flags & XE_BO_CREATE_NO_RESV_EVICT); + ctx.allow_res_evict = !(flags & XE_BO_FLAG_NO_RESV_EVICT); ctx.resv = resv; } - if (!(flags & XE_BO_FIXED_PLACEMENT_BIT)) { + if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) { err = __xe_bo_placement_for_flags(xe, bo, bo->flags); if (WARN_ON(err)) { xe_ttm_bo_destroy(&bo->ttm); @@ -1277,7 +1280,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, /* Defer populating type_sg bos */ placement = (type == ttm_bo_type_sg || - bo->flags & XE_BO_DEFER_BACKING) ? &sys_placement : + bo->flags & XE_BO_FLAG_DEFER_BACKING) ? &sys_placement : &bo->placement; err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type, placement, alignment, @@ -1332,21 +1335,21 @@ static int __xe_bo_fixed_placement(struct xe_device *xe, { struct ttm_place *place = bo->placements; - if (flags & (XE_BO_CREATE_USER_BIT|XE_BO_CREATE_SYSTEM_BIT)) + if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM)) return -EINVAL; place->flags = TTM_PL_FLAG_CONTIGUOUS; place->fpfn = start >> PAGE_SHIFT; place->lpfn = end >> PAGE_SHIFT; - switch (flags & (XE_BO_CREATE_STOLEN_BIT | XE_BO_CREATE_VRAM_MASK)) { - case XE_BO_CREATE_VRAM0_BIT: + switch (flags & (XE_BO_FLAG_STOLEN | XE_BO_FLAG_VRAM_MASK)) { + case XE_BO_FLAG_VRAM0: place->mem_type = XE_PL_VRAM0; break; - case XE_BO_CREATE_VRAM1_BIT: + case XE_BO_FLAG_VRAM1: place->mem_type = XE_PL_VRAM1; break; - case XE_BO_CREATE_STOLEN_BIT: + case XE_BO_FLAG_STOLEN: place->mem_type = XE_PL_STOLEN; break; @@ -1380,7 +1383,7 @@ __xe_bo_create_locked(struct xe_device *xe, if (IS_ERR(bo)) return bo; - flags |= XE_BO_FIXED_PLACEMENT_BIT; + flags |= XE_BO_FLAG_FIXED_PLACEMENT; err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size); if (err) { xe_bo_free(bo); @@ -1390,7 +1393,7 @@ __xe_bo_create_locked(struct xe_device *xe, bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL, vm && !xe_vm_in_fault_mode(vm) && - flags & XE_BO_CREATE_USER_BIT ? + flags & XE_BO_FLAG_USER ? &vm->lru_bulk_move : NULL, size, cpu_caching, type, flags); if (IS_ERR(bo)) @@ -1407,13 +1410,13 @@ __xe_bo_create_locked(struct xe_device *xe, xe_vm_get(vm); bo->vm = vm; - if (bo->flags & XE_BO_CREATE_GGTT_BIT) { - if (!tile && flags & XE_BO_CREATE_STOLEN_BIT) + if (bo->flags & XE_BO_FLAG_GGTT) { + if (!tile && flags & XE_BO_FLAG_STOLEN) tile = xe_device_get_root_tile(xe); xe_assert(xe, tile); - if (flags & XE_BO_FIXED_PLACEMENT_BIT) { + if (flags & XE_BO_FLAG_FIXED_PLACEMENT) { err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo, start + bo->size, U64_MAX); } else { @@ -1456,7 +1459,7 @@ struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile, { struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, cpu_caching, type, - flags | XE_BO_CREATE_USER_BIT); + flags | XE_BO_FLAG_USER); if (!IS_ERR(bo)) xe_bo_unlock_vm_held(bo); @@ -1485,12 +1488,12 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile u64 start = offset == ~0ull ? 0 : offset; u64 end = offset == ~0ull ? offset : start + size; - if (flags & XE_BO_CREATE_STOLEN_BIT && + if (flags & XE_BO_FLAG_STOLEN && xe_ttm_stolen_cpu_access_needs_ggtt(xe)) - flags |= XE_BO_CREATE_GGTT_BIT; + flags |= XE_BO_FLAG_GGTT; bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type, - flags | XE_BO_NEEDS_CPU_ACCESS); + flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); if (IS_ERR(bo)) return bo; @@ -1587,13 +1590,15 @@ struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_til int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src) { struct xe_bo *bo; + u32 dst_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT; + + dst_flags |= (*src)->flags & XE_BO_FLAG_GGTT_INVALIDATE; xe_assert(xe, IS_DGFX(xe)); xe_assert(xe, !(*src)->vmap.is_iomem); - bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr, (*src)->size, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_GGTT_BIT); + bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr, + (*src)->size, dst_flags); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -1668,8 +1673,8 @@ int xe_bo_pin(struct xe_bo *bo) xe_assert(xe, !xe_bo_is_user(bo)); /* Pinned object must be in GGTT or have pinned flag */ - xe_assert(xe, bo->flags & (XE_BO_CREATE_PINNED_BIT | - XE_BO_CREATE_GGTT_BIT)); + xe_assert(xe, bo->flags & (XE_BO_FLAG_PINNED | + XE_BO_FLAG_GGTT)); /* * No reason we can't support pinning imported dma-bufs we just don't @@ -1690,7 +1695,7 @@ int xe_bo_pin(struct xe_bo *bo) * during suspend / resume (force restore to same physical address). */ if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) && - bo->flags & XE_BO_INTERNAL_TEST)) { + bo->flags & XE_BO_FLAG_INTERNAL_TEST)) { struct ttm_place *place = &(bo->placements[0]); if (mem_type_is_vram(place->mem_type)) { @@ -1758,7 +1763,7 @@ void xe_bo_unpin(struct xe_bo *bo) xe_assert(xe, xe_bo_is_pinned(bo)); if (IS_DGFX(xe) && !(IS_ENABLED(CONFIG_DRM_XE_DEBUG) && - bo->flags & XE_BO_INTERNAL_TEST)) { + bo->flags & XE_BO_FLAG_INTERNAL_TEST)) { struct ttm_place *place = &(bo->placements[0]); if (mem_type_is_vram(place->mem_type)) { @@ -1861,7 +1866,7 @@ int xe_bo_vmap(struct xe_bo *bo) xe_bo_assert_held(bo); - if (!(bo->flags & XE_BO_NEEDS_CPU_ACCESS)) + if (!(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS)) return -EINVAL; if (!iosys_map_is_null(&bo->vmap)) @@ -1943,29 +1948,29 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data, bo_flags = 0; if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING) - bo_flags |= XE_BO_DEFER_BACKING; + bo_flags |= XE_BO_FLAG_DEFER_BACKING; if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT) - bo_flags |= XE_BO_SCANOUT_BIT; + bo_flags |= XE_BO_FLAG_SCANOUT; - bo_flags |= args->placement << (ffs(XE_BO_CREATE_SYSTEM_BIT) - 1); + bo_flags |= args->placement << (ffs(XE_BO_FLAG_SYSTEM) - 1); if (args->flags & DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM) { - if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_CREATE_VRAM_MASK))) + if (XE_IOCTL_DBG(xe, !(bo_flags & XE_BO_FLAG_VRAM_MASK))) return -EINVAL; - bo_flags |= XE_BO_NEEDS_CPU_ACCESS; + bo_flags |= XE_BO_FLAG_NEEDS_CPU_ACCESS; } if (XE_IOCTL_DBG(xe, !args->cpu_caching || args->cpu_caching > DRM_XE_GEM_CPU_CACHING_WC)) return -EINVAL; - if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_CREATE_VRAM_MASK && + if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_VRAM_MASK && args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC)) return -EINVAL; - if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_SCANOUT_BIT && + if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_SCANOUT && args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) return -EINVAL; @@ -2206,6 +2211,9 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo) { struct xe_device *xe = xe_bo_device(bo); + if (GRAPHICS_VER(xe) >= 20 && IS_DGFX(xe)) + return false; + if (!xe_device_has_flat_ccs(xe) || bo->ttm.type != ttm_bo_type_device) return false; @@ -2214,7 +2222,7 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo) * can't be used since there's no CCS storage associated with * non-VRAM addresses. */ - if (IS_DGFX(xe) && (bo->flags & XE_BO_CREATE_SYSTEM_BIT)) + if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM)) return false; return true; @@ -2283,9 +2291,9 @@ int xe_bo_dumb_create(struct drm_file *file_priv, bo = xe_bo_create_user(xe, NULL, NULL, args->size, DRM_XE_GEM_CPU_CACHING_WC, ttm_bo_type_device, - XE_BO_CREATE_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | - XE_BO_CREATE_USER_BIT | XE_BO_SCANOUT_BIT | - XE_BO_NEEDS_CPU_ACCESS); + XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) | + XE_BO_FLAG_SCANOUT | + XE_BO_FLAG_NEEDS_CPU_ACCESS); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index c59ad15961..a885b14bf5 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -13,48 +13,34 @@ #include "xe_vm_types.h" #include "xe_vm.h" -/** - * xe_vm_assert_held(vm) - Assert that the vm's reservation object is held. - * @vm: The vm - */ -#define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm)) - - - #define XE_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ -#define XE_BO_CREATE_USER_BIT BIT(0) +#define XE_BO_FLAG_USER BIT(0) /* The bits below need to be contiguous, or things break */ -#define XE_BO_CREATE_SYSTEM_BIT BIT(1) -#define XE_BO_CREATE_VRAM0_BIT BIT(2) -#define XE_BO_CREATE_VRAM1_BIT BIT(3) -#define XE_BO_CREATE_VRAM_MASK (XE_BO_CREATE_VRAM0_BIT | \ - XE_BO_CREATE_VRAM1_BIT) +#define XE_BO_FLAG_SYSTEM BIT(1) +#define XE_BO_FLAG_VRAM0 BIT(2) +#define XE_BO_FLAG_VRAM1 BIT(3) +#define XE_BO_FLAG_VRAM_MASK (XE_BO_FLAG_VRAM0 | XE_BO_FLAG_VRAM1) /* -- */ -#define XE_BO_CREATE_STOLEN_BIT BIT(4) -#define XE_BO_CREATE_VRAM_IF_DGFX(tile) \ - (IS_DGFX(tile_to_xe(tile)) ? XE_BO_CREATE_VRAM0_BIT << (tile)->id : \ - XE_BO_CREATE_SYSTEM_BIT) -#define XE_BO_CREATE_GGTT_BIT BIT(5) -#define XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT BIT(6) -#define XE_BO_CREATE_PINNED_BIT BIT(7) -#define XE_BO_CREATE_NO_RESV_EVICT BIT(8) -#define XE_BO_DEFER_BACKING BIT(9) -#define XE_BO_SCANOUT_BIT BIT(10) -#define XE_BO_FIXED_PLACEMENT_BIT BIT(11) -#define XE_BO_PAGETABLE BIT(12) -#define XE_BO_NEEDS_CPU_ACCESS BIT(13) -#define XE_BO_NEEDS_UC BIT(14) +#define XE_BO_FLAG_STOLEN BIT(4) +#define XE_BO_FLAG_VRAM_IF_DGFX(tile) (IS_DGFX(tile_to_xe(tile)) ? \ + XE_BO_FLAG_VRAM0 << (tile)->id : \ + XE_BO_FLAG_SYSTEM) +#define XE_BO_FLAG_GGTT BIT(5) +#define XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE BIT(6) +#define XE_BO_FLAG_PINNED BIT(7) +#define XE_BO_FLAG_NO_RESV_EVICT BIT(8) +#define XE_BO_FLAG_DEFER_BACKING BIT(9) +#define XE_BO_FLAG_SCANOUT BIT(10) +#define XE_BO_FLAG_FIXED_PLACEMENT BIT(11) +#define XE_BO_FLAG_PAGETABLE BIT(12) +#define XE_BO_FLAG_NEEDS_CPU_ACCESS BIT(13) +#define XE_BO_FLAG_NEEDS_UC BIT(14) +#define XE_BO_NEEDS_64K BIT(15) +#define XE_BO_FLAG_GGTT_INVALIDATE BIT(16) /* this one is trigger internally only */ -#define XE_BO_INTERNAL_TEST BIT(30) -#define XE_BO_INTERNAL_64K BIT(31) - -#define XELPG_PPGTT_PTE_PAT3 BIT_ULL(62) -#define XE2_PPGTT_PTE_PAT4 BIT_ULL(61) -#define XE_PPGTT_PDE_PDPE_PAT2 BIT_ULL(12) -#define XE_PPGTT_PTE_PAT2 BIT_ULL(7) -#define XE_PPGTT_PTE_PAT1 BIT_ULL(4) -#define XE_PPGTT_PTE_PAT0 BIT_ULL(3) +#define XE_BO_FLAG_INTERNAL_TEST BIT(30) +#define XE_BO_FLAG_INTERNAL_64K BIT(31) #define XE_PTE_SHIFT 12 #define XE_PAGE_SIZE (1 << XE_PTE_SHIFT) @@ -68,20 +54,6 @@ #define XE_64K_PTE_MASK (XE_64K_PAGE_SIZE - 1) #define XE_64K_PDE_MASK (XE_PDE_MASK >> 4) -#define XE_PDE_PS_2M BIT_ULL(7) -#define XE_PDPE_PS_1G BIT_ULL(7) -#define XE_PDE_IPS_64K BIT_ULL(11) - -#define XE_GGTT_PTE_DM BIT_ULL(1) -#define XE_USM_PPGTT_PTE_AE BIT_ULL(10) -#define XE_PPGTT_PTE_DM BIT_ULL(11) -#define XE_PDE_64K BIT_ULL(6) -#define XE_PTE_PS64 BIT_ULL(8) -#define XE_PTE_NULL BIT_ULL(9) - -#define XE_PAGE_PRESENT BIT_ULL(0) -#define XE_PAGE_RW BIT_ULL(1) - #define XE_PL_SYSTEM TTM_PL_SYSTEM #define XE_PL_TT TTM_PL_TT #define XE_PL_VRAM0 TTM_PL_VRAM diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 7a264a9ca0..541b49007d 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -146,7 +146,7 @@ int xe_bo_restore_kernel(struct xe_device *xe) return ret; } - if (bo->flags & XE_BO_CREATE_GGTT_BIT) { + if (bo->flags & XE_BO_FLAG_GGTT) { struct xe_tile *tile = bo->tile; mutex_lock(&tile->mem.ggtt->lock); @@ -220,7 +220,7 @@ int xe_bo_restore_user(struct xe_device *xe) list_splice_tail(&still_in_list, &xe->pinned.external_vram); spin_unlock(&xe->pinned.lock); - /* Wait for validate to complete */ + /* Wait for restore to complete */ for_each_tile(tile, xe, id) xe_tile_migrate_wait(tile); diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 86422e113d..10450f1fbb 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -66,7 +66,8 @@ struct xe_bo { /** * @cpu_caching: CPU caching mode. Currently only used for userspace - * objects. + * objects. Exceptions are system memory on DGFX, which is always + * WB. */ u16 cpu_caching; diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 01db5b27be..0b7aebaae8 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -5,6 +5,7 @@ #include "xe_debugfs.h" +#include <linux/debugfs.h> #include <linux/string_helpers.h> #include <drm/drm_debugfs.h> @@ -12,6 +13,8 @@ #include "xe_bo.h" #include "xe_device.h" #include "xe_gt_debugfs.h" +#include "xe_pm.h" +#include "xe_sriov.h" #include "xe_step.h" #ifdef CONFIG_DRM_XE_DEBUG @@ -37,6 +40,8 @@ static int info(struct seq_file *m, void *data) struct xe_gt *gt; u8 id; + xe_pm_runtime_get(xe); + drm_printf(&p, "graphics_verx100 %d\n", xe->info.graphics_verx100); drm_printf(&p, "media_verx100 %d\n", xe->info.media_verx100); drm_printf(&p, "stepping G:%s M:%s D:%s B:%s\n", @@ -63,11 +68,22 @@ static int info(struct seq_file *m, void *data) gt->info.engine_mask); } + xe_pm_runtime_put(xe); + return 0; +} + +static int sriov_info(struct seq_file *m, void *data) +{ + struct xe_device *xe = node_to_xe(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + xe_sriov_print_info(xe, &p); return 0; } static const struct drm_info_list debugfs_list[] = { {"info", info, 0}, + { .name = "sriov_info", .show = sriov_info, }, }; static int forcewake_open(struct inode *inode, struct file *file) @@ -76,8 +92,7 @@ static int forcewake_open(struct inode *inode, struct file *file) struct xe_gt *gt; u8 id; - xe_device_mem_access_get(xe); - + xe_pm_runtime_get(xe); for_each_gt(gt, xe, id) XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); @@ -92,8 +107,7 @@ static int forcewake_release(struct inode *inode, struct file *file) for_each_gt(gt, xe, id) XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return 0; } @@ -127,7 +141,7 @@ void xe_debugfs_register(struct xe_device *xe) if (man) { char name[16]; - sprintf(name, "vram%d_mm", mem_type - XE_PL_VRAM0); + snprintf(name, sizeof(name), "vram%d_mm", mem_type - XE_PL_VRAM0); ttm_resource_manager_create_debugfs(man, root, name); } } diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index ccec291b02..3d7980232b 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -9,10 +9,13 @@ #include <linux/devcoredump.h> #include <generated/utsrelease.h> +#include <drm/drm_managed.h> + #include "xe_device.h" #include "xe_exec_queue.h" #include "xe_force_wake.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_guc_ct.h" #include "xe_guc_submit.h" #include "xe_hw_engine.h" @@ -64,9 +67,11 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work) { struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); - xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); - if (ss->vm) - xe_vm_snapshot_capture_delayed(ss->vm); + /* keep going if fw fails as we still want to save the memory and SW data */ + if (xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL)) + xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); + xe_vm_snapshot_capture_delayed(ss->vm); + xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); } @@ -119,10 +124,8 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, if (coredump->snapshot.hwe[i]) xe_hw_engine_snapshot_print(coredump->snapshot.hwe[i], &p); - if (coredump->snapshot.vm) { - drm_printf(&p, "\n**** VM state ****\n"); - xe_vm_snapshot_print(coredump->snapshot.vm, &p); - } + drm_printf(&p, "\n**** VM state ****\n"); + xe_vm_snapshot_print(coredump->snapshot.vm, &p); return count - iter.remain; } @@ -182,10 +185,12 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, } } - xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); + /* keep going if fw fails as we still want to save the memory and SW data */ + if (xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL)) + xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true); - coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(job); + coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(q); coredump->snapshot.job = xe_sched_job_snapshot_capture(job); coredump->snapshot.vm = xe_vm_snapshot_capture(q->vm); @@ -198,8 +203,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe); } - if (ss->vm) - queue_work(system_unbound_wq, &ss->work); + queue_work(system_unbound_wq, &ss->work); xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); dma_fence_end_signalling(cookie); @@ -233,5 +237,14 @@ void xe_devcoredump(struct xe_sched_job *job) dev_coredumpm(xe->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL, xe_devcoredump_read, xe_devcoredump_free); } -#endif +static void xe_driver_devcoredump_fini(struct drm_device *drm, void *arg) +{ + dev_coredump_put(drm->dev); +} + +int xe_devcoredump_init(struct xe_device *xe) +{ + return drmm_add_action_or_reset(&xe->drm, xe_driver_devcoredump_fini, xe); +} +#endif diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h index df8671f0b5..e2fa65ce09 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.h +++ b/drivers/gpu/drm/xe/xe_devcoredump.h @@ -11,10 +11,16 @@ struct xe_sched_job; #ifdef CONFIG_DEV_COREDUMP void xe_devcoredump(struct xe_sched_job *job); +int xe_devcoredump_init(struct xe_device *xe); #else static inline void xe_devcoredump(struct xe_sched_job *job) { } + +static inline int xe_devcoredump_init(struct xe_device *xe) +{ + return 0; +} #endif #endif diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index b3b37ed832..5ef9b50a20 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -9,6 +9,7 @@ #include <drm/drm_aperture.h> #include <drm/drm_atomic_helper.h> +#include <drm/drm_client.h> #include <drm/drm_gem_ttm_helper.h> #include <drm/drm_ioctl.h> #include <drm/drm_managed.h> @@ -20,6 +21,7 @@ #include "regs/xe_regs.h" #include "xe_bo.h" #include "xe_debugfs.h" +#include "xe_devcoredump.h" #include "xe_dma_buf.h" #include "xe_drm_client.h" #include "xe_drv.h" @@ -45,12 +47,6 @@ #include "xe_vm.h" #include "xe_wait_user_fence.h" -#ifdef CONFIG_LOCKDEP -struct lockdep_map xe_device_mem_access_lockdep_map = { - .name = "xe_device_mem_access_lockdep_map" -}; -#endif - static int xe_file_open(struct drm_device *dev, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); @@ -136,15 +132,48 @@ static const struct drm_ioctl_desc xe_ioctls[] = { DRM_RENDER_ALLOW), }; +static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct drm_file *file_priv = file->private_data; + struct xe_device *xe = to_xe_device(file_priv->minor->dev); + long ret; + + ret = xe_pm_runtime_get_ioctl(xe); + if (ret >= 0) + ret = drm_ioctl(file, cmd, arg); + xe_pm_runtime_put(xe); + + return ret; +} + +#ifdef CONFIG_COMPAT +static long xe_drm_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct drm_file *file_priv = file->private_data; + struct xe_device *xe = to_xe_device(file_priv->minor->dev); + long ret; + + ret = xe_pm_runtime_get_ioctl(xe); + if (ret >= 0) + ret = drm_compat_ioctl(file, cmd, arg); + xe_pm_runtime_put(xe); + + return ret; +} +#else +/* similarly to drm_compat_ioctl, let's it be assigned to .compat_ioct unconditionally */ +#define xe_drm_compat_ioctl NULL +#endif + static const struct file_operations xe_driver_fops = { .owner = THIS_MODULE, .open = drm_open, .release = drm_release_noglobal, - .unlocked_ioctl = drm_ioctl, + .unlocked_ioctl = xe_drm_ioctl, .mmap = drm_gem_mmap, .poll = drm_poll, .read = drm_read, - .compat_ioctl = drm_compat_ioctl, + .compat_ioctl = xe_drm_compat_ioctl, .llseek = noop_llseek, #ifdef CONFIG_PROC_FS .show_fdinfo = drm_show_fdinfo, @@ -389,12 +418,68 @@ mask_err: return err; } +static bool verify_lmem_ready(struct xe_gt *gt) +{ + u32 val = xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT; + + return !!val; +} + +static int wait_for_lmem_ready(struct xe_device *xe) +{ + struct xe_gt *gt = xe_root_mmio_gt(xe); + unsigned long timeout, start; + + if (!IS_DGFX(xe)) + return 0; + + if (IS_SRIOV_VF(xe)) + return 0; + + if (verify_lmem_ready(gt)) + return 0; + + drm_dbg(&xe->drm, "Waiting for lmem initialization\n"); + + start = jiffies; + timeout = start + msecs_to_jiffies(60 * 1000); /* 60 sec! */ + + do { + if (signal_pending(current)) + return -EINTR; + + /* + * The boot firmware initializes local memory and + * assesses its health. If memory training fails, + * the punit will have been instructed to keep the GT powered + * down.we won't be able to communicate with it + * + * If the status check is done before punit updates the register, + * it can lead to the system being unusable. + * use a timeout and defer the probe to prevent this. + */ + if (time_after(jiffies, timeout)) { + drm_dbg(&xe->drm, "lmem not initialized by firmware\n"); + return -EPROBE_DEFER; + } + + msleep(20); + + } while (!verify_lmem_ready(gt)); + + drm_dbg(&xe->drm, "lmem ready after %ums", + jiffies_to_msecs(jiffies - start)); + + return 0; +} + /** * xe_device_probe_early: Device early probe * @xe: xe device instance * * Initialize MMIO resources that don't require any - * knowledge about tile count. Also initialize pcode + * knowledge about tile count. Also initialize pcode and + * check vram initialization on root tile. * * Return: 0 on success, error code on failure */ @@ -406,11 +491,13 @@ int xe_device_probe_early(struct xe_device *xe) if (err) return err; - err = xe_mmio_root_tile_init(xe); + xe_sriov_probe_early(xe); + + err = xe_pcode_probe_early(xe); if (err) return err; - err = xe_pcode_probe_early(xe); + err = wait_for_lmem_ready(xe); if (err) return err; @@ -488,6 +575,9 @@ int xe_device_probe(struct xe_device *xe) return err; } + err = xe_devcoredump_init(xe); + if (err) + return err; err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe); if (err) return err; @@ -560,11 +650,7 @@ int xe_device_probe(struct xe_device *xe) xe_hwmon_register(xe); - err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe); - if (err) - return err; - - return 0; + return drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe); err_fini_display: xe_display_driver_remove(xe); @@ -628,87 +714,20 @@ u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0; } -bool xe_device_mem_access_ongoing(struct xe_device *xe) -{ - if (xe_pm_read_callback_task(xe) != NULL) - return true; - - return atomic_read(&xe->mem_access.ref); -} - +/** + * xe_device_assert_mem_access - Inspect the current runtime_pm state. + * @xe: xe device instance + * + * To be used before any kind of memory access. It will splat a debug warning + * if the device is currently sleeping. But it doesn't guarantee in any way + * that the device is going to remain awake. Xe PM runtime get and put + * functions might be added to the outer bound of the memory access, while + * this check is intended for inner usage to splat some warning if the worst + * case has just happened. + */ void xe_device_assert_mem_access(struct xe_device *xe) { - XE_WARN_ON(!xe_device_mem_access_ongoing(xe)); -} - -bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe) -{ - bool active; - - if (xe_pm_read_callback_task(xe) == current) - return true; - - active = xe_pm_runtime_get_if_active(xe); - if (active) { - int ref = atomic_inc_return(&xe->mem_access.ref); - - xe_assert(xe, ref != S32_MAX); - } - - return active; -} - -void xe_device_mem_access_get(struct xe_device *xe) -{ - int ref; - - /* - * This looks racy, but should be fine since the pm_callback_task only - * transitions from NULL -> current (and back to NULL again), during the - * runtime_resume() or runtime_suspend() callbacks, for which there can - * only be a single one running for our device. We only need to prevent - * recursively calling the runtime_get or runtime_put from those - * callbacks, as well as preventing triggering any access_ongoing - * asserts. - */ - if (xe_pm_read_callback_task(xe) == current) - return; - - /* - * Since the resume here is synchronous it can be quite easy to deadlock - * if we are not careful. Also in practice it might be quite timing - * sensitive to ever see the 0 -> 1 transition with the callers locks - * held, so deadlocks might exist but are hard for lockdep to ever see. - * With this in mind, help lockdep learn about the potentially scary - * stuff that can happen inside the runtime_resume callback by acquiring - * a dummy lock (it doesn't protect anything and gets compiled out on - * non-debug builds). Lockdep then only needs to see the - * mem_access_lockdep_map -> runtime_resume callback once, and then can - * hopefully validate all the (callers_locks) -> mem_access_lockdep_map. - * For example if the (callers_locks) are ever grabbed in the - * runtime_resume callback, lockdep should give us a nice splat. - */ - lock_map_acquire(&xe_device_mem_access_lockdep_map); - lock_map_release(&xe_device_mem_access_lockdep_map); - - xe_pm_runtime_get(xe); - ref = atomic_inc_return(&xe->mem_access.ref); - - xe_assert(xe, ref != S32_MAX); - -} - -void xe_device_mem_access_put(struct xe_device *xe) -{ - int ref; - - if (xe_pm_read_callback_task(xe) == current) - return; - - ref = atomic_dec_return(&xe->mem_access.ref); - xe_pm_runtime_put(xe); - - xe_assert(xe, ref >= 0); + xe_assert(xe, !xe_pm_runtime_suspended(xe)); } void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index d413bc2c6b..36d4434ebc 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -16,10 +16,6 @@ struct xe_file; #include "xe_force_wake.h" #include "xe_macros.h" -#ifdef CONFIG_LOCKDEP -extern struct lockdep_map xe_device_mem_access_lockdep_map; -#endif - static inline struct xe_device *to_xe_device(const struct drm_device *dev) { return container_of(dev, struct xe_device, drm); @@ -137,12 +133,7 @@ static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt) return >->mmio.fw; } -void xe_device_mem_access_get(struct xe_device *xe); -bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe); -void xe_device_mem_access_put(struct xe_device *xe); - void xe_device_assert_mem_access(struct xe_device *xe); -bool xe_device_mem_access_ongoing(struct xe_device *xe); static inline bool xe_device_in_fault_mode(struct xe_device *xe) { diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index 99113a5a2b..21677b8cd9 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -35,7 +35,9 @@ vram_d3cold_threshold_show(struct device *dev, if (!xe) return -EINVAL; + xe_pm_runtime_get(xe); ret = sysfs_emit(buf, "%d\n", xe->d3cold.vram_threshold); + xe_pm_runtime_put(xe); return ret; } @@ -58,7 +60,9 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr, drm_dbg(&xe->drm, "vram_d3cold_threshold: %u\n", vram_d3cold_threshold); + xe_pm_runtime_get(xe); ret = xe_pm_set_vram_threshold(xe, vram_d3cold_threshold); + xe_pm_runtime_put(xe); return ret ?: count; } @@ -72,18 +76,14 @@ static void xe_device_sysfs_fini(struct drm_device *drm, void *arg) sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr); } -void xe_device_sysfs_init(struct xe_device *xe) +int xe_device_sysfs_init(struct xe_device *xe) { struct device *dev = xe->drm.dev; int ret; ret = sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr); - if (ret) { - drm_warn(&xe->drm, "Failed to create sysfs file\n"); - return; - } - - ret = drmm_add_action_or_reset(&xe->drm, xe_device_sysfs_fini, xe); if (ret) - drm_warn(&xe->drm, "Failed to add sysfs fini drm action\n"); + return ret; + + return drmm_add_action_or_reset(&xe->drm, xe_device_sysfs_fini, xe); } diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.h b/drivers/gpu/drm/xe/xe_device_sysfs.h index 38b240684b..f9e83d8bd2 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.h +++ b/drivers/gpu/drm/xe/xe_device_sysfs.h @@ -8,6 +8,6 @@ struct xe_device; -void xe_device_sysfs_init(struct xe_device *xe); +int xe_device_sysfs_init(struct xe_device *xe); #endif diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 8e3a222b41..2e62450d86 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -321,6 +321,10 @@ struct xe_device { struct { /** @sriov.__mode: SR-IOV mode (Don't access directly!) */ enum xe_sriov_mode __mode; + + /** @sriov.pf: PF specific data */ + struct xe_device_pf pf; + /** @sriov.wq: workqueue used by the virtualization workers */ struct workqueue_struct *wq; } sriov; @@ -380,9 +384,6 @@ struct xe_device { * triggering additional actions when they occur. */ struct { - /** @mem_access.ref: ref count of memory accesses */ - atomic_t ref; - /** * @mem_access.vram_userfault: Encapsulate vram_userfault * related stuff @@ -500,20 +501,9 @@ struct xe_device { /* For pcode */ struct mutex sb_lock; - /* Should be in struct intel_display */ - u32 skl_preferred_vco_freq, max_dotclk_freq, hti_state; - u8 snps_phy_failed_calibration; - struct drm_atomic_state *modeset_restore_state; - struct list_head global_obj_list; - - union { - /* only to allow build, not used functionally */ - u32 irq_mask; - u32 de_irq_mask[I915_MAX_PIPES]; - }; - u32 pipestat_irq_mask[I915_MAX_PIPES]; + /* only to allow build, not used functionally */ + u32 irq_mask; - bool display_irqs_enabled; u32 enabled_irq_mask; struct intel_uncore { @@ -525,11 +515,7 @@ struct xe_device { unsigned int hpll_freq; unsigned int czclk_freq; unsigned int fsb_freq, mem_freq, is_ddr3; - u8 vblank_enabled; }; - struct { - const char *dmc_firmware_path; - } params; void *pxp; #endif diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index da2627ed6a..68f309f5e9 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -16,6 +16,7 @@ #include "tests/xe_test.h" #include "xe_bo.h" #include "xe_device.h" +#include "xe_pm.h" #include "xe_ttm_vram_mgr.h" #include "xe_vm.h" @@ -33,7 +34,7 @@ static int xe_dma_buf_attach(struct dma_buf *dmabuf, if (!attach->peer2peer && !xe_bo_can_migrate(gem_to_xe_bo(obj), XE_PL_TT)) return -EOPNOTSUPP; - xe_device_mem_access_get(to_xe_device(obj->dev)); + xe_pm_runtime_get(to_xe_device(obj->dev)); return 0; } @@ -42,7 +43,7 @@ static void xe_dma_buf_detach(struct dma_buf *dmabuf, { struct drm_gem_object *obj = attach->dmabuf->priv; - xe_device_mem_access_put(to_xe_device(obj->dev)); + xe_pm_runtime_put(to_xe_device(obj->dev)); } static int xe_dma_buf_pin(struct dma_buf_attachment *attach) @@ -216,7 +217,7 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage, dma_resv_lock(resv, NULL); bo = ___xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size, 0, /* Will require 1way or 2way for vm_bind */ - ttm_bo_type_sg, XE_BO_CREATE_SYSTEM_BIT); + ttm_bo_type_sg, XE_BO_FLAG_SYSTEM); if (IS_ERR(bo)) { ret = PTR_ERR(bo); goto error; diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 87c10bd795..08f0b7c959 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -78,7 +78,7 @@ void xe_drm_client_add_bo(struct xe_drm_client *client, spin_lock(&client->bos_lock); bo->client = xe_drm_client_get(client); - list_add_tail_rcu(&bo->client_link, &client->bos_list); + list_add_tail(&bo->client_link, &client->bos_list); spin_unlock(&client->bos_lock); } @@ -96,7 +96,7 @@ void xe_drm_client_remove_bo(struct xe_bo *bo) struct xe_drm_client *client = bo->client; spin_lock(&client->bos_lock); - list_del_rcu(&bo->client_link); + list_del(&bo->client_link); spin_unlock(&client->bos_lock); xe_drm_client_put(client); @@ -154,8 +154,8 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) /* Internal objects. */ spin_lock(&client->bos_lock); - list_for_each_entry_rcu(bo, &client->bos_list, client_link) { - if (!bo || !kref_get_unless_zero(&bo->ttm.base.refcount)) + list_for_each_entry(bo, &client->bos_list, client_link) { + if (!kref_get_unless_zero(&bo->ttm.base.refcount)) continue; bo_meminfo(bo, stats); xe_bo_put(bo); diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index cc5e0f75de..074344c739 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -118,7 +118,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) u64 addresses[XE_HW_ENGINE_MAX_INSTANCE]; struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn}; struct drm_exec *exec = &vm_exec.exec; - u32 i, num_syncs = 0, num_ufence = 0; + u32 i, num_syncs, num_ufence = 0; struct xe_sched_job *job; struct xe_vm *vm; bool write_locked, skip_retry = false; @@ -156,15 +156,15 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) vm = q->vm; - for (i = 0; i < args->num_syncs; i++) { - err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++], - &syncs_user[i], SYNC_PARSE_FLAG_EXEC | + for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { + err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], + &syncs_user[num_syncs], SYNC_PARSE_FLAG_EXEC | (xe_vm_in_lr_mode(vm) ? SYNC_PARSE_FLAG_LR_MODE : 0)); if (err) goto err_syncs; - if (xe_sync_is_ufence(&syncs[i])) + if (xe_sync_is_ufence(&syncs[num_syncs])) num_ufence++; } @@ -216,7 +216,7 @@ retry: goto err_unlock_list; } for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], NULL, fence); + xe_sync_entry_signal(&syncs[i], fence); xe_exec_queue_last_fence_set(q, vm, fence); dma_fence_put(fence); } @@ -294,9 +294,10 @@ retry: drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, &job->drm.s_fence->finished, DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE); - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], job, - &job->drm.s_fence->finished); + for (i = 0; i < num_syncs; i++) { + xe_sync_entry_signal(&syncs[i], &job->drm.s_fence->finished); + xe_sched_job_init_user_fence(job, &syncs[i]); + } if (xe_exec_queue_is_lr(q)) q->ring_ops->emit_job(job); @@ -320,15 +321,12 @@ err_put_job: err_exec: drm_exec_fini(exec); err_unlock_list: - if (write_locked) - up_write(&vm->lock); - else - up_read(&vm->lock); + up_read(&vm->lock); if (err == -EAGAIN && !skip_retry) goto retry; err_syncs: - for (i = 0; i < num_syncs; i++) - xe_sync_entry_cleanup(&syncs[i]); + while (num_syncs--) + xe_sync_entry_cleanup(&syncs[num_syncs]); kfree(syncs); err_exec_queue: xe_exec_queue_put(q); diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index ead25d5e72..395de93579 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -31,7 +31,14 @@ enum xe_exec_queue_sched_prop { }; static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, - u64 extensions, int ext_number, bool create); + u64 extensions, int ext_number); + +static void __xe_exec_queue_free(struct xe_exec_queue *q) +{ + if (q->vm) + xe_vm_put(q->vm); + kfree(q); +} static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, struct xe_vm *vm, @@ -74,21 +81,21 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, else q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL; + if (vm) + q->vm = xe_vm_get(vm); + if (extensions) { /* * may set q->usm, must come before xe_lrc_init(), * may overwrite q->sched_props, must come before q->ops->init() */ - err = exec_queue_user_extensions(xe, q, extensions, 0, true); + err = exec_queue_user_extensions(xe, q, extensions, 0); if (err) { - kfree(q); + __xe_exec_queue_free(q); return ERR_PTR(err); } } - if (vm) - q->vm = xe_vm_get(vm); - if (xe_exec_queue_is_parallel(q)) { q->parallel.composite_fence_ctx = dma_fence_context_alloc(1); q->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO; @@ -97,13 +104,6 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, return q; } -static void __xe_exec_queue_free(struct xe_exec_queue *q) -{ - if (q->vm) - xe_vm_put(q->vm); - kfree(q); -} - static int __xe_exec_queue_init(struct xe_exec_queue *q) { struct xe_device *xe = gt_to_xe(q->gt); @@ -128,7 +128,7 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q) * already grabbed the rpm ref outside any sensitive locks. */ if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm)) - drm_WARN_ON(&xe->drm, !xe_device_mem_access_get_if_ongoing(xe)); + xe_pm_runtime_get_noresume(xe); return 0; @@ -217,7 +217,7 @@ void xe_exec_queue_fini(struct xe_exec_queue *q) for (i = 0; i < q->width; ++i) xe_lrc_finish(q->lrc + i); if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && (q->flags & EXEC_QUEUE_FLAG_VM || !q->vm)) - xe_device_mem_access_put(gt_to_xe(q->gt)); + xe_pm_runtime_put(gt_to_xe(q->gt)); __xe_exec_queue_free(q); } @@ -225,22 +225,22 @@ void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance) { switch (q->class) { case XE_ENGINE_CLASS_RENDER: - sprintf(q->name, "rcs%d", instance); + snprintf(q->name, sizeof(q->name), "rcs%d", instance); break; case XE_ENGINE_CLASS_VIDEO_DECODE: - sprintf(q->name, "vcs%d", instance); + snprintf(q->name, sizeof(q->name), "vcs%d", instance); break; case XE_ENGINE_CLASS_VIDEO_ENHANCE: - sprintf(q->name, "vecs%d", instance); + snprintf(q->name, sizeof(q->name), "vecs%d", instance); break; case XE_ENGINE_CLASS_COPY: - sprintf(q->name, "bcs%d", instance); + snprintf(q->name, sizeof(q->name), "bcs%d", instance); break; case XE_ENGINE_CLASS_COMPUTE: - sprintf(q->name, "ccs%d", instance); + snprintf(q->name, sizeof(q->name), "ccs%d", instance); break; case XE_ENGINE_CLASS_OTHER: - sprintf(q->name, "gsccs%d", instance); + snprintf(q->name, sizeof(q->name), "gsccs%d", instance); break; default: XE_WARN_ON(q->class); @@ -268,7 +268,7 @@ xe_exec_queue_device_get_max_priority(struct xe_device *xe) } static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) + u64 value) { if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH)) return -EINVAL; @@ -276,9 +276,6 @@ static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe))) return -EPERM; - if (!create) - return q->ops->set_priority(q, value); - q->sched_props.priority = value; return 0; } @@ -336,7 +333,7 @@ xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass, } static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create) + u64 value) { u32 min = 0, max = 0; @@ -347,16 +344,13 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue * !xe_hw_engine_timeout_in_range(value, min, max)) return -EINVAL; - if (!create) - return q->ops->set_timeslice(q, value); - q->sched_props.timeslice_us = value; return 0; } typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, struct xe_exec_queue *q, - u64 value, bool create); + u64 value); static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, @@ -365,8 +359,7 @@ static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { static int exec_queue_user_ext_set_property(struct xe_device *xe, struct xe_exec_queue *q, - u64 extension, - bool create) + u64 extension) { u64 __user *address = u64_to_user_ptr(extension); struct drm_xe_ext_set_property ext; @@ -388,21 +381,20 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe, if (!exec_queue_set_property_funcs[idx]) return -EINVAL; - return exec_queue_set_property_funcs[idx](xe, q, ext.value, create); + return exec_queue_set_property_funcs[idx](xe, q, ext.value); } typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe, struct xe_exec_queue *q, - u64 extension, - bool create); + u64 extension); -static const xe_exec_queue_set_property_fn exec_queue_user_extension_funcs[] = { +static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = { [DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property, }; #define MAX_USER_EXTENSIONS 16 static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, - u64 extensions, int ext_number, bool create) + u64 extensions, int ext_number) { u64 __user *address = u64_to_user_ptr(extensions); struct drm_xe_user_extension ext; @@ -423,13 +415,13 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue idx = array_index_nospec(ext.name, ARRAY_SIZE(exec_queue_user_extension_funcs)); - err = exec_queue_user_extension_funcs[idx](xe, q, extensions, create); + err = exec_queue_user_extension_funcs[idx](xe, q, extensions); if (XE_IOCTL_DBG(xe, err)) return err; if (ext.next_extension) return exec_queue_user_extensions(xe, q, ext.next_extension, - ++ext_number, create); + ++ext_number); return 0; } @@ -597,7 +589,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, return -EINVAL; /* The migration vm doesn't hold rpm ref */ - xe_device_mem_access_get(xe); + xe_pm_runtime_get_noresume(xe); flags = EXEC_QUEUE_FLAG_VM | (id ? EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0); @@ -606,7 +598,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, args->width, hwe, flags, args->extensions); - xe_device_mem_access_put(xe); /* now held by engine */ + xe_pm_runtime_put(xe); /* now held by engine */ xe_vm_put(migrate_vm); if (IS_ERR(new)) { diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 462b331950..ee78d497d8 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -76,14 +76,12 @@ struct xe_exec_queue { #define EXEC_QUEUE_FLAG_KERNEL BIT(1) /* kernel engine only destroyed at driver unload */ #define EXEC_QUEUE_FLAG_PERMANENT BIT(2) -/* queue keeps running pending jobs after destroy ioctl */ -#define EXEC_QUEUE_FLAG_PERSISTENT BIT(3) /* for VM jobs. Caller needs to hold rpm ref when creating queue with this flag */ -#define EXEC_QUEUE_FLAG_VM BIT(4) +#define EXEC_QUEUE_FLAG_VM BIT(3) /* child of VM queue for multi-tile VM jobs */ -#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(5) +#define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(4) /* kernel exec_queue only, set priority to highest level */ -#define EXEC_QUEUE_FLAG_HIGH_PRIORITY BIT(6) +#define EXEC_QUEUE_FLAG_HIGH_PRIORITY BIT(5) /** * @flags: flags for this exec queue, should statically setup aside from ban diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index ab96edb058..0d541f55b4 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -5,12 +5,14 @@ #include "xe_ggtt.h" +#include <linux/io-64-nonatomic-lo-hi.h> #include <linux/sizes.h> #include <drm/drm_managed.h> #include <drm/i915_drm.h> #include "regs/xe_gt_regs.h" +#include "regs/xe_gtt_defs.h" #include "regs/xe_regs.h" #include "xe_assert.h" #include "xe_bo.h" @@ -19,16 +21,10 @@ #include "xe_gt_printk.h" #include "xe_gt_tlb_invalidation.h" #include "xe_map.h" -#include "xe_mmio.h" +#include "xe_pm.h" #include "xe_sriov.h" #include "xe_wopcm.h" -#define XELPG_GGTT_PTE_PAT0 BIT_ULL(52) -#define XELPG_GGTT_PTE_PAT1 BIT_ULL(53) - -/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */ -#define GUC_GGTT_TOP 0xFEE00000 - static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, u16 pat_index) { @@ -200,20 +196,20 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) return drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt); } +static void xe_ggtt_invalidate(struct xe_ggtt *ggtt); + static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) { struct drm_mm_node *hole; u64 start, end; /* Display may have allocated inside ggtt, so be careful with clearing here */ - xe_device_mem_access_get(tile_to_xe(ggtt->tile)); mutex_lock(&ggtt->lock); drm_mm_for_each_hole(hole, &ggtt->mm, start, end) xe_ggtt_clear(ggtt, start, end - start); xe_ggtt_invalidate(ggtt); mutex_unlock(&ggtt->lock); - xe_device_mem_access_put(tile_to_xe(ggtt->tile)); } int xe_ggtt_init(struct xe_ggtt *ggtt) @@ -227,11 +223,11 @@ int xe_ggtt_init(struct xe_ggtt *ggtt) * scratch entires, rather keep the scratch page in system memory on * platforms where 64K pages are needed for VRAM. */ - flags = XE_BO_CREATE_PINNED_BIT; + flags = XE_BO_FLAG_PINNED; if (ggtt->flags & XE_GGTT_FLAGS_64K) - flags |= XE_BO_CREATE_SYSTEM_BIT; + flags |= XE_BO_FLAG_SYSTEM; else - flags |= XE_BO_CREATE_VRAM_IF_DGFX(ggtt->tile); + flags |= XE_BO_FLAG_VRAM_IF_DGFX(ggtt->tile); ggtt->scratch = xe_managed_bo_create_pin_map(xe, ggtt->tile, XE_PAGE_SIZE, flags); if (IS_ERR(ggtt->scratch)) { @@ -249,51 +245,19 @@ err: return err; } -#define GUC_TLB_INV_CR XE_REG(0xcee8) -#define GUC_TLB_INV_CR_INVALIDATE REG_BIT(0) -#define PVC_GUC_TLB_INV_DESC0 XE_REG(0xcf7c) -#define PVC_GUC_TLB_INV_DESC0_VALID REG_BIT(0) -#define PVC_GUC_TLB_INV_DESC1 XE_REG(0xcf80) -#define PVC_GUC_TLB_INV_DESC1_INVALIDATE REG_BIT(6) - static void ggtt_invalidate_gt_tlb(struct xe_gt *gt) { + int err; + if (!gt) return; - /* - * Invalidation can happen when there's no in-flight work keeping the - * GT awake. We need to explicitly grab forcewake to ensure the GT - * and GuC are accessible. - */ - xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - - /* TODO: vfunc for GuC vs. non-GuC */ - - if (gt->uc.guc.submission_state.enabled) { - int seqno; - - seqno = xe_gt_tlb_invalidation_guc(gt); - xe_gt_assert(gt, seqno > 0); - if (seqno > 0) - xe_gt_tlb_invalidation_wait(gt, seqno); - } else if (xe_device_uc_enabled(gt_to_xe(gt))) { - struct xe_device *xe = gt_to_xe(gt); - - if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { - xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1, - PVC_GUC_TLB_INV_DESC1_INVALIDATE); - xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0, - PVC_GUC_TLB_INV_DESC0_VALID); - } else - xe_mmio_write32(gt, GUC_TLB_INV_CR, - GUC_TLB_INV_CR_INVALIDATE); - } - - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + err = xe_gt_tlb_invalidation_ggtt(gt); + if (err) + drm_warn(>_to_xe(gt)->drm, "xe_gt_tlb_invalidation_ggtt error=%d", err); } -void xe_ggtt_invalidate(struct xe_ggtt *ggtt) +static void xe_ggtt_invalidate(struct xe_ggtt *ggtt) { /* Each GT in a tile has its own TLB to cache GGTT lookups */ ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt); @@ -410,7 +374,7 @@ int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { - u16 cache_mode = bo->flags & XE_BO_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB; + u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB; u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode]; u64 start = bo->ggtt_node.start; u64 offset, pte; @@ -419,8 +383,6 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index); xe_ggtt_set_pte(ggtt, start + offset, pte); } - - xe_ggtt_invalidate(ggtt); } static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, @@ -442,14 +404,17 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, if (err) return err; - xe_device_mem_access_get(tile_to_xe(ggtt->tile)); + xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); mutex_lock(&ggtt->lock); err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node, bo->size, alignment, 0, start, end, 0); if (!err) xe_ggtt_map_bo(ggtt, bo); mutex_unlock(&ggtt->lock); - xe_device_mem_access_put(tile_to_xe(ggtt->tile)); + + if (!err && bo->flags & XE_BO_FLAG_GGTT_INVALIDATE) + xe_ggtt_invalidate(ggtt); + xe_pm_runtime_put(tile_to_xe(ggtt->tile)); return err; } @@ -465,19 +430,21 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX); } -void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node) +void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, + bool invalidate) { - xe_device_mem_access_get(tile_to_xe(ggtt->tile)); - mutex_lock(&ggtt->lock); + xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); + mutex_lock(&ggtt->lock); xe_ggtt_clear(ggtt, node->start, node->size); drm_mm_remove_node(node); node->size = 0; + mutex_unlock(&ggtt->lock); - xe_ggtt_invalidate(ggtt); + if (invalidate) + xe_ggtt_invalidate(ggtt); - mutex_unlock(&ggtt->lock); - xe_device_mem_access_put(tile_to_xe(ggtt->tile)); + xe_pm_runtime_put(tile_to_xe(ggtt->tile)); } void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) @@ -488,8 +455,53 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) /* This BO is not currently in the GGTT */ xe_tile_assert(ggtt->tile, bo->ggtt_node.size == bo->size); - xe_ggtt_remove_node(ggtt, &bo->ggtt_node); + xe_ggtt_remove_node(ggtt, &bo->ggtt_node, + bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); +} + +#ifdef CONFIG_PCI_IOV +static u64 xe_encode_vfid_pte(u16 vfid) +{ + return FIELD_PREP(GGTT_PTE_VFID, vfid) | XE_PAGE_PRESENT; +} + +static void xe_ggtt_assign_locked(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid) +{ + u64 start = node->start; + u64 size = node->size; + u64 end = start + size - 1; + u64 pte = xe_encode_vfid_pte(vfid); + + lockdep_assert_held(&ggtt->lock); + + if (!drm_mm_node_allocated(node)) + return; + + while (start < end) { + xe_ggtt_set_pte(ggtt, start, pte); + start += XE_PAGE_SIZE; + } + + xe_ggtt_invalidate(ggtt); +} + +/** + * xe_ggtt_assign - assign a GGTT region to the VF + * @ggtt: the &xe_ggtt where the node belongs + * @node: the &drm_mm_node to update + * @vfid: the VF identifier + * + * This function is used by the PF driver to assign a GGTT region to the VF. + * In addition to PTE's VFID bits 11:2 also PRESENT bit 0 is set as on some + * platforms VFs can't modify that either. + */ +void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid) +{ + mutex_lock(&ggtt->lock); + xe_ggtt_assign_locked(ggtt, node, vfid); + mutex_unlock(&ggtt->lock); } +#endif int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p) { diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h index 42705e1338..4a41a17623 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.h +++ b/drivers/gpu/drm/xe/xe_ggtt.h @@ -11,7 +11,6 @@ struct drm_printer; void xe_ggtt_set_pte(struct xe_ggtt *ggtt, u64 addr, u64 pte); -void xe_ggtt_invalidate(struct xe_ggtt *ggtt); int xe_ggtt_init_early(struct xe_ggtt *ggtt); int xe_ggtt_init(struct xe_ggtt *ggtt); void xe_ggtt_printk(struct xe_ggtt *ggtt, const char *prefix); @@ -24,7 +23,8 @@ int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node, u32 size, u32 align, u32 mm_flags); -void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node); +void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, + bool invalidate); void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, @@ -33,4 +33,8 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo); int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p); +#ifdef CONFIG_PCI_IOV +void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vfid); +#endif + #endif diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index a61994292c..60202b9036 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -17,15 +17,18 @@ #include "xe_gsc_proxy.h" #include "xe_gsc_submit.h" #include "xe_gt.h" +#include "xe_gt_mcr.h" #include "xe_gt_printk.h" #include "xe_huc.h" #include "xe_map.h" #include "xe_mmio.h" +#include "xe_pm.h" #include "xe_sched_job.h" #include "xe_uc_fw.h" #include "xe_wa.h" #include "instructions/xe_gsc_commands.h" #include "regs/xe_gsc_regs.h" +#include "regs/xe_gt_regs.h" static struct xe_gt * gsc_to_gt(struct xe_gsc *gsc) @@ -127,8 +130,8 @@ static int query_compatibility_version(struct xe_gsc *gsc) bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2, ttm_bo_type_kernel, - XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT); if (IS_ERR(bo)) { xe_gt_err(gt, "failed to allocate bo for GSC version query\n"); return PTR_ERR(bo); @@ -250,9 +253,30 @@ static int gsc_upload(struct xe_gsc *gsc) static int gsc_upload_and_init(struct xe_gsc *gsc) { struct xe_gt *gt = gsc_to_gt(gsc); + struct xe_tile *tile = gt_to_tile(gt); int ret; + if (XE_WA(gt, 14018094691)) { + ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); + + /* + * If the forcewake fails we want to keep going, because the worst + * case outcome in failing to apply the WA is that PXP won't work, + * which is not fatal. We still throw a warning so the issue is + * seen if it happens. + */ + xe_gt_WARN_ON(tile->primary_gt, ret); + + xe_gt_mcr_multicast_write(tile->primary_gt, + EU_SYSTOLIC_LIC_THROTTLE_CTL_WITH_LOCK, + EU_SYSTOLIC_LIC_THROTTLE_CTL_LOCK_BIT); + } + ret = gsc_upload(gsc); + + if (XE_WA(gt, 14018094691)) + xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); + if (ret) return ret; @@ -272,6 +296,44 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) return 0; } +static int gsc_er_complete(struct xe_gt *gt) +{ + u32 er_status; + + if (!gsc_fw_is_loaded(gt)) + return 0; + + /* + * Starting on Xe2, the GSCCS engine reset is a 2-step process. When the + * driver or the GuC hit the GDRST register, the CS is immediately reset + * and a success is reported, but the GSC shim keeps resetting in the + * background. While the shim reset is ongoing, the CS is able to accept + * new context submission, but any commands that require the shim will + * be stalled until the reset is completed. This means that we can keep + * submitting to the GSCCS as long as we make sure that the preemption + * timeout is big enough to cover any delay introduced by the reset. + * When the shim reset completes, a specific CS interrupt is triggered, + * in response to which we need to check the GSCI_TIMER_STATUS register + * to see if the reset was successful or not. + * Note that the GSCI_TIMER_STATUS register is not power save/restored, + * so it gets reset on MC6 entry. However, a reset failure stops MC6, + * so in that scenario we're always guaranteed to find the correct + * value. + */ + er_status = xe_mmio_read32(gt, GSCI_TIMER_STATUS) & GSCI_TIMER_STATUS_VALUE; + + if (er_status == GSCI_TIMER_STATUS_TIMER_EXPIRED) { + /* + * XXX: we should trigger an FLR here, but we don't have support + * for that yet. + */ + xe_gt_err(gt, "GSC ER timed out!\n"); + return -EIO; + } + + return 0; +} + static void gsc_work(struct work_struct *work) { struct xe_gsc *gsc = container_of(work, typeof(*gsc), work); @@ -285,8 +347,14 @@ static void gsc_work(struct work_struct *work) gsc->work_actions = 0; spin_unlock_irq(&gsc->lock); - xe_device_mem_access_get(xe); - xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC); + xe_pm_runtime_get(xe); + xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GSC)); + + if (actions & GSC_ACTION_ER_COMPLETE) { + ret = gsc_er_complete(gt); + if (ret) + goto out; + } if (actions & GSC_ACTION_FW_LOAD) { ret = gsc_upload_and_init(gsc); @@ -299,8 +367,26 @@ static void gsc_work(struct work_struct *work) if (actions & GSC_ACTION_SW_PROXY) xe_gsc_proxy_request_handler(gsc); +out: xe_force_wake_put(gt_to_fw(gt), XE_FW_GSC); - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); +} + +void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec) +{ + struct xe_gt *gt = hwe->gt; + struct xe_gsc *gsc = >->uc.gsc; + + if (unlikely(!intr_vec)) + return; + + if (intr_vec & GSC_ER_COMPLETE) { + spin_lock(&gsc->lock); + gsc->work_actions |= GSC_ACTION_ER_COMPLETE; + spin_unlock(&gsc->lock); + + queue_work(gsc->wq, &gsc->work); + } } int xe_gsc_init(struct xe_gsc *gsc) @@ -382,8 +468,8 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4M, ttm_bo_type_kernel, - XE_BO_CREATE_STOLEN_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_STOLEN | + XE_BO_FLAG_GGTT); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h index c6fb32e3fd..dd16e9b8b8 100644 --- a/drivers/gpu/drm/xe/xe_gsc.h +++ b/drivers/gpu/drm/xe/xe_gsc.h @@ -9,12 +9,14 @@ #include "xe_gsc_types.h" struct xe_gt; +struct xe_hw_engine; int xe_gsc_init(struct xe_gsc *gsc); int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc); void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc); void xe_gsc_load_start(struct xe_gsc *gsc); void xe_gsc_remove(struct xe_gsc *gsc); +void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec); void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 309ef80e3b..1b908d238b 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -66,7 +66,7 @@ static inline struct xe_device *kdev_to_xe(struct device *kdev) return dev_get_drvdata(kdev); } -static bool gsc_proxy_init_done(struct xe_gsc *gsc) +bool xe_gsc_proxy_init_done(struct xe_gsc *gsc) { struct xe_gt *gt = gsc_to_gt(gsc); u32 fwsts1 = xe_mmio_read32(gt, HECI_FWSTS1(MTL_GSC_HECI1_BASE)); @@ -403,7 +403,6 @@ static int proxy_channel_alloc(struct xe_gsc *gsc) struct xe_device *xe = gt_to_xe(gt); struct xe_bo *bo; void *csme; - int err; csme = kzalloc(GSC_PROXY_CHANNEL_SIZE, GFP_KERNEL); if (!csme) @@ -411,8 +410,8 @@ static int proxy_channel_alloc(struct xe_gsc *gsc) bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_PROXY_CHANNEL_SIZE, ttm_bo_type_kernel, - XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT); if (IS_ERR(bo)) { kfree(csme); return PTR_ERR(bo); @@ -424,11 +423,7 @@ static int proxy_channel_alloc(struct xe_gsc *gsc) gsc->proxy.to_csme = csme; gsc->proxy.from_csme = csme + GSC_PROXY_BUFFER_SIZE; - err = drmm_add_action_or_reset(&xe->drm, proxy_channel_free, gsc); - if (err) - return err; - - return 0; + return drmm_add_action_or_reset(&xe->drm, proxy_channel_free, gsc); } /** @@ -528,7 +523,7 @@ int xe_gsc_proxy_start(struct xe_gsc *gsc) if (err) return err; - if (!gsc_proxy_init_done(gsc)) { + if (!xe_gsc_proxy_init_done(gsc)) { xe_gt_err(gsc_to_gt(gsc), "GSC FW reports proxy init not completed\n"); return -EIO; } diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.h b/drivers/gpu/drm/xe/xe_gsc_proxy.h index 908f9441f0..c511ade6b8 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.h +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.h @@ -11,6 +11,7 @@ struct xe_gsc; int xe_gsc_proxy_init(struct xe_gsc *gsc); +bool xe_gsc_proxy_init_done(struct xe_gsc *gsc); void xe_gsc_proxy_remove(struct xe_gsc *gsc); int xe_gsc_proxy_start(struct xe_gsc *gsc); diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.c b/drivers/gpu/drm/xe/xe_gsc_submit.c index 348994b271..d34d032488 100644 --- a/drivers/gpu/drm/xe/xe_gsc_submit.c +++ b/drivers/gpu/drm/xe/xe_gsc_submit.c @@ -41,6 +41,21 @@ gsc_to_gt(struct xe_gsc *gsc) } /** + * xe_gsc_create_host_session_id - Creates a random 64 bit host_session id with + * bits 56-63 masked. + * + * Returns: random host_session_id which can be used to send messages to gsc cs + */ +u64 xe_gsc_create_host_session_id(void) +{ + u64 host_session_id; + + get_random_bytes(&host_session_id, sizeof(u64)); + host_session_id &= ~HOST_SESSION_CLIENT_MASK; + return host_session_id; +} + +/** * xe_gsc_emit_header - write the MTL GSC header in memory * @xe: the Xe device * @map: the iosys map to write to diff --git a/drivers/gpu/drm/xe/xe_gsc_submit.h b/drivers/gpu/drm/xe/xe_gsc_submit.h index 1939855031..1416b5745a 100644 --- a/drivers/gpu/drm/xe/xe_gsc_submit.h +++ b/drivers/gpu/drm/xe/xe_gsc_submit.h @@ -28,4 +28,5 @@ int xe_gsc_read_out_header(struct xe_device *xe, int xe_gsc_pkt_submit_kernel(struct xe_gsc *gsc, u64 addr_in, u32 size_in, u64 addr_out, u32 size_out); +u64 xe_gsc_create_host_session_id(void); #endif diff --git a/drivers/gpu/drm/xe/xe_gsc_types.h b/drivers/gpu/drm/xe/xe_gsc_types.h index 138d8cc0f1..5926de2021 100644 --- a/drivers/gpu/drm/xe/xe_gsc_types.h +++ b/drivers/gpu/drm/xe/xe_gsc_types.h @@ -47,6 +47,7 @@ struct xe_gsc { u32 work_actions; #define GSC_ACTION_FW_LOAD BIT(0) #define GSC_ACTION_SW_PROXY BIT(1) +#define GSC_ACTION_ER_COMPLETE BIT(2) /** @proxy: sub-structure containing the SW proxy-related variables */ struct { diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index f9705430ad..491d0413de 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -29,6 +29,7 @@ #include "xe_gt_mcr.h" #include "xe_gt_pagefault.h" #include "xe_gt_printk.h" +#include "xe_gt_sriov_pf.h" #include "xe_gt_sysfs.h" #include "xe_gt_tlb_invalidation.h" #include "xe_gt_topology.h" @@ -43,6 +44,7 @@ #include "xe_migrate.h" #include "xe_mmio.h" #include "xe_pat.h" +#include "xe_pm.h" #include "xe_mocs.h" #include "xe_reg_sr.h" #include "xe_ring_ops.h" @@ -310,6 +312,12 @@ int xe_gt_init_early(struct xe_gt *gt) { int err; + if (IS_SRIOV_PF(gt_to_xe(gt))) { + err = xe_gt_sriov_pf_init_early(gt); + if (err) + return err; + } + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) return err; @@ -346,7 +354,6 @@ static int gt_fw_domain_init(struct xe_gt *gt) { int err, i; - xe_device_mem_access_get(gt_to_xe(gt)); err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) goto err_hw_fence_irq; @@ -359,7 +366,9 @@ static int gt_fw_domain_init(struct xe_gt *gt) xe_lmtt_init(>_to_tile(gt)->sriov.pf.lmtt); } - xe_gt_idle_sysfs_init(>->gtidle); + err = xe_gt_idle_sysfs_init(>->gtidle); + if (err) + goto err_force_wake; /* Enable per hw engine IRQs */ xe_irq_enable_hwe(gt); @@ -373,9 +382,7 @@ static int gt_fw_domain_init(struct xe_gt *gt) err = xe_hw_engine_class_sysfs_init(gt); if (err) - drm_warn(>_to_xe(gt)->drm, - "failed to register engines sysfs directory, err: %d\n", - err); + goto err_force_wake; /* Initialize CCS mode sysfs after early initialization of HW engines */ err = xe_gt_ccs_mode_sysfs_init(gt); @@ -390,7 +397,6 @@ static int gt_fw_domain_init(struct xe_gt *gt) err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); XE_WARN_ON(err); - xe_device_mem_access_put(gt_to_xe(gt)); return 0; @@ -400,7 +406,6 @@ err_force_wake: err_hw_fence_irq: for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) xe_hw_fence_irq_finish(>->fence_irq[i]); - xe_device_mem_access_put(gt_to_xe(gt)); return err; } @@ -409,7 +414,6 @@ static int all_fw_domain_init(struct xe_gt *gt) { int err, i; - xe_device_mem_access_get(gt_to_xe(gt)); err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (err) goto err_hw_fence_irq; @@ -475,7 +479,6 @@ static int all_fw_domain_init(struct xe_gt *gt) err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); XE_WARN_ON(err); - xe_device_mem_access_put(gt_to_xe(gt)); return 0; @@ -484,7 +487,6 @@ err_force_wake: err_hw_fence_irq: for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) xe_hw_fence_irq_finish(>->fence_irq[i]); - xe_device_mem_access_put(gt_to_xe(gt)); return err; } @@ -497,7 +499,6 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) { int err; - xe_device_mem_access_get(gt_to_xe(gt)); err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) goto out; @@ -520,8 +521,6 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) out_fw: xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); out: - xe_device_mem_access_put(gt_to_xe(gt)); - return err; } @@ -547,13 +546,17 @@ int xe_gt_init(struct xe_gt *gt) xe_mocs_init_early(gt); - xe_gt_sysfs_init(gt); + err = xe_gt_sysfs_init(gt); + if (err) + return err; err = gt_fw_domain_init(gt); if (err) return err; - xe_gt_freq_init(gt); + err = xe_gt_freq_init(gt); + if (err) + return err; xe_force_wake_init_engines(gt, gt_to_fw(gt)); @@ -561,11 +564,7 @@ int xe_gt_init(struct xe_gt *gt) if (err) return err; - err = drmm_add_action_or_reset(>_to_xe(gt)->drm, gt_fini, gt); - if (err) - return err; - - return 0; + return drmm_add_action_or_reset(>_to_xe(gt)->drm, gt_fini, gt); } static int do_gt_reset(struct xe_gt *gt) @@ -645,9 +644,9 @@ static int gt_reset(struct xe_gt *gt) goto err_fail; } + xe_pm_runtime_get(gt_to_xe(gt)); xe_gt_sanitize(gt); - xe_device_mem_access_get(gt_to_xe(gt)); err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (err) goto err_msg; @@ -671,8 +670,8 @@ static int gt_reset(struct xe_gt *gt) goto err_out; err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_device_mem_access_put(gt_to_xe(gt)); XE_WARN_ON(err); + xe_pm_runtime_put(gt_to_xe(gt)); xe_gt_info(gt, "reset done\n"); @@ -682,7 +681,7 @@ err_out: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); err_msg: XE_WARN_ON(xe_uc_start(>->uc)); - xe_device_mem_access_put(gt_to_xe(gt)); + xe_pm_runtime_put(gt_to_xe(gt)); err_fail: xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); @@ -712,22 +711,20 @@ void xe_gt_reset_async(struct xe_gt *gt) void xe_gt_suspend_prepare(struct xe_gt *gt) { - xe_device_mem_access_get(gt_to_xe(gt)); XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); xe_uc_stop_prepare(>->uc); XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - xe_device_mem_access_put(gt_to_xe(gt)); } int xe_gt_suspend(struct xe_gt *gt) { int err; + xe_gt_dbg(gt, "suspending\n"); xe_gt_sanitize(gt); - xe_device_mem_access_get(gt_to_xe(gt)); err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (err) goto err_msg; @@ -737,15 +734,13 @@ int xe_gt_suspend(struct xe_gt *gt) goto err_force_wake; XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - xe_device_mem_access_put(gt_to_xe(gt)); - xe_gt_info(gt, "suspended\n"); + xe_gt_dbg(gt, "suspended\n"); return 0; err_force_wake: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); err_msg: - xe_device_mem_access_put(gt_to_xe(gt)); xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err)); return err; @@ -755,7 +750,7 @@ int xe_gt_resume(struct xe_gt *gt) { int err; - xe_device_mem_access_get(gt_to_xe(gt)); + xe_gt_dbg(gt, "resuming\n"); err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (err) goto err_msg; @@ -765,15 +760,13 @@ int xe_gt_resume(struct xe_gt *gt) goto err_force_wake; XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - xe_device_mem_access_put(gt_to_xe(gt)); - xe_gt_info(gt, "resumed\n"); + xe_gt_dbg(gt, "resumed\n"); return 0; err_force_wake: XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); err_msg: - xe_device_mem_access_put(gt_to_xe(gt)); xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err)); return err; diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c index 937054e31d..c7bca20f6b 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.c +++ b/drivers/gpu/drm/xe/xe_gt_clock.c @@ -78,8 +78,3 @@ int xe_gt_clock_init(struct xe_gt *gt) gt->info.reference_clock = freq; return 0; } - -u64 xe_gt_clock_cycles_to_ns(const struct xe_gt *gt, u64 count) -{ - return DIV_ROUND_CLOSEST_ULL(count * NSEC_PER_SEC, gt->info.reference_clock); -} diff --git a/drivers/gpu/drm/xe/xe_gt_clock.h b/drivers/gpu/drm/xe/xe_gt_clock.h index aa162722f8..44fa0371b9 100644 --- a/drivers/gpu/drm/xe/xe_gt_clock.h +++ b/drivers/gpu/drm/xe/xe_gt_clock.h @@ -11,5 +11,5 @@ struct xe_gt; int xe_gt_clock_init(struct xe_gt *gt); -u64 xe_gt_clock_cycles_to_ns(const struct xe_gt *gt, u64 count); + #endif diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index c4b67cf09f..8cf0b2625e 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -5,6 +5,8 @@ #include "xe_gt_debugfs.h" +#include <linux/debugfs.h> + #include <drm/drm_debugfs.h> #include <drm/drm_managed.h> @@ -18,193 +20,246 @@ #include "xe_lrc.h" #include "xe_macros.h" #include "xe_pat.h" +#include "xe_pm.h" #include "xe_reg_sr.h" #include "xe_reg_whitelist.h" #include "xe_uc_debugfs.h" #include "xe_wa.h" -static struct xe_gt *node_to_gt(struct drm_info_node *node) +/** + * xe_gt_debugfs_simple_show - A show callback for struct drm_info_list + * @m: the &seq_file + * @data: data used by the drm debugfs helpers + * + * This callback can be used in struct drm_info_list to describe debugfs + * files that are &xe_gt specific. + * + * It is assumed that those debugfs files will be created on directory entry + * which struct dentry d_inode->i_private points to &xe_gt. + * + * This function assumes that &m->private will be set to the &struct + * drm_info_node corresponding to the instance of the info on a given &struct + * drm_minor (see struct drm_info_list.show for details). + * + * This function also assumes that struct drm_info_list.data will point to the + * function code that will actually print a file content:: + * + * int (*print)(struct xe_gt *, struct drm_printer *) + * + * Example:: + * + * int foo(struct xe_gt *gt, struct drm_printer *p) + * { + * drm_printf(p, "GT%u\n", gt->info.id); + * return 0; + * } + * + * static const struct drm_info_list bar[] = { + * { name = "foo", .show = xe_gt_debugfs_simple_show, .data = foo }, + * }; + * + * dir = debugfs_create_dir("gt", parent); + * dir->d_inode->i_private = gt; + * drm_debugfs_create_files(bar, ARRAY_SIZE(bar), dir, minor); + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_debugfs_simple_show(struct seq_file *m, void *data) { - return node->info_ent->data; + struct drm_printer p = drm_seq_file_printer(m); + struct drm_info_node *node = m->private; + struct dentry *parent = node->dent->d_parent; + struct xe_gt *gt = parent->d_inode->i_private; + int (*print)(struct xe_gt *, struct drm_printer *) = node->info_ent->data; + + if (WARN_ON(!print)) + return -EINVAL; + + return print(gt, &p); } -static int hw_engines(struct seq_file *m, void *data) +static int hw_engines(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt *gt = node_to_gt(m->private); struct xe_device *xe = gt_to_xe(gt); - struct drm_printer p = drm_seq_file_printer(m); struct xe_hw_engine *hwe; enum xe_hw_engine_id id; int err; - xe_device_mem_access_get(xe); + xe_pm_runtime_get(xe); err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (err) { - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return err; } for_each_hw_engine(hwe, gt, id) - xe_hw_engine_print(hwe, &p); + xe_hw_engine_print(hwe, p); err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); if (err) return err; return 0; } -static int force_reset(struct seq_file *m, void *data) +static int force_reset(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt *gt = node_to_gt(m->private); - + xe_pm_runtime_get(gt_to_xe(gt)); xe_gt_reset_async(gt); + xe_pm_runtime_put(gt_to_xe(gt)); return 0; } -static int sa_info(struct seq_file *m, void *data) +static int sa_info(struct xe_gt *gt, struct drm_printer *p) { - struct xe_tile *tile = gt_to_tile(node_to_gt(m->private)); - struct drm_printer p = drm_seq_file_printer(m); + struct xe_tile *tile = gt_to_tile(gt); - drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, &p, + xe_pm_runtime_get(gt_to_xe(gt)); + drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, p, tile->mem.kernel_bb_pool->gpu_addr); + xe_pm_runtime_put(gt_to_xe(gt)); return 0; } -static int topology(struct seq_file *m, void *data) +static int topology(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt *gt = node_to_gt(m->private); - struct drm_printer p = drm_seq_file_printer(m); - - xe_gt_topology_dump(gt, &p); + xe_pm_runtime_get(gt_to_xe(gt)); + xe_gt_topology_dump(gt, p); + xe_pm_runtime_put(gt_to_xe(gt)); return 0; } -static int steering(struct seq_file *m, void *data) +static int steering(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt *gt = node_to_gt(m->private); - struct drm_printer p = drm_seq_file_printer(m); - - xe_gt_mcr_steering_dump(gt, &p); + xe_pm_runtime_get(gt_to_xe(gt)); + xe_gt_mcr_steering_dump(gt, p); + xe_pm_runtime_put(gt_to_xe(gt)); return 0; } -static int ggtt(struct seq_file *m, void *data) +static int ggtt(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt *gt = node_to_gt(m->private); - struct drm_printer p = drm_seq_file_printer(m); + int ret; + + xe_pm_runtime_get(gt_to_xe(gt)); + ret = xe_ggtt_dump(gt_to_tile(gt)->mem.ggtt, p); + xe_pm_runtime_put(gt_to_xe(gt)); - return xe_ggtt_dump(gt_to_tile(gt)->mem.ggtt, &p); + return ret; } -static int register_save_restore(struct seq_file *m, void *data) +static int register_save_restore(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt *gt = node_to_gt(m->private); - struct drm_printer p = drm_seq_file_printer(m); struct xe_hw_engine *hwe; enum xe_hw_engine_id id; - xe_reg_sr_dump(>->reg_sr, &p); - drm_printf(&p, "\n"); + xe_pm_runtime_get(gt_to_xe(gt)); - drm_printf(&p, "Engine\n"); + xe_reg_sr_dump(>->reg_sr, p); + drm_printf(p, "\n"); + + drm_printf(p, "Engine\n"); for_each_hw_engine(hwe, gt, id) - xe_reg_sr_dump(&hwe->reg_sr, &p); - drm_printf(&p, "\n"); + xe_reg_sr_dump(&hwe->reg_sr, p); + drm_printf(p, "\n"); - drm_printf(&p, "LRC\n"); + drm_printf(p, "LRC\n"); for_each_hw_engine(hwe, gt, id) - xe_reg_sr_dump(&hwe->reg_lrc, &p); - drm_printf(&p, "\n"); + xe_reg_sr_dump(&hwe->reg_lrc, p); + drm_printf(p, "\n"); - drm_printf(&p, "Whitelist\n"); + drm_printf(p, "Whitelist\n"); for_each_hw_engine(hwe, gt, id) - xe_reg_whitelist_dump(&hwe->reg_whitelist, &p); + xe_reg_whitelist_dump(&hwe->reg_whitelist, p); + + xe_pm_runtime_put(gt_to_xe(gt)); return 0; } -static int workarounds(struct seq_file *m, void *data) +static int workarounds(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt *gt = node_to_gt(m->private); - struct drm_printer p = drm_seq_file_printer(m); - - xe_wa_dump(gt, &p); + xe_pm_runtime_get(gt_to_xe(gt)); + xe_wa_dump(gt, p); + xe_pm_runtime_put(gt_to_xe(gt)); return 0; } -static int pat(struct seq_file *m, void *data) +static int pat(struct xe_gt *gt, struct drm_printer *p) { - struct xe_gt *gt = node_to_gt(m->private); - struct drm_printer p = drm_seq_file_printer(m); - - xe_pat_dump(gt, &p); + xe_pm_runtime_get(gt_to_xe(gt)); + xe_pat_dump(gt, p); + xe_pm_runtime_put(gt_to_xe(gt)); return 0; } -static int rcs_default_lrc(struct seq_file *m, void *data) +static int rcs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { - struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); + xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_RENDER); + xe_pm_runtime_put(gt_to_xe(gt)); - xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_RENDER); return 0; } -static int ccs_default_lrc(struct seq_file *m, void *data) +static int ccs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { - struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); + xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_COMPUTE); + xe_pm_runtime_put(gt_to_xe(gt)); - xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COMPUTE); return 0; } -static int bcs_default_lrc(struct seq_file *m, void *data) +static int bcs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { - struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); + xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_COPY); + xe_pm_runtime_put(gt_to_xe(gt)); - xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_COPY); return 0; } -static int vcs_default_lrc(struct seq_file *m, void *data) +static int vcs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { - struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); + xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_VIDEO_DECODE); + xe_pm_runtime_put(gt_to_xe(gt)); - xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_DECODE); return 0; } -static int vecs_default_lrc(struct seq_file *m, void *data) +static int vecs_default_lrc(struct xe_gt *gt, struct drm_printer *p) { - struct drm_printer p = drm_seq_file_printer(m); + xe_pm_runtime_get(gt_to_xe(gt)); + xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_VIDEO_ENHANCE); + xe_pm_runtime_put(gt_to_xe(gt)); - xe_lrc_dump_default(&p, node_to_gt(m->private), XE_ENGINE_CLASS_VIDEO_ENHANCE); return 0; } static const struct drm_info_list debugfs_list[] = { - {"hw_engines", hw_engines, 0}, - {"force_reset", force_reset, 0}, - {"sa_info", sa_info, 0}, - {"topology", topology, 0}, - {"steering", steering, 0}, - {"ggtt", ggtt, 0}, - {"register-save-restore", register_save_restore, 0}, - {"workarounds", workarounds, 0}, - {"pat", pat, 0}, - {"default_lrc_rcs", rcs_default_lrc}, - {"default_lrc_ccs", ccs_default_lrc}, - {"default_lrc_bcs", bcs_default_lrc}, - {"default_lrc_vcs", vcs_default_lrc}, - {"default_lrc_vecs", vecs_default_lrc}, + {"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines}, + {"force_reset", .show = xe_gt_debugfs_simple_show, .data = force_reset}, + {"sa_info", .show = xe_gt_debugfs_simple_show, .data = sa_info}, + {"topology", .show = xe_gt_debugfs_simple_show, .data = topology}, + {"steering", .show = xe_gt_debugfs_simple_show, .data = steering}, + {"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt}, + {"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore}, + {"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds}, + {"pat", .show = xe_gt_debugfs_simple_show, .data = pat}, + {"default_lrc_rcs", .show = xe_gt_debugfs_simple_show, .data = rcs_default_lrc}, + {"default_lrc_ccs", .show = xe_gt_debugfs_simple_show, .data = ccs_default_lrc}, + {"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc}, + {"default_lrc_vcs", .show = xe_gt_debugfs_simple_show, .data = vcs_default_lrc}, + {"default_lrc_vecs", .show = xe_gt_debugfs_simple_show, .data = vecs_default_lrc}, }; void xe_gt_debugfs_register(struct xe_gt *gt) @@ -212,13 +267,11 @@ void xe_gt_debugfs_register(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); struct drm_minor *minor = gt_to_xe(gt)->drm.primary; struct dentry *root; - struct drm_info_list *local; char name[8]; - int i; xe_gt_assert(gt, minor->debugfs_root); - sprintf(name, "gt%d", gt->info.id); + snprintf(name, sizeof(name), "gt%d", gt->info.id); root = debugfs_create_dir(name, minor->debugfs_root); if (IS_ERR(root)) { drm_warn(&xe->drm, "Create GT directory failed"); @@ -226,22 +279,13 @@ void xe_gt_debugfs_register(struct xe_gt *gt) } /* - * Allocate local copy as we need to pass in the GT to the debugfs - * entry and drm_debugfs_create_files just references the drm_info_list - * passed in (e.g. can't define this on the stack). + * Store the xe_gt pointer as private data of the gt/ directory node + * so other GT specific attributes under that directory may refer to + * it by looking at its parent node private data. */ -#define DEBUGFS_SIZE (ARRAY_SIZE(debugfs_list) * sizeof(struct drm_info_list)) - local = drmm_kmalloc(&xe->drm, DEBUGFS_SIZE, GFP_KERNEL); - if (!local) - return; - - memcpy(local, debugfs_list, DEBUGFS_SIZE); -#undef DEBUGFS_SIZE - - for (i = 0; i < ARRAY_SIZE(debugfs_list); ++i) - local[i].data = gt; + root->d_inode->i_private = gt; - drm_debugfs_create_files(local, + drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), root, minor); diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.h b/drivers/gpu/drm/xe/xe_gt_debugfs.h index 5a329f118a..05a6cc93c7 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.h +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.h @@ -6,8 +6,10 @@ #ifndef _XE_GT_DEBUGFS_H_ #define _XE_GT_DEBUGFS_H_ +struct seq_file; struct xe_gt; void xe_gt_debugfs_register(struct xe_gt *gt); +int xe_gt_debugfs_simple_show(struct seq_file *m, void *data); #endif diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c index e5b0f4ecdb..855de40e40 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.c +++ b/drivers/gpu/drm/xe/xe_gt_freq.c @@ -15,6 +15,7 @@ #include "xe_gt_sysfs.h" #include "xe_gt_throttle_sysfs.h" #include "xe_guc_pc.h" +#include "xe_pm.h" /** * DOC: Xe GT Frequency Management @@ -49,12 +50,23 @@ dev_to_pc(struct device *dev) return &kobj_to_gt(dev->kobj.parent)->uc.guc.pc; } +static struct xe_device * +dev_to_xe(struct device *dev) +{ + return gt_to_xe(kobj_to_gt(dev->kobj.parent)); +} + static ssize_t act_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; - return sysfs_emit(buf, "%d\n", xe_guc_pc_get_act_freq(pc)); + xe_pm_runtime_get(dev_to_xe(dev)); + freq = xe_guc_pc_get_act_freq(pc); + xe_pm_runtime_put(dev_to_xe(dev)); + + return sysfs_emit(buf, "%d\n", freq); } static DEVICE_ATTR_RO(act_freq); @@ -65,7 +77,9 @@ static ssize_t cur_freq_show(struct device *dev, u32 freq; ssize_t ret; + xe_pm_runtime_get(dev_to_xe(dev)); ret = xe_guc_pc_get_cur_freq(pc, &freq); + xe_pm_runtime_put(dev_to_xe(dev)); if (ret) return ret; @@ -77,8 +91,13 @@ static ssize_t rp0_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; - return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rp0_freq(pc)); + xe_pm_runtime_get(dev_to_xe(dev)); + freq = xe_guc_pc_get_rp0_freq(pc); + xe_pm_runtime_put(dev_to_xe(dev)); + + return sysfs_emit(buf, "%d\n", freq); } static DEVICE_ATTR_RO(rp0_freq); @@ -86,8 +105,13 @@ static ssize_t rpe_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct xe_guc_pc *pc = dev_to_pc(dev); + u32 freq; + + xe_pm_runtime_get(dev_to_xe(dev)); + freq = xe_guc_pc_get_rpe_freq(pc); + xe_pm_runtime_put(dev_to_xe(dev)); - return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rpe_freq(pc)); + return sysfs_emit(buf, "%d\n", freq); } static DEVICE_ATTR_RO(rpe_freq); @@ -107,7 +131,9 @@ static ssize_t min_freq_show(struct device *dev, u32 freq; ssize_t ret; + xe_pm_runtime_get(dev_to_xe(dev)); ret = xe_guc_pc_get_min_freq(pc, &freq); + xe_pm_runtime_put(dev_to_xe(dev)); if (ret) return ret; @@ -125,7 +151,9 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr, if (ret) return ret; + xe_pm_runtime_get(dev_to_xe(dev)); ret = xe_guc_pc_set_min_freq(pc, freq); + xe_pm_runtime_put(dev_to_xe(dev)); if (ret) return ret; @@ -140,7 +168,9 @@ static ssize_t max_freq_show(struct device *dev, u32 freq; ssize_t ret; + xe_pm_runtime_get(dev_to_xe(dev)); ret = xe_guc_pc_get_max_freq(pc, &freq); + xe_pm_runtime_put(dev_to_xe(dev)); if (ret) return ret; @@ -158,7 +188,9 @@ static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr, if (ret) return ret; + xe_pm_runtime_get(dev_to_xe(dev)); ret = xe_guc_pc_set_max_freq(pc, freq); + xe_pm_runtime_put(dev_to_xe(dev)); if (ret) return ret; @@ -190,33 +222,28 @@ static void freq_fini(struct drm_device *drm, void *arg) * @gt: Xe GT object * * It needs to be initialized after GT Sysfs and GuC PC components are ready. + * + * Returns: Returns error value for failure and 0 for success. */ -void xe_gt_freq_init(struct xe_gt *gt) +int xe_gt_freq_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); int err; if (xe->info.skip_guc_pc) - return; + return 0; gt->freq = kobject_create_and_add("freq0", gt->sysfs); - if (!gt->freq) { - drm_warn(&xe->drm, "failed to add freq0 directory to %s\n", - kobject_name(gt->sysfs)); - return; - } + if (!gt->freq) + return -ENOMEM; err = drmm_add_action_or_reset(&xe->drm, freq_fini, gt->freq); - if (err) { - drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); - return; - } + if (err) + return err; err = sysfs_create_files(gt->freq, freq_attrs); if (err) - drm_warn(&xe->drm, "failed to add freq attrs to %s, err: %d\n", - kobject_name(gt->freq), err); + return err; - xe_gt_throttle_sysfs_init(gt); + return xe_gt_throttle_sysfs_init(gt); } diff --git a/drivers/gpu/drm/xe/xe_gt_freq.h b/drivers/gpu/drm/xe/xe_gt_freq.h index f3fe3c9049..b7fddbe7b9 100644 --- a/drivers/gpu/drm/xe/xe_gt_freq.h +++ b/drivers/gpu/drm/xe/xe_gt_freq.h @@ -8,6 +8,6 @@ struct xe_gt; -void xe_gt_freq_init(struct xe_gt *gt); +int xe_gt_freq_init(struct xe_gt *gt); #endif diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index e7a39ad7ad..944770fb2d 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -12,6 +12,7 @@ #include "xe_guc_pc.h" #include "regs/xe_gt_regs.h" #include "xe_mmio.h" +#include "xe_pm.h" /** * DOC: Xe GT Idle @@ -40,6 +41,15 @@ static struct xe_guc_pc *gtidle_to_pc(struct xe_gt_idle *gtidle) return >idle_to_gt(gtidle)->uc.guc.pc; } +static struct xe_device * +pc_to_xe(struct xe_guc_pc *pc) +{ + struct xe_guc *guc = container_of(pc, struct xe_guc, pc); + struct xe_gt *gt = container_of(guc, struct xe_gt, uc.guc); + + return gt_to_xe(gt); +} + static const char *gt_idle_state_to_string(enum xe_gt_idle_state state) { switch (state) { @@ -86,8 +96,14 @@ static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buff) { struct xe_gt_idle *gtidle = dev_to_gtidle(dev); + struct xe_guc_pc *pc = gtidle_to_pc(gtidle); + ssize_t ret; + + xe_pm_runtime_get(pc_to_xe(pc)); + ret = sysfs_emit(buff, "%s\n", gtidle->name); + xe_pm_runtime_put(pc_to_xe(pc)); - return sysfs_emit(buff, "%s\n", gtidle->name); + return ret; } static DEVICE_ATTR_RO(name); @@ -98,7 +114,9 @@ static ssize_t idle_status_show(struct device *dev, struct xe_guc_pc *pc = gtidle_to_pc(gtidle); enum xe_gt_idle_state state; + xe_pm_runtime_get(pc_to_xe(pc)); state = gtidle->idle_status(pc); + xe_pm_runtime_put(pc_to_xe(pc)); return sysfs_emit(buff, "%s\n", gt_idle_state_to_string(state)); } @@ -111,7 +129,10 @@ static ssize_t idle_residency_ms_show(struct device *dev, struct xe_guc_pc *pc = gtidle_to_pc(gtidle); u64 residency; + xe_pm_runtime_get(pc_to_xe(pc)); residency = gtidle->idle_residency(pc); + xe_pm_runtime_put(pc_to_xe(pc)); + return sysfs_emit(buff, "%llu\n", get_residency_ms(gtidle, residency)); } static DEVICE_ATTR_RO(idle_residency_ms); @@ -138,7 +159,7 @@ static void gt_idle_sysfs_fini(struct drm_device *drm, void *arg) kobject_put(kobj); } -void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle) +int xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle) { struct xe_gt *gt = gtidle_to_gt(gtidle); struct xe_device *xe = gt_to_xe(gt); @@ -146,16 +167,14 @@ void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle) int err; kobj = kobject_create_and_add("gtidle", gt->sysfs); - if (!kobj) { - drm_warn(&xe->drm, "%s failed, err: %d\n", __func__, -ENOMEM); - return; - } + if (!kobj) + return -ENOMEM; if (xe_gt_is_media_type(gt)) { - sprintf(gtidle->name, "gt%d-mc", gt->info.id); + snprintf(gtidle->name, sizeof(gtidle->name), "gt%d-mc", gt->info.id); gtidle->idle_residency = xe_guc_pc_mc6_residency; } else { - sprintf(gtidle->name, "gt%d-rc", gt->info.id); + snprintf(gtidle->name, sizeof(gtidle->name), "gt%d-rc", gt->info.id); gtidle->idle_residency = xe_guc_pc_rc6_residency; } @@ -166,14 +185,10 @@ void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle) err = sysfs_create_files(kobj, gt_idle_attrs); if (err) { kobject_put(kobj); - drm_warn(&xe->drm, "failed to register gtidle sysfs, err: %d\n", err); - return; + return err; } - err = drmm_add_action_or_reset(&xe->drm, gt_idle_sysfs_fini, kobj); - if (err) - drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); + return drmm_add_action_or_reset(&xe->drm, gt_idle_sysfs_fini, kobj); } void xe_gt_idle_enable_c6(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h index 69280fd16b..75bd99659b 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.h +++ b/drivers/gpu/drm/xe/xe_gt_idle.h @@ -10,7 +10,7 @@ struct xe_gt; -void xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle); +int xe_gt_idle_sysfs_init(struct xe_gt_idle *gtidle); void xe_gt_idle_enable_c6(struct xe_gt *gt); void xe_gt_idle_disable_c6(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index c78fbb9bc5..0443e07880 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -6,6 +6,7 @@ #include "xe_gt_mcr.h" #include "regs/xe_gt_regs.h" +#include "xe_assert.h" #include "xe_gt.h" #include "xe_gt_topology.h" #include "xe_gt_types.h" @@ -294,14 +295,40 @@ static void init_steering_mslice(struct xe_gt *gt) gt->steering[LNCF].instance_target = 0; /* unused */ } -static void init_steering_dss(struct xe_gt *gt) +static unsigned int dss_per_group(struct xe_gt *gt) { - unsigned int dss = min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0), - xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0)); - unsigned int dss_per_grp = gt_to_xe(gt)->info.platform == XE_PVC ? 8 : 4; + if (gt_to_xe(gt)->info.platform == XE_PVC) + return 8; + else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) + return 4; + else + return 6; +} - gt->steering[DSS].group_target = dss / dss_per_grp; - gt->steering[DSS].instance_target = dss % dss_per_grp; +/** + * xe_gt_mcr_get_dss_steering - Get the group/instance steering for a DSS + * @gt: GT structure + * @dss: DSS ID to obtain steering for + * @group: pointer to storage for steering group ID + * @instance: pointer to storage for steering instance ID + */ +void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance) +{ + int dss_per_grp = dss_per_group(gt); + + xe_gt_assert(gt, dss < XE_MAX_DSS_FUSE_BITS); + + *group = dss / dss_per_grp; + *instance = dss % dss_per_grp; +} + +static void init_steering_dss(struct xe_gt *gt) +{ + xe_gt_mcr_get_dss_steering(gt, + min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0), + xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0)), + >->steering[DSS].group_target, + >->steering[DSS].instance_target); } static void init_steering_oaddrm(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h index 27ca1bc880..a7f4ab1aa5 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.h +++ b/drivers/gpu/drm/xe/xe_gt_mcr.h @@ -7,6 +7,7 @@ #define _XE_GT_MCR_H_ #include "regs/xe_reg_defs.h" +#include "xe_gt_topology.h" struct drm_printer; struct xe_gt; @@ -25,5 +26,18 @@ void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg, u32 value); void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p); +void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance); + +/* + * Loop over each DSS and determine the group and instance IDs that + * should be used to steer MCR accesses toward this DSS. + * @dss: DSS ID to obtain steering for + * @gt: GT structure + * @group: steering group ID, data type: u16 + * @instance: steering instance ID, data type: u16 + */ +#define for_each_dss_steering(dss, gt, group, instance) \ + for_each_dss((dss), (gt)) \ + for_each_if((xe_gt_mcr_get_dss_steering((gt), (dss), &(group), &(instance)), true)) #endif /* _XE_GT_MCR_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c new file mode 100644 index 0000000000..791dcdd767 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "xe_gt_sriov_pf.h" +#include "xe_gt_sriov_pf_helpers.h" + +/* + * VF's metadata is maintained in the flexible array where: + * - entry [0] contains metadata for the PF (only if applicable), + * - entries [1..n] contain metadata for VF1..VFn:: + * + * <--------------------------- 1 + total_vfs -----------> + * +-------+-------+-------+-----------------------+-------+ + * | 0 | 1 | 2 | | n | + * +-------+-------+-------+-----------------------+-------+ + * | PF | VF1 | VF2 | ... ... | VFn | + * +-------+-------+-------+-----------------------+-------+ + */ +static int pf_alloc_metadata(struct xe_gt *gt) +{ + unsigned int num_vfs = xe_gt_sriov_pf_get_totalvfs(gt); + + gt->sriov.pf.vfs = drmm_kcalloc(>_to_xe(gt)->drm, 1 + num_vfs, + sizeof(*gt->sriov.pf.vfs), GFP_KERNEL); + if (!gt->sriov.pf.vfs) + return -ENOMEM; + + return 0; +} + +/** + * xe_gt_sriov_pf_init_early - Prepare SR-IOV PF data structures on PF. + * @gt: the &xe_gt to initialize + * + * Early initialization of the PF data. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_init_early(struct xe_gt *gt) +{ + int err; + + err = pf_alloc_metadata(gt); + if (err) + return err; + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h new file mode 100644 index 0000000000..05142ffc43 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_H_ +#define _XE_GT_SRIOV_PF_H_ + +struct xe_gt; + +#ifdef CONFIG_PCI_IOV +int xe_gt_sriov_pf_init_early(struct xe_gt *gt); +#else +static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt) +{ + return 0; +} +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c new file mode 100644 index 0000000000..4f40fe24c6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -0,0 +1,1990 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include <linux/string_choices.h> +#include <linux/wordpart.h> + +#include "abi/guc_actions_sriov_abi.h" +#include "abi/guc_klvs_abi.h" + +#include "regs/xe_guc_regs.h" + +#include "xe_bo.h" +#include "xe_device.h" +#include "xe_ggtt.h" +#include "xe_gt.h" +#include "xe_gt_sriov_pf_config.h" +#include "xe_gt_sriov_pf_helpers.h" +#include "xe_gt_sriov_pf_policy.h" +#include "xe_gt_sriov_printk.h" +#include "xe_guc.h" +#include "xe_guc_ct.h" +#include "xe_guc_db_mgr.h" +#include "xe_guc_fwif.h" +#include "xe_guc_id_mgr.h" +#include "xe_guc_klv_helpers.h" +#include "xe_guc_submit.h" +#include "xe_lmtt.h" +#include "xe_map.h" +#include "xe_sriov.h" +#include "xe_ttm_vram_mgr.h" +#include "xe_wopcm.h" + +/* + * Return: number of KLVs that were successfully parsed and saved, + * negative error code on failure. + */ +static int guc_action_update_vf_cfg(struct xe_guc *guc, u32 vfid, + u64 addr, u32 size) +{ + u32 request[] = { + GUC_ACTION_PF2GUC_UPDATE_VF_CFG, + vfid, + lower_32_bits(addr), + upper_32_bits(addr), + size, + }; + + return xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request)); +} + +/* + * Return: 0 on success, negative error code on failure. + */ +static int pf_send_vf_cfg_reset(struct xe_gt *gt, u32 vfid) +{ + struct xe_guc *guc = >->uc.guc; + int ret; + + ret = guc_action_update_vf_cfg(guc, vfid, 0, 0); + + return ret <= 0 ? ret : -EPROTO; +} + +/* + * Return: number of KLVs that were successfully parsed and saved, + * negative error code on failure. + */ +static int pf_send_vf_cfg_klvs(struct xe_gt *gt, u32 vfid, const u32 *klvs, u32 num_dwords) +{ + const u32 bytes = num_dwords * sizeof(u32); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = tile_to_xe(tile); + struct xe_guc *guc = >->uc.guc; + struct xe_bo *bo; + int ret; + + bo = xe_bo_create_pin_map(xe, tile, NULL, + ALIGN(bytes, PAGE_SIZE), + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + xe_map_memcpy_to(xe, &bo->vmap, 0, klvs, bytes); + + ret = guc_action_update_vf_cfg(guc, vfid, xe_bo_ggtt_addr(bo), num_dwords); + + xe_bo_unpin_map_no_vm(bo); + + return ret; +} + +/* + * Return: 0 on success, -ENOKEY if some KLVs were not updated, -EPROTO if reply was malformed, + * negative error code on failure. + */ +static int pf_push_vf_cfg_klvs(struct xe_gt *gt, unsigned int vfid, u32 num_klvs, + const u32 *klvs, u32 num_dwords) +{ + int ret; + + xe_gt_assert(gt, num_klvs == xe_guc_klv_count(klvs, num_dwords)); + + ret = pf_send_vf_cfg_klvs(gt, vfid, klvs, num_dwords); + + if (ret != num_klvs) { + int err = ret < 0 ? ret : ret < num_klvs ? -ENOKEY : -EPROTO; + struct drm_printer p = xe_gt_info_printer(gt); + char name[8]; + + xe_gt_sriov_notice(gt, "Failed to push %s %u config KLV%s (%pe)\n", + xe_sriov_function_name(vfid, name, sizeof(name)), + num_klvs, str_plural(num_klvs), ERR_PTR(err)); + xe_guc_klv_print(klvs, num_dwords, &p); + return err; + } + + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { + struct drm_printer p = xe_gt_info_printer(gt); + + xe_guc_klv_print(klvs, num_dwords, &p); + } + + return 0; +} + +static int pf_push_vf_cfg_u32(struct xe_gt *gt, unsigned int vfid, u16 key, u32 value) +{ + u32 klv[] = { + FIELD_PREP(GUC_KLV_0_KEY, key) | FIELD_PREP(GUC_KLV_0_LEN, 1), + value, + }; + + return pf_push_vf_cfg_klvs(gt, vfid, 1, klv, ARRAY_SIZE(klv)); +} + +static int pf_push_vf_cfg_u64(struct xe_gt *gt, unsigned int vfid, u16 key, u64 value) +{ + u32 klv[] = { + FIELD_PREP(GUC_KLV_0_KEY, key) | FIELD_PREP(GUC_KLV_0_LEN, 2), + lower_32_bits(value), + upper_32_bits(value), + }; + + return pf_push_vf_cfg_klvs(gt, vfid, 1, klv, ARRAY_SIZE(klv)); +} + +static int pf_push_vf_cfg_ggtt(struct xe_gt *gt, unsigned int vfid, u64 start, u64 size) +{ + u32 klvs[] = { + PREP_GUC_KLV_TAG(VF_CFG_GGTT_START), + lower_32_bits(start), + upper_32_bits(start), + PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE), + lower_32_bits(size), + upper_32_bits(size), + }; + + return pf_push_vf_cfg_klvs(gt, vfid, 2, klvs, ARRAY_SIZE(klvs)); +} + +static int pf_push_vf_cfg_ctxs(struct xe_gt *gt, unsigned int vfid, u32 begin, u32 num) +{ + u32 klvs[] = { + PREP_GUC_KLV_TAG(VF_CFG_BEGIN_CONTEXT_ID), + begin, + PREP_GUC_KLV_TAG(VF_CFG_NUM_CONTEXTS), + num, + }; + + return pf_push_vf_cfg_klvs(gt, vfid, 2, klvs, ARRAY_SIZE(klvs)); +} + +static int pf_push_vf_cfg_dbs(struct xe_gt *gt, unsigned int vfid, u32 begin, u32 num) +{ + u32 klvs[] = { + PREP_GUC_KLV_TAG(VF_CFG_BEGIN_DOORBELL_ID), + begin, + PREP_GUC_KLV_TAG(VF_CFG_NUM_DOORBELLS), + num, + }; + + return pf_push_vf_cfg_klvs(gt, vfid, 2, klvs, ARRAY_SIZE(klvs)); +} + +static int pf_push_vf_cfg_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 exec_quantum) +{ + return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY, exec_quantum); +} + +static int pf_push_vf_cfg_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u32 preempt_timeout) +{ + return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY, preempt_timeout); +} + +static int pf_push_vf_cfg_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) +{ + return pf_push_vf_cfg_u64(gt, vfid, GUC_KLV_VF_CFG_LMEM_SIZE_KEY, size); +} + +static struct xe_gt_sriov_config *pf_pick_vf_config(struct xe_gt *gt, unsigned int vfid) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + return >->sriov.pf.vfs[vfid].config; +} + +/* Return: number of configuration dwords written */ +static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config) +{ + u32 n = 0; + + if (drm_mm_node_allocated(&config->ggtt_region)) { + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); + cfg[n++] = lower_32_bits(config->ggtt_region.start); + cfg[n++] = upper_32_bits(config->ggtt_region.start); + + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE); + cfg[n++] = lower_32_bits(config->ggtt_region.size); + cfg[n++] = upper_32_bits(config->ggtt_region.size); + } + + return n; +} + +/* Return: number of configuration dwords written */ +static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config) +{ + u32 n = 0; + + n += encode_config_ggtt(cfg, config); + + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_BEGIN_CONTEXT_ID); + cfg[n++] = config->begin_ctx; + + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_NUM_CONTEXTS); + cfg[n++] = config->num_ctxs; + + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_BEGIN_DOORBELL_ID); + cfg[n++] = config->begin_db; + + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_NUM_DOORBELLS); + cfg[n++] = config->num_dbs; + + if (config->lmem_obj) { + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_LMEM_SIZE); + cfg[n++] = lower_32_bits(config->lmem_obj->size); + cfg[n++] = upper_32_bits(config->lmem_obj->size); + } + + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_EXEC_QUANTUM); + cfg[n++] = config->exec_quantum; + + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_PREEMPT_TIMEOUT); + cfg[n++] = config->preempt_timeout; + + return n; +} + +static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + u32 max_cfg_dwords = SZ_4K / sizeof(u32); + u32 num_dwords; + int num_klvs; + u32 *cfg; + int err; + + cfg = kcalloc(max_cfg_dwords, sizeof(u32), GFP_KERNEL); + if (!cfg) + return -ENOMEM; + + num_dwords = encode_config(cfg, config); + xe_gt_assert(gt, num_dwords <= max_cfg_dwords); + + if (xe_gt_is_media_type(gt)) { + struct xe_gt *primary = gt->tile->primary_gt; + struct xe_gt_sriov_config *other = pf_pick_vf_config(primary, vfid); + + /* media-GT will never include a GGTT config */ + xe_gt_assert(gt, !encode_config_ggtt(cfg + num_dwords, config)); + + /* the GGTT config must be taken from the primary-GT instead */ + num_dwords += encode_config_ggtt(cfg + num_dwords, other); + } + xe_gt_assert(gt, num_dwords <= max_cfg_dwords); + + num_klvs = xe_guc_klv_count(cfg, num_dwords); + err = pf_push_vf_cfg_klvs(gt, vfid, num_klvs, cfg, num_dwords); + + kfree(cfg); + return err; +} + +static u64 pf_get_ggtt_alignment(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + return IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K; +} + +static u64 pf_get_min_spare_ggtt(struct xe_gt *gt) +{ + /* XXX: preliminary */ + return IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? + pf_get_ggtt_alignment(gt) : SZ_64M; +} + +static u64 pf_get_spare_ggtt(struct xe_gt *gt) +{ + u64 spare; + + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + spare = gt->sriov.pf.spare.ggtt_size; + spare = max_t(u64, spare, pf_get_min_spare_ggtt(gt)); + + return spare; +} + +static int pf_set_spare_ggtt(struct xe_gt *gt, u64 size) +{ + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + if (size && size < pf_get_min_spare_ggtt(gt)) + return -EINVAL; + + size = round_up(size, pf_get_ggtt_alignment(gt)); + gt->sriov.pf.spare.ggtt_size = size; + + return 0; +} + +static int pf_distribute_config_ggtt(struct xe_tile *tile, unsigned int vfid, u64 start, u64 size) +{ + int err, err2 = 0; + + err = pf_push_vf_cfg_ggtt(tile->primary_gt, vfid, start, size); + + if (tile->media_gt && !err) + err2 = pf_push_vf_cfg_ggtt(tile->media_gt, vfid, start, size); + + return err ?: err2; +} + +static void pf_release_ggtt(struct xe_tile *tile, struct drm_mm_node *node) +{ + struct xe_ggtt *ggtt = tile->mem.ggtt; + + if (drm_mm_node_allocated(node)) { + /* + * explicit GGTT PTE assignment to the PF using xe_ggtt_assign() + * is redundant, as PTE will be implicitly re-assigned to PF by + * the xe_ggtt_clear() called by below xe_ggtt_remove_node(). + */ + xe_ggtt_remove_node(ggtt, node, false); + } +} + +static void pf_release_vf_config_ggtt(struct xe_gt *gt, struct xe_gt_sriov_config *config) +{ + pf_release_ggtt(gt_to_tile(gt), &config->ggtt_region); +} + +static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + struct drm_mm_node *node = &config->ggtt_region; + struct xe_tile *tile = gt_to_tile(gt); + struct xe_ggtt *ggtt = tile->mem.ggtt; + u64 alignment = pf_get_ggtt_alignment(gt); + int err; + + xe_gt_assert(gt, vfid); + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + size = round_up(size, alignment); + + if (drm_mm_node_allocated(node)) { + err = pf_distribute_config_ggtt(tile, vfid, 0, 0); + if (unlikely(err)) + return err; + + pf_release_ggtt(tile, node); + } + xe_gt_assert(gt, !drm_mm_node_allocated(node)); + + if (!size) + return 0; + + err = xe_ggtt_insert_special_node(ggtt, node, size, alignment); + if (unlikely(err)) + return err; + + xe_ggtt_assign(ggtt, node, vfid); + xe_gt_sriov_dbg_verbose(gt, "VF%u assigned GGTT %llx-%llx\n", + vfid, node->start, node->start + node->size - 1); + + err = pf_distribute_config_ggtt(gt->tile, vfid, node->start, node->size); + if (unlikely(err)) + return err; + + return 0; +} + +static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + struct drm_mm_node *node = &config->ggtt_region; + + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + return drm_mm_node_allocated(node) ? node->size : 0; +} + +/** + * xe_gt_sriov_pf_config_get_ggtt - Query size of GGTT address space of the VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function can only be called on PF. + * + * Return: size of the VF's assigned (or PF's spare) GGTT address space. + */ +u64 xe_gt_sriov_pf_config_get_ggtt(struct xe_gt *gt, unsigned int vfid) +{ + u64 size; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + if (vfid) + size = pf_get_vf_config_ggtt(gt_to_tile(gt)->primary_gt, vfid); + else + size = pf_get_spare_ggtt(gt_to_tile(gt)->primary_gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return size; +} + +static int pf_config_set_u64_done(struct xe_gt *gt, unsigned int vfid, u64 value, + u64 actual, const char *what, int err) +{ + char size[10]; + char name[8]; + + xe_sriov_function_name(vfid, name, sizeof(name)); + + if (unlikely(err)) { + string_get_size(value, 1, STRING_UNITS_2, size, sizeof(size)); + xe_gt_sriov_notice(gt, "Failed to provision %s with %llu (%s) %s (%pe)\n", + name, value, size, what, ERR_PTR(err)); + string_get_size(actual, 1, STRING_UNITS_2, size, sizeof(size)); + xe_gt_sriov_info(gt, "%s provisioning remains at %llu (%s) %s\n", + name, actual, size, what); + return err; + } + + /* the actual value may have changed during provisioning */ + string_get_size(actual, 1, STRING_UNITS_2, size, sizeof(size)); + xe_gt_sriov_info(gt, "%s provisioned with %llu (%s) %s\n", + name, actual, size, what); + return 0; +} + +/** + * xe_gt_sriov_pf_config_set_ggtt - Provision VF with GGTT space. + * @gt: the &xe_gt (can't be media) + * @vfid: the VF identifier + * @size: requested GGTT size + * + * If &vfid represents PF, then function will change PF's spare GGTT config. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) +{ + int err; + + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + if (vfid) + err = pf_provision_vf_ggtt(gt, vfid, size); + else + err = pf_set_spare_ggtt(gt, size); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_set_u64_done(gt, vfid, size, + xe_gt_sriov_pf_config_get_ggtt(gt, vfid), + vfid ? "GGTT" : "spare GGTT", err); +} + +static int pf_config_bulk_set_u64_done(struct xe_gt *gt, unsigned int first, unsigned int num_vfs, + u64 value, u64 (*get)(struct xe_gt*, unsigned int), + const char *what, unsigned int last, int err) +{ + char size[10]; + + xe_gt_assert(gt, first); + xe_gt_assert(gt, num_vfs); + xe_gt_assert(gt, first <= last); + + if (num_vfs == 1) + return pf_config_set_u64_done(gt, first, value, get(gt, first), what, err); + + if (unlikely(err)) { + xe_gt_sriov_notice(gt, "Failed to bulk provision VF%u..VF%u with %s\n", + first, first + num_vfs - 1, what); + if (last > first) + pf_config_bulk_set_u64_done(gt, first, last - first, value, + get, what, last, 0); + return pf_config_set_u64_done(gt, last, value, get(gt, last), what, err); + } + + /* pick actual value from first VF - bulk provisioning shall be equal across all VFs */ + value = get(gt, first); + string_get_size(value, 1, STRING_UNITS_2, size, sizeof(size)); + xe_gt_sriov_info(gt, "VF%u..VF%u provisioned with %llu (%s) %s\n", + first, first + num_vfs - 1, value, size, what); + return 0; +} + +/** + * xe_gt_sriov_pf_config_bulk_set_ggtt - Provision many VFs with GGTT. + * @gt: the &xe_gt (can't be media) + * @vfid: starting VF identifier (can't be 0) + * @num_vfs: number of VFs to provision + * @size: requested GGTT size + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_bulk_set_ggtt(struct xe_gt *gt, unsigned int vfid, + unsigned int num_vfs, u64 size) +{ + unsigned int n; + int err = 0; + + xe_gt_assert(gt, vfid); + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + + if (!num_vfs) + return 0; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + for (n = vfid; n < vfid + num_vfs; n++) { + err = pf_provision_vf_ggtt(gt, n, size); + if (err) + break; + } + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_bulk_set_u64_done(gt, vfid, num_vfs, size, + xe_gt_sriov_pf_config_get_ggtt, + "GGTT", n, err); +} + +/* Return: size of the largest continuous GGTT region */ +static u64 pf_get_max_ggtt(struct xe_gt *gt) +{ + struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt; + const struct drm_mm *mm = &ggtt->mm; + const struct drm_mm_node *entry; + u64 alignment = pf_get_ggtt_alignment(gt); + u64 spare = pf_get_spare_ggtt(gt); + u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt)); + u64 hole_start, hole_end, hole_size; + u64 max_hole = 0; + + mutex_lock(&ggtt->lock); + + drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { + hole_start = max(hole_start, hole_min_start); + hole_start = ALIGN(hole_start, alignment); + hole_end = ALIGN_DOWN(hole_end, alignment); + if (hole_start >= hole_end) + continue; + hole_size = hole_end - hole_start; + xe_gt_sriov_dbg_verbose(gt, "HOLE start %llx size %lluK\n", + hole_start, hole_size / SZ_1K); + spare -= min3(spare, hole_size, max_hole); + max_hole = max(max_hole, hole_size); + } + + mutex_unlock(&ggtt->lock); + + xe_gt_sriov_dbg_verbose(gt, "HOLE max %lluK reserved %lluK\n", + max_hole / SZ_1K, spare / SZ_1K); + return max_hole > spare ? max_hole - spare : 0; +} + +static u64 pf_estimate_fair_ggtt(struct xe_gt *gt, unsigned int num_vfs) +{ + u64 available = pf_get_max_ggtt(gt); + u64 alignment = pf_get_ggtt_alignment(gt); + u64 fair; + + /* + * To simplify the logic we only look at single largest GGTT region + * as that will be always the best fit for 1 VF case, and most likely + * will also nicely cover other cases where VFs are provisioned on the + * fresh and idle PF driver, without any stale GGTT allocations spread + * in the middle of the full GGTT range. + */ + + fair = div_u64(available, num_vfs); + fair = ALIGN_DOWN(fair, alignment); + xe_gt_sriov_dbg_verbose(gt, "GGTT available(%lluK) fair(%u x %lluK)\n", + available / SZ_1K, num_vfs, fair / SZ_1K); + return fair; +} + +/** + * xe_gt_sriov_pf_config_set_fair_ggtt - Provision many VFs with fair GGTT. + * @gt: the &xe_gt (can't be media) + * @vfid: starting VF identifier (can't be 0) + * @num_vfs: number of VFs to provision + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_fair_ggtt(struct xe_gt *gt, unsigned int vfid, + unsigned int num_vfs) +{ + u64 fair; + + xe_gt_assert(gt, vfid); + xe_gt_assert(gt, num_vfs); + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + fair = pf_estimate_fair_ggtt(gt, num_vfs); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + if (!fair) + return -ENOSPC; + + return xe_gt_sriov_pf_config_bulk_set_ggtt(gt, vfid, num_vfs, fair); +} + +static u32 pf_get_min_spare_ctxs(struct xe_gt *gt) +{ + /* XXX: preliminary */ + return IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? + hweight64(gt->info.engine_mask) : SZ_256; +} + +static u32 pf_get_spare_ctxs(struct xe_gt *gt) +{ + u32 spare; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + spare = gt->sriov.pf.spare.num_ctxs; + spare = max_t(u32, spare, pf_get_min_spare_ctxs(gt)); + + return spare; +} + +static int pf_set_spare_ctxs(struct xe_gt *gt, u32 spare) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + if (spare > GUC_ID_MAX) + return -EINVAL; + + if (spare && spare < pf_get_min_spare_ctxs(gt)) + return -EINVAL; + + gt->sriov.pf.spare.num_ctxs = spare; + + return 0; +} + +/* Return: start ID or negative error code on failure */ +static int pf_reserve_ctxs(struct xe_gt *gt, u32 num) +{ + struct xe_guc_id_mgr *idm = >->uc.guc.submission_state.idm; + unsigned int spare = pf_get_spare_ctxs(gt); + + return xe_guc_id_mgr_reserve(idm, num, spare); +} + +static void pf_release_ctxs(struct xe_gt *gt, u32 start, u32 num) +{ + struct xe_guc_id_mgr *idm = >->uc.guc.submission_state.idm; + + if (num) + xe_guc_id_mgr_release(idm, start, num); +} + +static void pf_release_config_ctxs(struct xe_gt *gt, struct xe_gt_sriov_config *config) +{ + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + pf_release_ctxs(gt, config->begin_ctx, config->num_ctxs); + config->begin_ctx = 0; + config->num_ctxs = 0; +} + +static int pf_provision_vf_ctxs(struct xe_gt *gt, unsigned int vfid, u32 num_ctxs) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + int ret; + + xe_gt_assert(gt, vfid); + + if (num_ctxs > GUC_ID_MAX) + return -EINVAL; + + if (config->num_ctxs) { + ret = pf_push_vf_cfg_ctxs(gt, vfid, 0, 0); + if (unlikely(ret)) + return ret; + + pf_release_config_ctxs(gt, config); + } + + if (!num_ctxs) + return 0; + + ret = pf_reserve_ctxs(gt, num_ctxs); + if (unlikely(ret < 0)) + return ret; + + config->begin_ctx = ret; + config->num_ctxs = num_ctxs; + + ret = pf_push_vf_cfg_ctxs(gt, vfid, config->begin_ctx, config->num_ctxs); + if (unlikely(ret)) { + pf_release_config_ctxs(gt, config); + return ret; + } + + xe_gt_sriov_dbg_verbose(gt, "VF%u contexts %u-%u\n", + vfid, config->begin_ctx, config->begin_ctx + config->num_ctxs - 1); + return 0; +} + +static u32 pf_get_vf_config_ctxs(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + + return config->num_ctxs; +} + +/** + * xe_gt_sriov_pf_config_get_ctxs - Get VF's GuC contexts IDs quota. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function can only be called on PF. + * If &vfid represents a PF then number of PF's spare GuC context IDs is returned. + * + * Return: VF's quota (or PF's spare). + */ +u32 xe_gt_sriov_pf_config_get_ctxs(struct xe_gt *gt, unsigned int vfid) +{ + u32 num_ctxs; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + if (vfid) + num_ctxs = pf_get_vf_config_ctxs(gt, vfid); + else + num_ctxs = pf_get_spare_ctxs(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return num_ctxs; +} + +static const char *no_unit(u32 unused) +{ + return ""; +} + +static const char *spare_unit(u32 unused) +{ + return " spare"; +} + +static int pf_config_set_u32_done(struct xe_gt *gt, unsigned int vfid, u32 value, u32 actual, + const char *what, const char *(*unit)(u32), int err) +{ + char name[8]; + + xe_sriov_function_name(vfid, name, sizeof(name)); + + if (unlikely(err)) { + xe_gt_sriov_notice(gt, "Failed to provision %s with %u%s %s (%pe)\n", + name, value, unit(value), what, ERR_PTR(err)); + xe_gt_sriov_info(gt, "%s provisioning remains at %u%s %s\n", + name, actual, unit(actual), what); + return err; + } + + /* the actual value may have changed during provisioning */ + xe_gt_sriov_info(gt, "%s provisioned with %u%s %s\n", + name, actual, unit(actual), what); + return 0; +} + +/** + * xe_gt_sriov_pf_config_set_ctxs - Configure GuC contexts IDs quota for the VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @num_ctxs: requested number of GuC contexts IDs (0 to release) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_ctxs(struct xe_gt *gt, unsigned int vfid, u32 num_ctxs) +{ + int err; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + if (vfid) + err = pf_provision_vf_ctxs(gt, vfid, num_ctxs); + else + err = pf_set_spare_ctxs(gt, num_ctxs); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_set_u32_done(gt, vfid, num_ctxs, + xe_gt_sriov_pf_config_get_ctxs(gt, vfid), + "GuC context IDs", vfid ? no_unit : spare_unit, err); +} + +static int pf_config_bulk_set_u32_done(struct xe_gt *gt, unsigned int first, unsigned int num_vfs, + u32 value, u32 (*get)(struct xe_gt*, unsigned int), + const char *what, const char *(*unit)(u32), + unsigned int last, int err) +{ + xe_gt_assert(gt, first); + xe_gt_assert(gt, num_vfs); + xe_gt_assert(gt, first <= last); + + if (num_vfs == 1) + return pf_config_set_u32_done(gt, first, value, get(gt, first), what, unit, err); + + if (unlikely(err)) { + xe_gt_sriov_notice(gt, "Failed to bulk provision VF%u..VF%u with %s\n", + first, first + num_vfs - 1, what); + if (last > first) + pf_config_bulk_set_u32_done(gt, first, last - first, value, + get, what, unit, last, 0); + return pf_config_set_u32_done(gt, last, value, get(gt, last), what, unit, err); + } + + /* pick actual value from first VF - bulk provisioning shall be equal across all VFs */ + value = get(gt, first); + xe_gt_sriov_info(gt, "VF%u..VF%u provisioned with %u%s %s\n", + first, first + num_vfs - 1, value, unit(value), what); + return 0; +} + +/** + * xe_gt_sriov_pf_config_bulk_set_ctxs - Provision many VFs with GuC context IDs. + * @gt: the &xe_gt + * @vfid: starting VF identifier + * @num_vfs: number of VFs to provision + * @num_ctxs: requested number of GuC contexts IDs (0 to release) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_bulk_set_ctxs(struct xe_gt *gt, unsigned int vfid, + unsigned int num_vfs, u32 num_ctxs) +{ + unsigned int n; + int err = 0; + + xe_gt_assert(gt, vfid); + + if (!num_vfs) + return 0; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + for (n = vfid; n < vfid + num_vfs; n++) { + err = pf_provision_vf_ctxs(gt, n, num_ctxs); + if (err) + break; + } + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_bulk_set_u32_done(gt, vfid, num_vfs, num_ctxs, + xe_gt_sriov_pf_config_get_ctxs, + "GuC context IDs", no_unit, n, err); +} + +static u32 pf_estimate_fair_ctxs(struct xe_gt *gt, unsigned int num_vfs) +{ + struct xe_guc_id_mgr *idm = >->uc.guc.submission_state.idm; + u32 spare = pf_get_spare_ctxs(gt); + u32 fair = (idm->total - spare) / num_vfs; + int ret; + + for (; fair; --fair) { + ret = xe_guc_id_mgr_reserve(idm, fair * num_vfs, spare); + if (ret < 0) + continue; + xe_guc_id_mgr_release(idm, ret, fair * num_vfs); + break; + } + + xe_gt_sriov_dbg_verbose(gt, "contexts fair(%u x %u)\n", num_vfs, fair); + return fair; +} + +/** + * xe_gt_sriov_pf_config_set_fair_ctxs - Provision many VFs with fair GuC context IDs. + * @gt: the &xe_gt + * @vfid: starting VF identifier (can't be 0) + * @num_vfs: number of VFs to provision (can't be 0) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_fair_ctxs(struct xe_gt *gt, unsigned int vfid, + unsigned int num_vfs) +{ + u32 fair; + + xe_gt_assert(gt, vfid); + xe_gt_assert(gt, num_vfs); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + fair = pf_estimate_fair_ctxs(gt, num_vfs); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + if (!fair) + return -ENOSPC; + + return xe_gt_sriov_pf_config_bulk_set_ctxs(gt, vfid, num_vfs, fair); +} + +static u32 pf_get_min_spare_dbs(struct xe_gt *gt) +{ + /* XXX: preliminary, we don't use doorbells yet! */ + return IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 1 : 0; +} + +static u32 pf_get_spare_dbs(struct xe_gt *gt) +{ + u32 spare; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + spare = gt->sriov.pf.spare.num_dbs; + spare = max_t(u32, spare, pf_get_min_spare_dbs(gt)); + + return spare; +} + +static int pf_set_spare_dbs(struct xe_gt *gt, u32 spare) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + if (spare > GUC_NUM_DOORBELLS) + return -EINVAL; + + if (spare && spare < pf_get_min_spare_dbs(gt)) + return -EINVAL; + + gt->sriov.pf.spare.num_dbs = spare; + return 0; +} + +/* Return: start ID or negative error code on failure */ +static int pf_reserve_dbs(struct xe_gt *gt, u32 num) +{ + struct xe_guc_db_mgr *dbm = >->uc.guc.dbm; + unsigned int spare = pf_get_spare_dbs(gt); + + return xe_guc_db_mgr_reserve_range(dbm, num, spare); +} + +static void pf_release_dbs(struct xe_gt *gt, u32 start, u32 num) +{ + struct xe_guc_db_mgr *dbm = >->uc.guc.dbm; + + if (num) + xe_guc_db_mgr_release_range(dbm, start, num); +} + +static void pf_release_config_dbs(struct xe_gt *gt, struct xe_gt_sriov_config *config) +{ + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + pf_release_dbs(gt, config->begin_db, config->num_dbs); + config->begin_db = 0; + config->num_dbs = 0; +} + +static int pf_provision_vf_dbs(struct xe_gt *gt, unsigned int vfid, u32 num_dbs) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + int ret; + + xe_gt_assert(gt, vfid); + + if (num_dbs > GUC_NUM_DOORBELLS) + return -EINVAL; + + if (config->num_dbs) { + ret = pf_push_vf_cfg_dbs(gt, vfid, 0, 0); + if (unlikely(ret)) + return ret; + + pf_release_config_dbs(gt, config); + } + + if (!num_dbs) + return 0; + + ret = pf_reserve_dbs(gt, num_dbs); + if (unlikely(ret < 0)) + return ret; + + config->begin_db = ret; + config->num_dbs = num_dbs; + + ret = pf_push_vf_cfg_dbs(gt, vfid, config->begin_db, config->num_dbs); + if (unlikely(ret)) { + pf_release_config_dbs(gt, config); + return ret; + } + + xe_gt_sriov_dbg_verbose(gt, "VF%u doorbells %u-%u\n", + vfid, config->begin_db, config->begin_db + config->num_dbs - 1); + return 0; +} + +static u32 pf_get_vf_config_dbs(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + + return config->num_dbs; +} + +/** + * xe_gt_sriov_pf_config_get_dbs - Get VF's GuC doorbells IDs quota. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function can only be called on PF. + * If &vfid represents a PF then number of PF's spare GuC doorbells IDs is returned. + * + * Return: VF's quota (or PF's spare). + */ +u32 xe_gt_sriov_pf_config_get_dbs(struct xe_gt *gt, unsigned int vfid) +{ + u32 num_dbs; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + if (vfid) + num_dbs = pf_get_vf_config_dbs(gt, vfid); + else + num_dbs = pf_get_spare_dbs(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return num_dbs; +} + +/** + * xe_gt_sriov_pf_config_set_dbs - Configure GuC doorbells IDs quota for the VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @num_dbs: requested number of GuC doorbells IDs (0 to release) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_dbs(struct xe_gt *gt, unsigned int vfid, u32 num_dbs) +{ + int err; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt))); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + if (vfid) + err = pf_provision_vf_dbs(gt, vfid, num_dbs); + else + err = pf_set_spare_dbs(gt, num_dbs); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_set_u32_done(gt, vfid, num_dbs, + xe_gt_sriov_pf_config_get_dbs(gt, vfid), + "GuC doorbell IDs", vfid ? no_unit : spare_unit, err); +} + +/** + * xe_gt_sriov_pf_config_bulk_set_dbs - Provision many VFs with GuC context IDs. + * @gt: the &xe_gt + * @vfid: starting VF identifier (can't be 0) + * @num_vfs: number of VFs to provision + * @num_dbs: requested number of GuC doorbell IDs (0 to release) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_bulk_set_dbs(struct xe_gt *gt, unsigned int vfid, + unsigned int num_vfs, u32 num_dbs) +{ + unsigned int n; + int err = 0; + + xe_gt_assert(gt, vfid); + + if (!num_vfs) + return 0; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + for (n = vfid; n < vfid + num_vfs; n++) { + err = pf_provision_vf_dbs(gt, n, num_dbs); + if (err) + break; + } + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_bulk_set_u32_done(gt, vfid, num_vfs, num_dbs, + xe_gt_sriov_pf_config_get_dbs, + "GuC doorbell IDs", no_unit, n, err); +} + +static u32 pf_estimate_fair_dbs(struct xe_gt *gt, unsigned int num_vfs) +{ + struct xe_guc_db_mgr *dbm = >->uc.guc.dbm; + u32 spare = pf_get_spare_dbs(gt); + u32 fair = (GUC_NUM_DOORBELLS - spare) / num_vfs; + int ret; + + for (; fair; --fair) { + ret = xe_guc_db_mgr_reserve_range(dbm, fair * num_vfs, spare); + if (ret < 0) + continue; + xe_guc_db_mgr_release_range(dbm, ret, fair * num_vfs); + break; + } + + xe_gt_sriov_dbg_verbose(gt, "doorbells fair(%u x %u)\n", num_vfs, fair); + return fair; +} + +/** + * xe_gt_sriov_pf_config_set_fair_dbs - Provision many VFs with fair GuC doorbell IDs. + * @gt: the &xe_gt + * @vfid: starting VF identifier (can't be 0) + * @num_vfs: number of VFs to provision (can't be 0) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_fair_dbs(struct xe_gt *gt, unsigned int vfid, + unsigned int num_vfs) +{ + u32 fair; + + xe_gt_assert(gt, vfid); + xe_gt_assert(gt, num_vfs); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + fair = pf_estimate_fair_dbs(gt, num_vfs); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + if (!fair) + return -ENOSPC; + + return xe_gt_sriov_pf_config_bulk_set_dbs(gt, vfid, num_vfs, fair); +} + +static u64 pf_get_lmem_alignment(struct xe_gt *gt) +{ + /* this might be platform dependent */ + return SZ_2M; +} + +static u64 pf_get_min_spare_lmem(struct xe_gt *gt) +{ + /* this might be platform dependent */ + return SZ_128M; /* XXX: preliminary */ +} + +static u64 pf_get_spare_lmem(struct xe_gt *gt) +{ + u64 spare; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + spare = gt->sriov.pf.spare.lmem_size; + spare = max_t(u64, spare, pf_get_min_spare_lmem(gt)); + + return spare; +} + +static int pf_set_spare_lmem(struct xe_gt *gt, u64 size) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + if (size && size < pf_get_min_spare_lmem(gt)) + return -EINVAL; + + gt->sriov.pf.spare.lmem_size = size; + return 0; +} + +static u64 pf_get_vf_config_lmem(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + struct xe_bo *bo; + + bo = config->lmem_obj; + return bo ? bo->size : 0; +} + +static int pf_distribute_config_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_tile *tile; + unsigned int tid; + int err; + + for_each_tile(tile, xe, tid) { + if (tile->primary_gt == gt) { + err = pf_push_vf_cfg_lmem(gt, vfid, size); + } else { + u64 lmem = pf_get_vf_config_lmem(tile->primary_gt, vfid); + + if (!lmem) + continue; + err = pf_push_vf_cfg_lmem(gt, vfid, lmem); + } + if (unlikely(err)) + return err; + } + return 0; +} + +static void pf_force_lmtt_invalidate(struct xe_device *xe) +{ + /* TODO */ +} + +static void pf_reset_vf_lmtt(struct xe_device *xe, unsigned int vfid) +{ + struct xe_lmtt *lmtt; + struct xe_tile *tile; + unsigned int tid; + + xe_assert(xe, IS_DGFX(xe)); + xe_assert(xe, IS_SRIOV_PF(xe)); + + for_each_tile(tile, xe, tid) { + lmtt = &tile->sriov.pf.lmtt; + xe_lmtt_drop_pages(lmtt, vfid); + } +} + +static int pf_update_vf_lmtt(struct xe_device *xe, unsigned int vfid) +{ + struct xe_gt_sriov_config *config; + struct xe_tile *tile; + struct xe_lmtt *lmtt; + struct xe_bo *bo; + struct xe_gt *gt; + u64 total, offset; + unsigned int gtid; + unsigned int tid; + int err; + + xe_assert(xe, IS_DGFX(xe)); + xe_assert(xe, IS_SRIOV_PF(xe)); + + total = 0; + for_each_tile(tile, xe, tid) + total += pf_get_vf_config_lmem(tile->primary_gt, vfid); + + for_each_tile(tile, xe, tid) { + lmtt = &tile->sriov.pf.lmtt; + + xe_lmtt_drop_pages(lmtt, vfid); + if (!total) + continue; + + err = xe_lmtt_prepare_pages(lmtt, vfid, total); + if (err) + goto fail; + + offset = 0; + for_each_gt(gt, xe, gtid) { + if (xe_gt_is_media_type(gt)) + continue; + + config = pf_pick_vf_config(gt, vfid); + bo = config->lmem_obj; + if (!bo) + continue; + + err = xe_lmtt_populate_pages(lmtt, vfid, bo, offset); + if (err) + goto fail; + offset += bo->size; + } + } + + pf_force_lmtt_invalidate(xe); + return 0; + +fail: + for_each_tile(tile, xe, tid) { + lmtt = &tile->sriov.pf.lmtt; + xe_lmtt_drop_pages(lmtt, vfid); + } + return err; +} + +static void pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_config *config) +{ + xe_gt_assert(gt, IS_DGFX(gt_to_xe(gt))); + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + if (config->lmem_obj) { + xe_bo_unpin_map_no_vm(config->lmem_obj); + config->lmem_obj = NULL; + } +} + +static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + struct xe_device *xe = gt_to_xe(gt); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_bo *bo; + int err; + + xe_gt_assert(gt, vfid); + xe_gt_assert(gt, IS_DGFX(xe)); + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + + size = round_up(size, pf_get_lmem_alignment(gt)); + + if (config->lmem_obj) { + err = pf_distribute_config_lmem(gt, vfid, 0); + if (unlikely(err)) + return err; + + pf_reset_vf_lmtt(xe, vfid); + pf_release_vf_config_lmem(gt, config); + } + xe_gt_assert(gt, !config->lmem_obj); + + if (!size) + return 0; + + xe_gt_assert(gt, pf_get_lmem_alignment(gt) == SZ_2M); + bo = xe_bo_create_pin_map(xe, tile, NULL, + ALIGN(size, PAGE_SIZE), + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_PINNED); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + config->lmem_obj = bo; + + err = pf_update_vf_lmtt(xe, vfid); + if (unlikely(err)) + goto release; + + err = pf_push_vf_cfg_lmem(gt, vfid, bo->size); + if (unlikely(err)) + goto reset_lmtt; + + xe_gt_sriov_dbg_verbose(gt, "VF%u LMEM %zu (%zuM)\n", + vfid, bo->size, bo->size / SZ_1M); + return 0; + +reset_lmtt: + pf_reset_vf_lmtt(xe, vfid); +release: + pf_release_vf_config_lmem(gt, config); + return err; +} + +/** + * xe_gt_sriov_pf_config_get_lmem - Get VF's LMEM quota. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function can only be called on PF. + * + * Return: VF's (or PF's spare) LMEM quota. + */ +u64 xe_gt_sriov_pf_config_get_lmem(struct xe_gt *gt, unsigned int vfid) +{ + u64 size; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + if (vfid) + size = pf_get_vf_config_lmem(gt, vfid); + else + size = pf_get_spare_lmem(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return size; +} + +/** + * xe_gt_sriov_pf_config_set_lmem - Provision VF with LMEM. + * @gt: the &xe_gt (can't be media) + * @vfid: the VF identifier + * @size: requested LMEM size + * + * This function can only be called on PF. + */ +int xe_gt_sriov_pf_config_set_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) +{ + int err; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + if (vfid) + err = pf_provision_vf_lmem(gt, vfid, size); + else + err = pf_set_spare_lmem(gt, size); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_set_u64_done(gt, vfid, size, + xe_gt_sriov_pf_config_get_lmem(gt, vfid), + vfid ? "LMEM" : "spare LMEM", err); +} + +/** + * xe_gt_sriov_pf_config_bulk_set_lmem - Provision many VFs with LMEM. + * @gt: the &xe_gt (can't be media) + * @vfid: starting VF identifier (can't be 0) + * @num_vfs: number of VFs to provision + * @size: requested LMEM size + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, + unsigned int num_vfs, u64 size) +{ + unsigned int n; + int err = 0; + + xe_gt_assert(gt, vfid); + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + + if (!num_vfs) + return 0; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + for (n = vfid; n < vfid + num_vfs; n++) { + err = pf_provision_vf_lmem(gt, n, size); + if (err) + break; + } + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_bulk_set_u64_done(gt, vfid, num_vfs, size, + xe_gt_sriov_pf_config_get_lmem, + "LMEM", n, err); +} + +static u64 pf_query_free_lmem(struct xe_gt *gt) +{ + struct xe_tile *tile = gt->tile; + + return xe_ttm_vram_get_avail(&tile->mem.vram_mgr->manager); +} + +static u64 pf_query_max_lmem(struct xe_gt *gt) +{ + u64 alignment = pf_get_lmem_alignment(gt); + u64 spare = pf_get_spare_lmem(gt); + u64 free = pf_query_free_lmem(gt); + u64 avail; + + /* XXX: need to account for 2MB blocks only */ + avail = free > spare ? free - spare : 0; + avail = round_down(avail, alignment); + + return avail; +} + +#ifdef CONFIG_DRM_XE_DEBUG_SRIOV +#define MAX_FAIR_LMEM SZ_128M /* XXX: make it small for the driver bringup */ +#else +#define MAX_FAIR_LMEM SZ_2G /* XXX: known issue with allocating BO over 2GiB */ +#endif + +static u64 pf_estimate_fair_lmem(struct xe_gt *gt, unsigned int num_vfs) +{ + u64 available = pf_query_max_lmem(gt); + u64 alignment = pf_get_lmem_alignment(gt); + u64 fair; + + fair = div_u64(available, num_vfs); + fair = rounddown_pow_of_two(fair); /* XXX: ttm_vram_mgr & drm_buddy limitation */ + fair = ALIGN_DOWN(fair, alignment); +#ifdef MAX_FAIR_LMEM + fair = min_t(u64, MAX_FAIR_LMEM, fair); +#endif + xe_gt_sriov_dbg_verbose(gt, "LMEM available(%lluM) fair(%u x %lluM)\n", + available / SZ_1M, num_vfs, fair / SZ_1M); + return fair; +} + +/** + * xe_gt_sriov_pf_config_set_fair_lmem - Provision many VFs with fair LMEM. + * @gt: the &xe_gt (can't be media) + * @vfid: starting VF identifier (can't be 0) + * @num_vfs: number of VFs to provision (can't be 0) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, + unsigned int num_vfs) +{ + u64 fair; + + xe_gt_assert(gt, vfid); + xe_gt_assert(gt, num_vfs); + xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + + if (!IS_DGFX(gt_to_xe(gt))) + return 0; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + fair = pf_estimate_fair_lmem(gt, num_vfs); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + if (!fair) + return -ENOSPC; + + return xe_gt_sriov_pf_config_bulk_set_lmem(gt, vfid, num_vfs, fair); +} + +/** + * xe_gt_sriov_pf_config_set_fair - Provision many VFs with fair resources. + * @gt: the &xe_gt + * @vfid: starting VF identifier (can't be 0) + * @num_vfs: number of VFs to provision (can't be 0) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_fair(struct xe_gt *gt, unsigned int vfid, + unsigned int num_vfs) +{ + int result = 0; + int err; + + xe_gt_assert(gt, vfid); + xe_gt_assert(gt, num_vfs); + + if (!xe_gt_is_media_type(gt)) { + err = xe_gt_sriov_pf_config_set_fair_ggtt(gt, vfid, num_vfs); + result = result ?: err; + err = xe_gt_sriov_pf_config_set_fair_lmem(gt, vfid, num_vfs); + result = result ?: err; + } + err = xe_gt_sriov_pf_config_set_fair_ctxs(gt, vfid, num_vfs); + result = result ?: err; + err = xe_gt_sriov_pf_config_set_fair_dbs(gt, vfid, num_vfs); + result = result ?: err; + + return result; +} + +static const char *exec_quantum_unit(u32 exec_quantum) +{ + return exec_quantum ? "ms" : "(infinity)"; +} + +static int pf_provision_exec_quantum(struct xe_gt *gt, unsigned int vfid, + u32 exec_quantum) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + int err; + + err = pf_push_vf_cfg_exec_quantum(gt, vfid, exec_quantum); + if (unlikely(err)) + return err; + + config->exec_quantum = exec_quantum; + return 0; +} + +static int pf_get_exec_quantum(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + + return config->exec_quantum; +} + +/** + * xe_gt_sriov_pf_config_set_exec_quantum - Configure execution quantum for the VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @exec_quantum: requested execution quantum in milliseconds (0 is infinity) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid, + u32 exec_quantum) +{ + int err; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_provision_exec_quantum(gt, vfid, exec_quantum); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_set_u32_done(gt, vfid, exec_quantum, + xe_gt_sriov_pf_config_get_exec_quantum(gt, vfid), + "execution quantum", exec_quantum_unit, err); +} + +/** + * xe_gt_sriov_pf_config_get_exec_quantum - Get VF's execution quantum. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function can only be called on PF. + * + * Return: VF's (or PF's) execution quantum in milliseconds. + */ +u32 xe_gt_sriov_pf_config_get_exec_quantum(struct xe_gt *gt, unsigned int vfid) +{ + u32 exec_quantum; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + exec_quantum = pf_get_exec_quantum(gt, vfid); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return exec_quantum; +} + +static const char *preempt_timeout_unit(u32 preempt_timeout) +{ + return preempt_timeout ? "us" : "(infinity)"; +} + +static int pf_provision_preempt_timeout(struct xe_gt *gt, unsigned int vfid, + u32 preempt_timeout) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + int err; + + err = pf_push_vf_cfg_preempt_timeout(gt, vfid, preempt_timeout); + if (unlikely(err)) + return err; + + config->preempt_timeout = preempt_timeout; + + return 0; +} + +static int pf_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + + return config->preempt_timeout; +} + +/** + * xe_gt_sriov_pf_config_set_preempt_timeout - Configure preemption timeout for the VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @preempt_timeout: requested preemption timeout in microseconds (0 is infinity) + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid, + u32 preempt_timeout) +{ + int err; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_provision_preempt_timeout(gt, vfid, preempt_timeout); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_set_u32_done(gt, vfid, preempt_timeout, + xe_gt_sriov_pf_config_get_preempt_timeout(gt, vfid), + "preemption timeout", preempt_timeout_unit, err); +} + +/** + * xe_gt_sriov_pf_config_get_preempt_timeout - Get VF's preemption timeout. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function can only be called on PF. + * + * Return: VF's (or PF's) preemption timeout in microseconds. + */ +u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid) +{ + u32 preempt_timeout; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + preempt_timeout = pf_get_preempt_timeout(gt, vfid); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return preempt_timeout; +} + +static void pf_reset_config_sched(struct xe_gt *gt, struct xe_gt_sriov_config *config) +{ + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + config->exec_quantum = 0; + config->preempt_timeout = 0; +} + +static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + struct xe_device *xe = gt_to_xe(gt); + + if (!xe_gt_is_media_type(gt)) { + pf_release_vf_config_ggtt(gt, config); + if (IS_DGFX(xe)) { + pf_release_vf_config_lmem(gt, config); + pf_update_vf_lmtt(xe, vfid); + } + } + pf_release_config_ctxs(gt, config); + pf_release_config_dbs(gt, config); + pf_reset_config_sched(gt, config); +} + +/** + * xe_gt_sriov_pf_config_release - Release and reset VF configuration. + * @gt: the &xe_gt + * @vfid: the VF identifier (can't be PF) + * @force: force configuration release + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_release(struct xe_gt *gt, unsigned int vfid, bool force) +{ + int err; + + xe_gt_assert(gt, vfid); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_send_vf_cfg_reset(gt, vfid); + if (!err || force) + pf_release_vf_config(gt, vfid); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + if (unlikely(err)) { + xe_gt_sriov_notice(gt, "VF%u unprovisioning failed with error (%pe)%s\n", + vfid, ERR_PTR(err), + force ? " but all resources were released anyway!" : ""); + } + + return force ? 0 : err; +} + +/** + * xe_gt_sriov_pf_config_push - Reprovision VF's configuration. + * @gt: the &xe_gt + * @vfid: the VF identifier (can't be PF) + * @refresh: explicit refresh + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_push(struct xe_gt *gt, unsigned int vfid, bool refresh) +{ + int err = 0; + + xe_gt_assert(gt, vfid); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + if (refresh) + err = pf_send_vf_cfg_reset(gt, vfid); + if (!err) + err = pf_push_full_vf_config(gt, vfid); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + if (unlikely(err)) { + xe_gt_sriov_notice(gt, "Failed to %s VF%u configuration (%pe)\n", + refresh ? "refresh" : "push", vfid, ERR_PTR(err)); + } + + return err; +} + +/** + * xe_gt_sriov_pf_config_print_ggtt - Print GGTT configurations. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * Print GGTT configuration data for all VFs. + * VFs without provisioned GGTT are ignored. + * + * This function can only be called on PF. + */ +int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt)); + const struct xe_gt_sriov_config *config; + char buf[10]; + + for (n = 1; n <= total_vfs; n++) { + config = >->sriov.pf.vfs[n].config; + if (!drm_mm_node_allocated(&config->ggtt_region)) + continue; + + string_get_size(config->ggtt_region.size, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "VF%u:\t%#0llx-%#llx\t(%s)\n", + n, config->ggtt_region.start, + config->ggtt_region.start + config->ggtt_region.size - 1, buf); + } + + return 0; +} + +/** + * xe_gt_sriov_pf_config_print_ctxs - Print GuC context IDs configurations. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * Print GuC context ID allocations across all VFs. + * VFs without GuC context IDs are skipped. + * + * This function can only be called on PF. + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_print_ctxs(struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt)); + const struct xe_gt_sriov_config *config; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + + for (n = 1; n <= total_vfs; n++) { + config = >->sriov.pf.vfs[n].config; + if (!config->num_ctxs) + continue; + + drm_printf(p, "VF%u:\t%u-%u\t(%u)\n", + n, + config->begin_ctx, + config->begin_ctx + config->num_ctxs - 1, + config->num_ctxs); + } + + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + return 0; +} + +/** + * xe_gt_sriov_pf_config_print_dbs - Print GuC doorbell ID configurations. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * Print GuC doorbell IDs allocations across all VFs. + * VFs without GuC doorbell IDs are skipped. + * + * This function can only be called on PF. + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p) +{ + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt)); + const struct xe_gt_sriov_config *config; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + + for (n = 1; n <= total_vfs; n++) { + config = >->sriov.pf.vfs[n].config; + if (!config->num_dbs) + continue; + + drm_printf(p, "VF%u:\t%u-%u\t(%u)\n", + n, + config->begin_db, + config->begin_db + config->num_dbs - 1, + config->num_dbs); + } + + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + return 0; +} + +/** + * xe_gt_sriov_pf_config_print_available_ggtt - Print available GGTT ranges. + * @gt: the &xe_gt + * @p: the &drm_printer + * + * Print GGTT ranges that are available for the provisioning. + * + * This function can only be called on PF. + */ +int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_printer *p) +{ + struct xe_ggtt *ggtt = gt_to_tile(gt)->mem.ggtt; + const struct drm_mm *mm = &ggtt->mm; + const struct drm_mm_node *entry; + u64 alignment = pf_get_ggtt_alignment(gt); + u64 hole_min_start = xe_wopcm_size(gt_to_xe(gt)); + u64 hole_start, hole_end, hole_size; + u64 spare, avail, total = 0; + char buf[10]; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + + spare = pf_get_spare_ggtt(gt); + + mutex_lock(&ggtt->lock); + + drm_mm_for_each_hole(entry, mm, hole_start, hole_end) { + hole_start = max(hole_start, hole_min_start); + hole_start = ALIGN(hole_start, alignment); + hole_end = ALIGN_DOWN(hole_end, alignment); + if (hole_start >= hole_end) + continue; + hole_size = hole_end - hole_start; + total += hole_size; + + string_get_size(hole_size, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "range:\t%#llx-%#llx\t(%s)\n", + hole_start, hole_end - 1, buf); + } + + mutex_unlock(&ggtt->lock); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + string_get_size(total, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "total:\t%llu\t(%s)\n", total, buf); + + string_get_size(spare, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "spare:\t%llu\t(%s)\n", spare, buf); + + avail = total > spare ? total - spare : 0; + + string_get_size(avail, 1, STRING_UNITS_2, buf, sizeof(buf)); + drm_printf(p, "avail:\t%llu\t(%s)\n", avail, buf); + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h new file mode 100644 index 0000000000..5e6b36f00b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_CONFIG_H_ +#define _XE_GT_SRIOV_PF_CONFIG_H_ + +#include <linux/types.h> + +struct drm_printer; +struct xe_gt; + +u64 xe_gt_sriov_pf_config_get_ggtt(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size); +int xe_gt_sriov_pf_config_set_fair_ggtt(struct xe_gt *gt, + unsigned int vfid, unsigned int num_vfs); +int xe_gt_sriov_pf_config_bulk_set_ggtt(struct xe_gt *gt, + unsigned int vfid, unsigned int num_vfs, u64 size); + +u32 xe_gt_sriov_pf_config_get_ctxs(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_ctxs(struct xe_gt *gt, unsigned int vfid, u32 num_ctxs); +int xe_gt_sriov_pf_config_set_fair_ctxs(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs); +int xe_gt_sriov_pf_config_bulk_set_ctxs(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs, + u32 num_ctxs); + +u32 xe_gt_sriov_pf_config_get_dbs(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_dbs(struct xe_gt *gt, unsigned int vfid, u32 num_dbs); +int xe_gt_sriov_pf_config_set_fair_dbs(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs); +int xe_gt_sriov_pf_config_bulk_set_dbs(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs, + u32 num_dbs); + +u64 xe_gt_sriov_pf_config_get_lmem(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_lmem(struct xe_gt *gt, unsigned int vfid, u64 size); +int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs); +int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs, + u64 size); + +u32 xe_gt_sriov_pf_config_get_exec_quantum(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 exec_quantum); + +u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid, + u32 preempt_timeout); + +int xe_gt_sriov_pf_config_set_fair(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs); +int xe_gt_sriov_pf_config_release(struct xe_gt *gt, unsigned int vfid, bool force); +int xe_gt_sriov_pf_config_push(struct xe_gt *gt, unsigned int vfid, bool refresh); + +int xe_gt_sriov_pf_config_print_ggtt(struct xe_gt *gt, struct drm_printer *p); +int xe_gt_sriov_pf_config_print_ctxs(struct xe_gt *gt, struct drm_printer *p); +int xe_gt_sriov_pf_config_print_dbs(struct xe_gt *gt, struct drm_printer *p); + +int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_printer *p); + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h new file mode 100644 index 0000000000..d3745c3559 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_CONFIG_TYPES_H_ +#define _XE_GT_SRIOV_PF_CONFIG_TYPES_H_ + +#include <drm/drm_mm.h> + +struct xe_bo; + +/** + * struct xe_gt_sriov_config - GT level per-VF configuration data. + * + * Used by the PF driver to maintain per-VF provisioning data. + */ +struct xe_gt_sriov_config { + /** @ggtt_region: GGTT region assigned to the VF. */ + struct drm_mm_node ggtt_region; + /** @lmem_obj: LMEM allocation for use by the VF. */ + struct xe_bo *lmem_obj; + /** @num_ctxs: number of GuC contexts IDs. */ + u16 num_ctxs; + /** @begin_ctx: start index of GuC context ID range. */ + u16 begin_ctx; + /** @num_dbs: number of GuC doorbells IDs. */ + u16 num_dbs; + /** @begin_db: start index of GuC doorbell ID range. */ + u16 begin_db; + /** @exec_quantum: execution-quantum in milliseconds. */ + u32 exec_quantum; + /** @preempt_timeout: preemption timeout in microseconds. */ + u32 preempt_timeout; +}; + +/** + * struct xe_gt_sriov_spare_config - GT-level PF spare configuration data. + * + * Used by the PF driver to maintain it's own reserved (spare) provisioning + * data that is not applicable to be tracked in struct xe_gt_sriov_config. + */ +struct xe_gt_sriov_spare_config { + /** @ggtt_size: GGTT size. */ + u64 ggtt_size; + /** @lmem_size: LMEM size. */ + u64 lmem_size; + /** @num_ctxs: number of GuC submission contexts. */ + u16 num_ctxs; + /** @num_dbs: number of GuC doorbells. */ + u16 num_dbs; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c new file mode 100644 index 0000000000..40b8f881fe --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include "abi/guc_actions_sriov_abi.h" + +#include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_sriov_pf_control.h" +#include "xe_gt_sriov_printk.h" +#include "xe_guc_ct.h" +#include "xe_sriov.h" + +static const char *control_cmd_to_string(u32 cmd) +{ + switch (cmd) { + case GUC_PF_TRIGGER_VF_PAUSE: + return "PAUSE"; + case GUC_PF_TRIGGER_VF_RESUME: + return "RESUME"; + case GUC_PF_TRIGGER_VF_STOP: + return "STOP"; + case GUC_PF_TRIGGER_VF_FLR_START: + return "FLR_START"; + case GUC_PF_TRIGGER_VF_FLR_FINISH: + return "FLR_FINISH"; + default: + return "<unknown>"; + } +} + +static int guc_action_vf_control_cmd(struct xe_guc *guc, u32 vfid, u32 cmd) +{ + u32 request[PF2GUC_VF_CONTROL_REQUEST_MSG_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_PF2GUC_VF_CONTROL), + FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_1_VFID, vfid), + FIELD_PREP(PF2GUC_VF_CONTROL_REQUEST_MSG_2_COMMAND, cmd), + }; + int ret; + + /* XXX those two commands are now sent from the G2H handler */ + if (cmd == GUC_PF_TRIGGER_VF_FLR_START || cmd == GUC_PF_TRIGGER_VF_FLR_FINISH) + return xe_guc_ct_send_g2h_handler(&guc->ct, request, ARRAY_SIZE(request)); + + ret = xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request)); + return ret > 0 ? -EPROTO : ret; +} + +static int pf_send_vf_control_cmd(struct xe_gt *gt, unsigned int vfid, u32 cmd) +{ + int err; + + xe_gt_assert(gt, vfid != PFID); + + err = guc_action_vf_control_cmd(>->uc.guc, vfid, cmd); + if (unlikely(err)) + xe_gt_sriov_err(gt, "VF%u control command %s failed (%pe)\n", + vfid, control_cmd_to_string(cmd), ERR_PTR(err)); + return err; +} + +static int pf_send_vf_pause(struct xe_gt *gt, unsigned int vfid) +{ + return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_PAUSE); +} + +static int pf_send_vf_resume(struct xe_gt *gt, unsigned int vfid) +{ + return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_RESUME); +} + +static int pf_send_vf_stop(struct xe_gt *gt, unsigned int vfid) +{ + return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_STOP); +} + +static int pf_send_vf_flr_start(struct xe_gt *gt, unsigned int vfid) +{ + return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_START); +} + +static int pf_send_vf_flr_finish(struct xe_gt *gt, unsigned int vfid) +{ + return pf_send_vf_control_cmd(gt, vfid, GUC_PF_TRIGGER_VF_FLR_FINISH); +} + +/** + * xe_gt_sriov_pf_control_pause_vf - Pause a VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid) +{ + return pf_send_vf_pause(gt, vfid); +} + +/** + * xe_gt_sriov_pf_control_resume_vf - Resume a VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid) +{ + return pf_send_vf_resume(gt, vfid); +} + +/** + * xe_gt_sriov_pf_control_stop_vf - Stop a VF. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid) +{ + return pf_send_vf_stop(gt, vfid); +} + +/** + * DOC: The VF FLR Flow with GuC + * + * PF GUC PCI + * ======================================================== + * | | | + * (1) | [ ] <----- FLR --| + * | [ ] : + * (2) [ ] <-------- NOTIFY FLR --[ ] + * [ ] | + * (3) [ ] | + * [ ] | + * [ ]-- START FLR ---------> [ ] + * | [ ] + * (4) | [ ] + * | [ ] + * [ ] <--------- FLR DONE -- [ ] + * [ ] | + * (5) [ ] | + * [ ] | + * [ ]-- FINISH FLR --------> [ ] + * | | + * + * Step 1: PCI HW generates interrupt to the GuC about VF FLR + * Step 2: GuC FW sends G2H notification to the PF about VF FLR + * Step 2a: on some platforms G2H is only received from root GuC + * Step 3: PF sends H2G request to the GuC to start VF FLR sequence + * Step 3a: on some platforms PF must send H2G to all other GuCs + * Step 4: GuC FW performs VF FLR cleanups and notifies the PF when done + * Step 5: PF performs VF FLR cleanups and notifies the GuC FW when finished + */ + +static bool needs_dispatch_flr(struct xe_device *xe) +{ + return xe->info.platform == XE_PVC; +} + +static void pf_handle_vf_flr(struct xe_gt *gt, u32 vfid) +{ + struct xe_device *xe = gt_to_xe(gt); + struct xe_gt *gtit; + unsigned int gtid; + + xe_gt_sriov_info(gt, "VF%u FLR\n", vfid); + + if (needs_dispatch_flr(xe)) { + for_each_gt(gtit, xe, gtid) + pf_send_vf_flr_start(gtit, vfid); + } else { + pf_send_vf_flr_start(gt, vfid); + } +} + +static void pf_handle_vf_flr_done(struct xe_gt *gt, u32 vfid) +{ + pf_send_vf_flr_finish(gt, vfid); +} + +static int pf_handle_vf_event(struct xe_gt *gt, u32 vfid, u32 eventid) +{ + switch (eventid) { + case GUC_PF_NOTIFY_VF_FLR: + pf_handle_vf_flr(gt, vfid); + break; + case GUC_PF_NOTIFY_VF_FLR_DONE: + pf_handle_vf_flr_done(gt, vfid); + break; + case GUC_PF_NOTIFY_VF_PAUSE_DONE: + break; + case GUC_PF_NOTIFY_VF_FIXUP_DONE: + break; + default: + return -ENOPKG; + } + return 0; +} + +static int pf_handle_pf_event(struct xe_gt *gt, u32 eventid) +{ + switch (eventid) { + case GUC_PF_NOTIFY_VF_ENABLE: + xe_gt_sriov_dbg_verbose(gt, "VFs %s/%s\n", + str_enabled_disabled(true), + str_enabled_disabled(false)); + break; + default: + return -ENOPKG; + } + return 0; +} + +/** + * xe_gt_sriov_pf_control_process_guc2pf - Handle VF state notification from GuC. + * @gt: the &xe_gt + * @msg: the G2H message + * @len: the length of the G2H message + * + * This function is for PF only. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len) +{ + u32 vfid; + u32 eventid; + + xe_gt_assert(gt, len); + xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_ORIGIN, msg[0]) == GUC_HXG_ORIGIN_GUC); + xe_gt_assert(gt, FIELD_GET(GUC_HXG_MSG_0_TYPE, msg[0]) == GUC_HXG_TYPE_EVENT); + xe_gt_assert(gt, FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, msg[0]) == + GUC_ACTION_GUC2PF_VF_STATE_NOTIFY); + + if (unlikely(!xe_device_is_sriov_pf(gt_to_xe(gt)))) + return -EPROTO; + + if (unlikely(FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_0_MBZ, msg[0]))) + return -EPFNOSUPPORT; + + if (unlikely(len != GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_LEN)) + return -EPROTO; + + vfid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_1_VFID, msg[1]); + eventid = FIELD_GET(GUC2PF_VF_STATE_NOTIFY_EVENT_MSG_2_EVENT, msg[2]); + + return vfid ? pf_handle_vf_event(gt, vfid, eventid) : pf_handle_pf_event(gt, eventid); +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h new file mode 100644 index 0000000000..850a3e3766 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_CONTROL_H_ +#define _XE_GT_SRIOV_PF_CONTROL_H_ + +#include <linux/errno.h> +#include <linux/types.h> + +struct xe_gt; + +int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid); + +#ifdef CONFIG_PCI_IOV +int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len); +#else +static inline int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len) +{ + return -EPROTO; +} +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h new file mode 100644 index 0000000000..0bf12d89ce --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_HELPERS_H_ +#define _XE_GT_SRIOV_PF_HELPERS_H_ + +#include "xe_gt_types.h" +#include "xe_sriov_pf_helpers.h" + +/** + * xe_gt_sriov_pf_assert_vfid() - warn if &id is not a supported VF number when debugging. + * @gt: the PF &xe_gt to assert on + * @vfid: the VF number to assert + * + * Assert that > belongs to the Physical Function (PF) device and provided &vfid + * is within a range of supported VF numbers (up to maximum number of VFs that + * driver can support, including VF0 that represents the PF itself). + * + * Note: Effective only on debug builds. See `Xe ASSERTs`_ for more information. + */ +#define xe_gt_sriov_pf_assert_vfid(gt, vfid) xe_sriov_pf_assert_vfid(gt_to_xe(gt), (vfid)) + +static inline int xe_gt_sriov_pf_get_totalvfs(struct xe_gt *gt) +{ + return xe_sriov_pf_get_totalvfs(gt_to_xe(gt)); +} + +static inline struct mutex *xe_gt_sriov_pf_master_mutex(struct xe_gt *gt) +{ + return xe_sriov_pf_master_mutex(gt_to_xe(gt)); +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c new file mode 100644 index 0000000000..fae5be5a2a --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c @@ -0,0 +1,418 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include "abi/guc_actions_sriov_abi.h" + +#include "xe_bo.h" +#include "xe_gt.h" +#include "xe_gt_sriov_pf_helpers.h" +#include "xe_gt_sriov_pf_policy.h" +#include "xe_gt_sriov_printk.h" +#include "xe_guc_ct.h" +#include "xe_guc_klv_helpers.h" +#include "xe_pm.h" + +/* + * Return: number of KLVs that were successfully parsed and saved, + * negative error code on failure. + */ +static int guc_action_update_vgt_policy(struct xe_guc *guc, u64 addr, u32 size) +{ + u32 request[] = { + GUC_ACTION_PF2GUC_UPDATE_VGT_POLICY, + lower_32_bits(addr), + upper_32_bits(addr), + size, + }; + + return xe_guc_ct_send_block(&guc->ct, request, ARRAY_SIZE(request)); +} + +/* + * Return: number of KLVs that were successfully parsed and saved, + * negative error code on failure. + */ +static int pf_send_policy_klvs(struct xe_gt *gt, const u32 *klvs, u32 num_dwords) +{ + const u32 bytes = num_dwords * sizeof(u32); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_device *xe = tile_to_xe(tile); + struct xe_guc *guc = >->uc.guc; + struct xe_bo *bo; + int ret; + + bo = xe_bo_create_pin_map(xe, tile, NULL, + ALIGN(bytes, PAGE_SIZE), + ttm_bo_type_kernel, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + xe_map_memcpy_to(xe, &bo->vmap, 0, klvs, bytes); + + ret = guc_action_update_vgt_policy(guc, xe_bo_ggtt_addr(bo), num_dwords); + + xe_bo_unpin_map_no_vm(bo); + + return ret; +} + +/* + * Return: 0 on success, -ENOKEY if some KLVs were not updated, -EPROTO if reply was malformed, + * negative error code on failure. + */ +static int pf_push_policy_klvs(struct xe_gt *gt, u32 num_klvs, + const u32 *klvs, u32 num_dwords) +{ + int ret; + + xe_gt_assert(gt, num_klvs == xe_guc_klv_count(klvs, num_dwords)); + + ret = pf_send_policy_klvs(gt, klvs, num_dwords); + + if (ret != num_klvs) { + int err = ret < 0 ? ret : ret < num_klvs ? -ENOKEY : -EPROTO; + struct drm_printer p = xe_gt_info_printer(gt); + + xe_gt_sriov_notice(gt, "Failed to push %u policy KLV%s (%pe)\n", + num_klvs, str_plural(num_klvs), ERR_PTR(err)); + xe_guc_klv_print(klvs, num_dwords, &p); + return err; + } + + return 0; +} + +static int pf_push_policy_u32(struct xe_gt *gt, u16 key, u32 value) +{ + u32 klv[] = { + PREP_GUC_KLV(key, 1), + value, + }; + + return pf_push_policy_klvs(gt, 1, klv, ARRAY_SIZE(klv)); +} + +static int pf_update_policy_bool(struct xe_gt *gt, u16 key, bool *policy, bool value) +{ + int err; + + err = pf_push_policy_u32(gt, key, value); + if (unlikely(err)) { + xe_gt_sriov_notice(gt, "Failed to update policy %#x '%s' to '%s' (%pe)\n", + key, xe_guc_klv_key_to_string(key), + str_enabled_disabled(value), ERR_PTR(err)); + return err; + } + + xe_gt_sriov_dbg(gt, "policy key %#x '%s' updated to '%s'\n", + key, xe_guc_klv_key_to_string(key), + str_enabled_disabled(value)); + + *policy = value; + return 0; +} + +static int pf_update_policy_u32(struct xe_gt *gt, u16 key, u32 *policy, u32 value) +{ + int err; + + err = pf_push_policy_u32(gt, key, value); + if (unlikely(err)) { + xe_gt_sriov_notice(gt, "Failed to update policy %#x '%s' to '%s' (%pe)\n", + key, xe_guc_klv_key_to_string(key), + str_enabled_disabled(value), ERR_PTR(err)); + return err; + } + + xe_gt_sriov_dbg(gt, "policy key %#x '%s' updated to %u\n", + key, xe_guc_klv_key_to_string(key), value); + + *policy = value; + return 0; +} + +static int pf_provision_sched_if_idle(struct xe_gt *gt, bool enable) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_update_policy_bool(gt, GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY, + >->sriov.pf.policy.guc.sched_if_idle, + enable); +} + +static int pf_reprovision_sched_if_idle(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_provision_sched_if_idle(gt, gt->sriov.pf.policy.guc.sched_if_idle); +} + +static void pf_sanitize_sched_if_idle(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + gt->sriov.pf.policy.guc.sched_if_idle = false; +} + +/** + * xe_gt_sriov_pf_policy_set_sched_if_idle - Control the 'sched_if_idle' policy. + * @gt: the &xe_gt where to apply the policy + * @enable: the value of the 'sched_if_idle' policy + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_policy_set_sched_if_idle(struct xe_gt *gt, bool enable) +{ + int err; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_provision_sched_if_idle(gt, enable); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return err; +} + +/** + * xe_gt_sriov_pf_policy_get_sched_if_idle - Retrieve value of 'sched_if_idle' policy. + * @gt: the &xe_gt where to read the policy from + * + * This function can only be called on PF. + * + * Return: value of 'sched_if_idle' policy. + */ +bool xe_gt_sriov_pf_policy_get_sched_if_idle(struct xe_gt *gt) +{ + bool enable; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + enable = gt->sriov.pf.policy.guc.sched_if_idle; + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return enable; +} + +static int pf_provision_reset_engine(struct xe_gt *gt, bool enable) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_update_policy_bool(gt, GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY, + >->sriov.pf.policy.guc.reset_engine, enable); +} + +static int pf_reprovision_reset_engine(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_provision_reset_engine(gt, gt->sriov.pf.policy.guc.reset_engine); +} + +static void pf_sanitize_reset_engine(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + gt->sriov.pf.policy.guc.reset_engine = false; +} + +/** + * xe_gt_sriov_pf_policy_set_reset_engine - Control the 'reset_engine' policy. + * @gt: the &xe_gt where to apply the policy + * @enable: the value of the 'reset_engine' policy + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_policy_set_reset_engine(struct xe_gt *gt, bool enable) +{ + int err; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_provision_reset_engine(gt, enable); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return err; +} + +/** + * xe_gt_sriov_pf_policy_get_reset_engine - Retrieve value of 'reset_engine' policy. + * @gt: the &xe_gt where to read the policy from + * + * This function can only be called on PF. + * + * Return: value of 'reset_engine' policy. + */ +bool xe_gt_sriov_pf_policy_get_reset_engine(struct xe_gt *gt) +{ + bool enable; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + enable = gt->sriov.pf.policy.guc.reset_engine; + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return enable; +} + +static int pf_provision_sample_period(struct xe_gt *gt, u32 value) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_update_policy_u32(gt, GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY, + >->sriov.pf.policy.guc.sample_period, value); +} + +static int pf_reprovision_sample_period(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_provision_sample_period(gt, gt->sriov.pf.policy.guc.sample_period); +} + +static void pf_sanitize_sample_period(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + gt->sriov.pf.policy.guc.sample_period = 0; +} + +/** + * xe_gt_sriov_pf_policy_set_sample_period - Control the 'sample_period' policy. + * @gt: the &xe_gt where to apply the policy + * @value: the value of the 'sample_period' policy + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_policy_set_sample_period(struct xe_gt *gt, u32 value) +{ + int err; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_provision_sample_period(gt, value); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return err; +} + +/** + * xe_gt_sriov_pf_policy_get_sample_period - Retrieve value of 'sample_period' policy. + * @gt: the &xe_gt where to read the policy from + * + * This function can only be called on PF. + * + * Return: value of 'sample_period' policy. + */ +u32 xe_gt_sriov_pf_policy_get_sample_period(struct xe_gt *gt) +{ + u32 value; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + value = gt->sriov.pf.policy.guc.sample_period; + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return value; +} + +static void pf_sanitize_guc_policies(struct xe_gt *gt) +{ + pf_sanitize_sched_if_idle(gt); + pf_sanitize_reset_engine(gt); + pf_sanitize_sample_period(gt); +} + +/** + * xe_gt_sriov_pf_policy_sanitize - Reset policy settings. + * @gt: the &xe_gt + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +void xe_gt_sriov_pf_policy_sanitize(struct xe_gt *gt) +{ + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + pf_sanitize_guc_policies(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); +} + +/** + * xe_gt_sriov_pf_policy_reprovision - Reprovision (and optionally reset) policy settings. + * @gt: the &xe_gt + * @reset: if true will reprovision using default values instead of latest + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_policy_reprovision(struct xe_gt *gt, bool reset) +{ + int err = 0; + + xe_pm_runtime_get_noresume(gt_to_xe(gt)); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + if (reset) + pf_sanitize_guc_policies(gt); + err |= pf_reprovision_sched_if_idle(gt); + err |= pf_reprovision_reset_engine(gt); + err |= pf_reprovision_sample_period(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + xe_pm_runtime_put(gt_to_xe(gt)); + + return err ? -ENXIO : 0; +} + +static void print_guc_policies(struct drm_printer *p, struct xe_gt_sriov_guc_policies *policy) +{ + drm_printf(p, "%s:\t%s\n", + xe_guc_klv_key_to_string(GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY), + str_enabled_disabled(policy->sched_if_idle)); + drm_printf(p, "%s:\t%s\n", + xe_guc_klv_key_to_string(GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY), + str_enabled_disabled(policy->reset_engine)); + drm_printf(p, "%s:\t%u %s\n", + xe_guc_klv_key_to_string(GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY), + policy->sample_period, policy->sample_period ? "ms" : "(disabled)"); +} + +/** + * xe_gt_sriov_pf_policy_print - Dump actual policy values. + * @gt: the &xe_gt where to read the policy from + * @p: the &drm_printer + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_policy_print(struct xe_gt *gt, struct drm_printer *p) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + print_guc_policies(p, >->sriov.pf.policy.guc); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return 0; +} diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h new file mode 100644 index 0000000000..2a5dc33dc6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_POLICY_H_ +#define _XE_GT_SRIOV_PF_POLICY_H_ + +#include <linux/types.h> + +struct drm_printer; +struct xe_gt; + +int xe_gt_sriov_pf_policy_set_sched_if_idle(struct xe_gt *gt, bool enable); +bool xe_gt_sriov_pf_policy_get_sched_if_idle(struct xe_gt *gt); +int xe_gt_sriov_pf_policy_set_reset_engine(struct xe_gt *gt, bool enable); +bool xe_gt_sriov_pf_policy_get_reset_engine(struct xe_gt *gt); +int xe_gt_sriov_pf_policy_set_sample_period(struct xe_gt *gt, u32 value); +u32 xe_gt_sriov_pf_policy_get_sample_period(struct xe_gt *gt); + +void xe_gt_sriov_pf_policy_sanitize(struct xe_gt *gt); +int xe_gt_sriov_pf_policy_reprovision(struct xe_gt *gt, bool reset); +int xe_gt_sriov_pf_policy_print(struct xe_gt *gt, struct drm_printer *p); + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h new file mode 100644 index 0000000000..4de532af13 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy_types.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_POLICY_TYPES_H_ +#define _XE_GT_SRIOV_PF_POLICY_TYPES_H_ + +#include <linux/types.h> + +/** + * struct xe_gt_sriov_guc_policies - GuC SR-IOV policies. + * @sched_if_idle: controls strict scheduling policy. + * @reset_engine: controls engines reset on VF switch policy. + * @sample_period: adverse events sampling period (in milliseconds). + */ +struct xe_gt_sriov_guc_policies { + bool sched_if_idle; + bool reset_engine; + u32 sample_period; +}; + +/** + * struct xe_gt_sriov_pf_policy - PF policy data. + * @guc: GuC scheduling policies. + */ +struct xe_gt_sriov_pf_policy { + struct xe_gt_sriov_guc_policies guc; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h new file mode 100644 index 0000000000..faf9ee8266 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_GT_SRIOV_PF_TYPES_H_ +#define _XE_GT_SRIOV_PF_TYPES_H_ + +#include <linux/types.h> + +#include "xe_gt_sriov_pf_config_types.h" +#include "xe_gt_sriov_pf_policy_types.h" + +/** + * struct xe_gt_sriov_metadata - GT level per-VF metadata. + */ +struct xe_gt_sriov_metadata { + /** @config: per-VF provisioning data. */ + struct xe_gt_sriov_config config; +}; + +/** + * struct xe_gt_sriov_pf - GT level PF virtualization data. + * @policy: policy data. + * @spare: PF-only provisioning configuration. + * @vfs: metadata for all VFs. + */ +struct xe_gt_sriov_pf { + struct xe_gt_sriov_pf_policy policy; + struct xe_gt_sriov_spare_config spare; + struct xe_gt_sriov_metadata *vfs; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.c b/drivers/gpu/drm/xe/xe_gt_sysfs.c index c69d2e8a0f..1e5971072b 100644 --- a/drivers/gpu/drm/xe/xe_gt_sysfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sysfs.c @@ -29,7 +29,7 @@ static void gt_sysfs_fini(struct drm_device *drm, void *arg) kobject_put(gt->sysfs); } -void xe_gt_sysfs_init(struct xe_gt *gt) +int xe_gt_sysfs_init(struct xe_gt *gt) { struct xe_tile *tile = gt_to_tile(gt); struct xe_device *xe = gt_to_xe(gt); @@ -38,24 +38,18 @@ void xe_gt_sysfs_init(struct xe_gt *gt) kg = kzalloc(sizeof(*kg), GFP_KERNEL); if (!kg) - return; + return -ENOMEM; kobject_init(&kg->base, &xe_gt_sysfs_kobj_type); kg->gt = gt; err = kobject_add(&kg->base, tile->sysfs, "gt%d", gt->info.id); if (err) { - drm_warn(&xe->drm, "failed to add GT sysfs directory, err: %d\n", err); kobject_put(&kg->base); - return; + return err; } gt->sysfs = &kg->base; - err = drmm_add_action_or_reset(&xe->drm, gt_sysfs_fini, gt); - if (err) { - drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); - return; - } + return drmm_add_action_or_reset(&xe->drm, gt_sysfs_fini, gt); } diff --git a/drivers/gpu/drm/xe/xe_gt_sysfs.h b/drivers/gpu/drm/xe/xe_gt_sysfs.h index e3ec278ca0..ecbfcc5c7d 100644 --- a/drivers/gpu/drm/xe/xe_gt_sysfs.h +++ b/drivers/gpu/drm/xe/xe_gt_sysfs.h @@ -8,7 +8,7 @@ #include "xe_gt_sysfs_types.h" -void xe_gt_sysfs_init(struct xe_gt *gt); +int xe_gt_sysfs_init(struct xe_gt *gt); static inline struct xe_gt * kobj_to_gt(struct kobject *kobj) diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c index 63d640591a..fbe21a8599 100644 --- a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c +++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.c @@ -11,6 +11,7 @@ #include "xe_gt_sysfs.h" #include "xe_gt_throttle_sysfs.h" #include "xe_mmio.h" +#include "xe_pm.h" /** * DOC: Xe GT Throttle @@ -38,10 +39,12 @@ static u32 read_perf_limit_reasons(struct xe_gt *gt) { u32 reg; + xe_pm_runtime_get(gt_to_xe(gt)); if (xe_gt_is_media_type(gt)) reg = xe_mmio_read32(gt, MTL_MEDIA_PERF_LIMIT_REASONS); else reg = xe_mmio_read32(gt, GT0_PERF_LIMIT_REASONS); + xe_pm_runtime_put(gt_to_xe(gt)); return reg; } @@ -233,19 +236,14 @@ static void gt_throttle_sysfs_fini(struct drm_device *drm, void *arg) sysfs_remove_group(gt->freq, &throttle_group_attrs); } -void xe_gt_throttle_sysfs_init(struct xe_gt *gt) +int xe_gt_throttle_sysfs_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); int err; err = sysfs_create_group(gt->freq, &throttle_group_attrs); - if (err) { - drm_warn(&xe->drm, "failed to register throttle sysfs, err: %d\n", err); - return; - } - - err = drmm_add_action_or_reset(&xe->drm, gt_throttle_sysfs_fini, gt); if (err) - drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); + return err; + + return drmm_add_action_or_reset(&xe->drm, gt_throttle_sysfs_fini, gt); } diff --git a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h index 3ecfd4beff..6c61e6f228 100644 --- a/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h +++ b/drivers/gpu/drm/xe/xe_gt_throttle_sysfs.h @@ -10,7 +10,7 @@ struct xe_gt; -void xe_gt_throttle_sysfs_init(struct xe_gt *gt); +int xe_gt_throttle_sysfs_init(struct xe_gt *gt); #endif /* _XE_GT_THROTTLE_SYSFS_H_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index e598a4363d..93df2d7969 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -11,7 +11,9 @@ #include "xe_gt_printk.h" #include "xe_guc.h" #include "xe_guc_ct.h" +#include "xe_mmio.h" #include "xe_trace.h" +#include "regs/xe_guc_regs.h" #define TLB_TIMEOUT (HZ / 4) @@ -209,7 +211,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, * negative error code on error. */ -int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) +static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) { u32 action[] = { XE_GUC_ACTION_TLB_INVALIDATION, @@ -222,6 +224,45 @@ int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) } /** + * xe_gt_tlb_invalidation_ggtt - Issue a TLB invalidation on this GT for the GGTT + * @gt: graphics tile + * + * Issue a TLB invalidation for the GGTT. Completion of TLB invalidation is + * synchronous. + * + * Return: 0 on success, negative error code on error + */ +int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + if (xe_guc_ct_enabled(>->uc.guc.ct) && + gt->uc.guc.submission_state.enabled) { + int seqno; + + seqno = xe_gt_tlb_invalidation_guc(gt); + if (seqno <= 0) + return seqno; + + xe_gt_tlb_invalidation_wait(gt, seqno); + } else if (xe_device_uc_enabled(xe)) { + xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); + if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { + xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1, + PVC_GUC_TLB_INV_DESC1_INVALIDATE); + xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0, + PVC_GUC_TLB_INV_DESC0_VALID); + } else { + xe_mmio_write32(gt, GUC_TLB_INV_CR, + GUC_TLB_INV_CR_INVALIDATE); + } + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); + } + + return 0; +} + +/** * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA * @gt: graphics tile * @fence: invalidation fence which will be signal on TLB invalidation diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index b333c17093..fbb743d80d 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -16,7 +16,7 @@ struct xe_vma; int xe_gt_tlb_invalidation_init(struct xe_gt *gt); void xe_gt_tlb_invalidation_reset(struct xe_gt *gt); -int xe_gt_tlb_invalidation_guc(struct xe_gt *gt); +int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt); int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, struct xe_vma *vma); diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 5dc62fe1be..3733e7a686 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -8,12 +8,10 @@ #include <linux/bitmap.h> #include "regs/xe_gt_regs.h" +#include "xe_assert.h" #include "xe_gt.h" #include "xe_mmio.h" -#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS) -#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS) - static void load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...) { @@ -62,6 +60,114 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask) bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS); } +/** + * gen_l3_mask_from_pattern - Replicate a bit pattern according to a mask + * + * It is used to compute the L3 bank masks in a generic format on + * various platforms where the internal representation of L3 node + * and masks from registers are different. + * + * @xe: device + * @dst: destination + * @pattern: pattern to replicate + * @patternbits: size of the pattern, in bits + * @mask: mask describing where to replicate the pattern + * + * Example 1: + * ---------- + * @pattern = 0b1111 + * └┬─┘ + * @patternbits = 4 (bits) + * @mask = 0b0101 + * ││││ + * │││└────────────────── 0b1111 (=1×0b1111) + * ││└──────────── 0b0000 │ (=0×0b1111) + * │└────── 0b1111 │ │ (=1×0b1111) + * └ 0b0000 │ │ │ (=0×0b1111) + * │ │ │ │ + * @dst = 0b0000 0b1111 0b0000 0b1111 + * + * Example 2: + * ---------- + * @pattern = 0b11111111 + * └┬─────┘ + * @patternbits = 8 (bits) + * @mask = 0b10 + * ││ + * ││ + * ││ + * │└────────── 0b00000000 (=0×0b11111111) + * └ 0b11111111 │ (=1×0b11111111) + * │ │ + * @dst = 0b11111111 0b00000000 + */ +static void +gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst, + xe_l3_bank_mask_t pattern, int patternbits, + unsigned long mask) +{ + unsigned long bit; + + xe_assert(xe, fls(mask) <= patternbits); + for_each_set_bit(bit, &mask, 32) { + xe_l3_bank_mask_t shifted_pattern = {}; + + bitmap_shift_left(shifted_pattern, pattern, bit * patternbits, + XE_MAX_L3_BANK_MASK_BITS); + bitmap_or(dst, dst, shifted_pattern, XE_MAX_L3_BANK_MASK_BITS); + } +} + +static void +load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask) +{ + struct xe_device *xe = gt_to_xe(gt); + u32 fuse3 = xe_mmio_read32(gt, MIRROR_FUSE3); + + if (GRAPHICS_VER(xe) >= 20) { + xe_l3_bank_mask_t per_node = {}; + u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3); + u32 bank_val = REG_FIELD_GET(XE2_GT_L3_MODE_MASK, fuse3); + + bitmap_from_arr32(per_node, &bank_val, 32); + gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4, + meml3_en); + } else if (GRAPHICS_VERx100(xe) >= 1270) { + xe_l3_bank_mask_t per_node = {}; + xe_l3_bank_mask_t per_mask_bit = {}; + u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3); + u32 fuse4 = xe_mmio_read32(gt, XEHP_FUSE4); + u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4); + + bitmap_set_value8(per_mask_bit, 0x3, 0); + gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 2, bank_val); + gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4, + meml3_en); + } else if (xe->info.platform == XE_PVC) { + xe_l3_bank_mask_t per_node = {}; + xe_l3_bank_mask_t per_mask_bit = {}; + u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3); + u32 bank_val = REG_FIELD_GET(XEHPC_GT_L3_MODE_MASK, fuse3); + + bitmap_set_value8(per_mask_bit, 0xf, 0); + gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 4, + bank_val); + gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 16, + meml3_en); + } else if (xe->info.platform == XE_DG2) { + xe_l3_bank_mask_t per_node = {}; + u32 mask = REG_FIELD_GET(MEML3_EN_MASK, fuse3); + + bitmap_set_value8(per_node, 0xff, 0); + gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 8, mask); + } else { + /* 1:1 register bit to mask bit (inverted register bits) */ + u32 mask = REG_FIELD_GET(XELP_GT_L3_MODE_MASK, ~fuse3); + + bitmap_from_arr32(l3_bank_mask, &mask, 32); + } +} + static void get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs) { @@ -106,6 +212,7 @@ xe_gt_topology_init(struct xe_gt *gt) XEHPC_GT_COMPUTE_DSS_ENABLE_EXT, XE2_GT_COMPUTE_DSS_2); load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss); + load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask); p = drm_dbg_printer(>_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology"); @@ -123,6 +230,8 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS, gt->fuse_topo.eu_mask_per_dss); + drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS, + gt->fuse_topo.l3_bank_mask); } /* diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h index d1b54fb52e..b3e357777a 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.h +++ b/drivers/gpu/drm/xe/xe_gt_topology.h @@ -8,6 +8,17 @@ #include "xe_gt_types.h" +/* + * Loop over each DSS with the bit is 1 in geometry or compute mask + * @dss: iterated DSS bit from the DSS mask + * @gt: GT structure + */ +#define for_each_dss(dss, gt) \ + for_each_or_bit((dss), \ + (gt)->fuse_topo.g_dss_mask, \ + (gt)->fuse_topo.c_dss_mask, \ + XE_MAX_DSS_FUSE_BITS) + struct drm_printer; void xe_gt_topology_init(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 07b2f724ec..cfdc761ff7 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -8,6 +8,7 @@ #include "xe_force_wake_types.h" #include "xe_gt_idle_types.h" +#include "xe_gt_sriov_pf_types.h" #include "xe_hw_engine_types.h" #include "xe_hw_fence_types.h" #include "xe_reg_sr_types.h" @@ -24,11 +25,15 @@ enum xe_gt_type { XE_GT_TYPE_MEDIA, }; -#define XE_MAX_DSS_FUSE_REGS 3 -#define XE_MAX_EU_FUSE_REGS 1 +#define XE_MAX_DSS_FUSE_REGS 3 +#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS) +#define XE_MAX_EU_FUSE_REGS 1 +#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS) +#define XE_MAX_L3_BANK_MASK_BITS 64 -typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)]; -typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(32 * XE_MAX_EU_FUSE_REGS)]; +typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(XE_MAX_DSS_FUSE_BITS)]; +typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(XE_MAX_EU_FUSE_BITS)]; +typedef unsigned long xe_l3_bank_mask_t[BITS_TO_LONGS(XE_MAX_L3_BANK_MASK_BITS)]; struct xe_mmio_range { u32 start; @@ -138,6 +143,12 @@ struct xe_gt { u32 adj_offset; } mmio; + /** @sriov: virtualization data related to GT */ + union { + /** @sriov.pf: PF data. Valid only if driver is running as PF */ + struct xe_gt_sriov_pf pf; + } sriov; + /** * @reg_sr: table with registers to be restored on GT init/resume/reset */ @@ -325,6 +336,9 @@ struct xe_gt { /** @fuse_topo.eu_mask_per_dss: EU mask per DSS*/ xe_eu_mask_t eu_mask_per_dss; + + /** @fuse_topo.l3_bank_mask: L3 bank mask */ + xe_l3_bank_mask_t l3_bank_mask; } fuse_topo; /** @steering: register steering for individual HW units */ diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index a38f59b435..5faca4fc2f 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -12,11 +12,13 @@ #include "abi/guc_actions_abi.h" #include "abi/guc_errors_abi.h" #include "regs/xe_gt_regs.h" +#include "regs/xe_gtt_defs.h" #include "regs/xe_guc_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_force_wake.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_guc_ads.h" #include "xe_guc_ct.h" #include "xe_guc_hwconfig.h" @@ -33,14 +35,13 @@ #include "xe_wa.h" #include "xe_wopcm.h" -/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */ -#define GUC_GGTT_TOP 0xFEE00000 static u32 guc_bo_ggtt_addr(struct xe_guc *guc, struct xe_bo *bo) { struct xe_device *xe = guc_to_xe(guc); u32 addr = xe_bo_ggtt_addr(bo); + /* GuC addresses above GUC_GGTT_TOP don't map through the GTT */ xe_assert(xe, addr >= xe_wopcm_size(guc_to_xe(guc))); xe_assert(xe, addr < GUC_GGTT_TOP); xe_assert(xe, bo->size <= GUC_GGTT_TOP - addr); @@ -133,15 +134,10 @@ static u32 guc_ctl_ads_flags(struct xe_guc *guc) return flags; } -#define GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat)) - static u32 guc_ctl_wa_flags(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); struct xe_gt *gt = guc_to_gt(guc); - struct xe_uc_fw *uc_fw = &guc->fw; - struct xe_uc_fw_version *version = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE]; - u32 flags = 0; if (XE_WA(gt, 22012773006)) @@ -164,20 +160,15 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc) if (XE_WA(gt, 22012727170) || XE_WA(gt, 22012727685)) flags |= GUC_WA_CONTEXT_ISOLATION; - if ((XE_WA(gt, 16015675438) || XE_WA(gt, 18020744125)) && + if (XE_WA(gt, 18020744125) && !xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER)) flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST; if (XE_WA(gt, 1509372804)) flags |= GUC_WA_RENDER_RST_RC6_EXIT; - if (XE_WA(gt, 14018913170)) { - if (GUC_VER(version->major, version->minor, version->patch) >= GUC_VER(70, 7, 0)) - flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6; - else - drm_dbg(&xe->drm, "Skip WA 14018913170: GUC version expected >= 70.7.0, found %u.%u.%u\n", - version->major, version->minor, version->patch); - } + if (XE_WA(gt, 14018913170)) + flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6; return flags; } @@ -189,15 +180,23 @@ static u32 guc_ctl_devid(struct xe_guc *guc) return (((u32)xe->info.devid) << 16) | xe->info.revid; } -static void guc_init_params(struct xe_guc *guc) +static void guc_print_params(struct xe_guc *guc) { - struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); u32 *params = guc->params; int i; BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32)); BUILD_BUG_ON(GUC_CTL_MAX_DWORDS + 2 != SOFT_SCRATCH_COUNT); + for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) + xe_gt_dbg(gt, "GuC param[%2d] = 0x%08x\n", i, params[i]); +} + +static void guc_init_params(struct xe_guc *guc) +{ + u32 *params = guc->params; + params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc); params[GUC_CTL_FEATURE] = 0; params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc); @@ -205,18 +204,12 @@ static void guc_init_params(struct xe_guc *guc) params[GUC_CTL_WA] = 0; params[GUC_CTL_DEVID] = guc_ctl_devid(guc); - for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) - drm_dbg(&xe->drm, "GuC param[%2d] = 0x%08x\n", i, params[i]); + guc_print_params(guc); } static void guc_init_params_post_hwconfig(struct xe_guc *guc) { - struct xe_device *xe = guc_to_xe(guc); u32 *params = guc->params; - int i; - - BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32)); - BUILD_BUG_ON(GUC_CTL_MAX_DWORDS + 2 != SOFT_SCRATCH_COUNT); params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc); params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc); @@ -225,8 +218,7 @@ static void guc_init_params_post_hwconfig(struct xe_guc *guc) params[GUC_CTL_WA] = guc_ctl_wa_flags(guc); params[GUC_CTL_DEVID] = guc_ctl_devid(guc); - for (i = 0; i < GUC_CTL_MAX_DWORDS; i++) - drm_dbg(&xe->drm, "GuC param[%2d] = 0x%08x\n", i, params[i]); + guc_print_params(guc); } /* @@ -250,10 +242,11 @@ static void guc_write_params(struct xe_guc *guc) static void guc_fini(struct drm_device *drm, void *arg) { struct xe_guc *guc = arg; + struct xe_gt *gt = guc_to_gt(guc); - xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); + xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); xe_uc_fini_hw(&guc_to_gt(guc)->uc); - xe_force_wake_put(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); + xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); } /** @@ -330,7 +323,7 @@ int xe_guc_init(struct xe_guc *guc) if (ret) goto out; - ret = drmm_add_action_or_reset(>_to_xe(gt)->drm, guc_fini, guc); + ret = drmm_add_action_or_reset(&xe->drm, guc_fini, guc); if (ret) goto out; @@ -343,7 +336,7 @@ int xe_guc_init(struct xe_guc *guc) return 0; out: - drm_err(&xe->drm, "GuC init failed with %d", ret); + xe_gt_err(gt, "GuC init failed with %pe\n", ERR_PTR(ret)); return ret; } @@ -380,7 +373,6 @@ int xe_guc_post_load_init(struct xe_guc *guc) int xe_guc_reset(struct xe_guc *guc) { - struct xe_device *xe = guc_to_xe(guc); struct xe_gt *gt = guc_to_gt(guc); u32 guc_status, gdrst; int ret; @@ -391,16 +383,14 @@ int xe_guc_reset(struct xe_guc *guc) ret = xe_mmio_wait32(gt, GDRST, GRDOM_GUC, 0, 5000, &gdrst, false); if (ret) { - drm_err(&xe->drm, "GuC reset timed out, GDRST=0x%8x\n", - gdrst); + xe_gt_err(gt, "GuC reset timed out, GDRST=%#x\n", gdrst); goto err_out; } guc_status = xe_mmio_read32(gt, GUC_STATUS); if (!(guc_status & GS_MIA_IN_RESET)) { - drm_err(&xe->drm, - "GuC status: 0x%x, MIA core expected to be in reset\n", - guc_status); + xe_gt_err(gt, "GuC status: %#x, MIA core expected to be in reset\n", + guc_status); ret = -EIO; goto err_out; } @@ -463,7 +453,7 @@ static int guc_xfer_rsa(struct xe_guc *guc) static int guc_wait_ucode(struct xe_guc *guc) { - struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); u32 status; int ret; @@ -484,35 +474,32 @@ static int guc_wait_ucode(struct xe_guc *guc) * 200ms. Even at slowest clock, this should be sufficient. And * in the working case, a larger timeout makes no difference. */ - ret = xe_mmio_wait32(guc_to_gt(guc), GUC_STATUS, GS_UKERNEL_MASK, + ret = xe_mmio_wait32(gt, GUC_STATUS, GS_UKERNEL_MASK, FIELD_PREP(GS_UKERNEL_MASK, XE_GUC_LOAD_STATUS_READY), 200000, &status, false); if (ret) { - struct drm_device *drm = &xe->drm; - - drm_info(drm, "GuC load failed: status = 0x%08X\n", status); - drm_info(drm, "GuC load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n", - REG_FIELD_GET(GS_MIA_IN_RESET, status), - REG_FIELD_GET(GS_BOOTROM_MASK, status), - REG_FIELD_GET(GS_UKERNEL_MASK, status), - REG_FIELD_GET(GS_MIA_MASK, status), - REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); + xe_gt_info(gt, "GuC load failed: status = 0x%08X\n", status); + xe_gt_info(gt, "GuC status: Reset = %u, BootROM = %#X, UKernel = %#X, MIA = %#X, Auth = %#X\n", + REG_FIELD_GET(GS_MIA_IN_RESET, status), + REG_FIELD_GET(GS_BOOTROM_MASK, status), + REG_FIELD_GET(GS_UKERNEL_MASK, status), + REG_FIELD_GET(GS_MIA_MASK, status), + REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { - drm_info(drm, "GuC firmware signature verification failed\n"); + xe_gt_info(gt, "GuC firmware signature verification failed\n"); ret = -ENOEXEC; } if (REG_FIELD_GET(GS_UKERNEL_MASK, status) == XE_GUC_LOAD_STATUS_EXCEPTION) { - drm_info(drm, "GuC firmware exception. EIP: %#x\n", - xe_mmio_read32(guc_to_gt(guc), - SOFT_SCRATCH(13))); + xe_gt_info(gt, "GuC firmware exception. EIP: %#x\n", + xe_mmio_read32(gt, SOFT_SCRATCH(13))); ret = -ENXIO; } } else { - drm_dbg(&xe->drm, "GuC successfully loaded"); + xe_gt_dbg(gt, "GuC successfully loaded\n"); } return ret; @@ -604,6 +591,9 @@ static void guc_handle_mmio_msg(struct xe_guc *guc) struct xe_gt *gt = guc_to_gt(guc); u32 msg; + if (IS_SRIOV_VF(guc_to_xe(guc))) + return; + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); msg = xe_mmio_read32(gt, SOFT_SCRATCH(15)); @@ -612,12 +602,10 @@ static void guc_handle_mmio_msg(struct xe_guc *guc) xe_mmio_write32(gt, SOFT_SCRATCH(15), 0); if (msg & XE_GUC_RECV_MSG_CRASH_DUMP_POSTED) - drm_err(&guc_to_xe(guc)->drm, - "Received early GuC crash dump notification!\n"); + xe_gt_err(gt, "Received early GuC crash dump notification!\n"); if (msg & XE_GUC_RECV_MSG_EXCEPTION) - drm_err(&guc_to_xe(guc)->drm, - "Received early GuC exception notification!\n"); + xe_gt_err(gt, "Received early GuC exception notification!\n"); } static void guc_enable_irq(struct xe_guc *guc) @@ -668,15 +656,15 @@ int xe_guc_enable_communication(struct xe_guc *guc) int xe_guc_suspend(struct xe_guc *guc) { - int ret; + struct xe_gt *gt = guc_to_gt(guc); u32 action[] = { XE_GUC_ACTION_CLIENT_SOFT_RESET, }; + int ret; ret = xe_guc_mmio_send(guc, action, ARRAY_SIZE(action)); if (ret) { - drm_err(&guc_to_xe(guc)->drm, - "GuC suspend: CLIENT_SOFT_RESET fail: %d!\n", ret); + xe_gt_err(gt, "GuC suspend failed: %pe\n", ERR_PTR(ret)); return ret; } @@ -751,8 +739,8 @@ retry: 50000, &reply, false); if (ret) { timeout: - drm_err(&xe->drm, "mmio request %#x: no reply %#x\n", - request[0], reply); + xe_gt_err(gt, "GuC mmio request %#x: no reply %#x\n", + request[0], reply); return ret; } @@ -790,8 +778,8 @@ timeout: GUC_HXG_TYPE_NO_RESPONSE_RETRY) { u32 reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, header); - drm_dbg(&xe->drm, "mmio request %#x: retrying, reason %#x\n", - request[0], reason); + xe_gt_dbg(gt, "GuC mmio request %#x: retrying, reason %#x\n", + request[0], reason); goto retry; } @@ -800,16 +788,16 @@ timeout: u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, header); u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, header); - drm_err(&xe->drm, "mmio request %#x: failure %#x/%#x\n", - request[0], error, hint); + xe_gt_err(gt, "GuC mmio request %#x: failure %#x hint %#x\n", + request[0], error, hint); return -ENXIO; } if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != GUC_HXG_TYPE_RESPONSE_SUCCESS) { proto: - drm_err(&xe->drm, "mmio request %#x: unexpected reply %#x\n", - request[0], header); + xe_gt_err(gt, "GuC mmio request %#x: unexpected reply %#x\n", + request[0], header); return -EPROTO; } diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 5339f8a490..7f5a523795 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -7,6 +7,8 @@ #include <drm/drm_managed.h> +#include <generated/xe_wa_oob.h> + #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" #include "regs/xe_guc_regs.h" @@ -19,6 +21,7 @@ #include "xe_map.h" #include "xe_mmio.h" #include "xe_platform_types.h" +#include "xe_wa.h" /* Slack of a few additional entries per engine */ #define ADS_REGSET_EXTRA_MAX 8 @@ -80,6 +83,10 @@ ads_to_map(struct xe_guc_ads *ads) * +---------------------------------------+ * | padding | * +---------------------------------------+ <== 4K aligned + * | w/a KLVs | + * +---------------------------------------+ + * | padding | + * +---------------------------------------+ <== 4K aligned * | capture lists | * +---------------------------------------+ * | padding | @@ -131,6 +138,11 @@ static size_t guc_ads_golden_lrc_size(struct xe_guc_ads *ads) return PAGE_ALIGN(ads->golden_lrc_size); } +static u32 guc_ads_waklv_size(struct xe_guc_ads *ads) +{ + return PAGE_ALIGN(ads->ads_waklv_size); +} + static size_t guc_ads_capture_size(struct xe_guc_ads *ads) { /* FIXME: Allocate a proper capture list */ @@ -167,12 +179,22 @@ static size_t guc_ads_golden_lrc_offset(struct xe_guc_ads *ads) return PAGE_ALIGN(offset); } +static size_t guc_ads_waklv_offset(struct xe_guc_ads *ads) +{ + u32 offset; + + offset = guc_ads_golden_lrc_offset(ads) + + guc_ads_golden_lrc_size(ads); + + return PAGE_ALIGN(offset); +} + static size_t guc_ads_capture_offset(struct xe_guc_ads *ads) { size_t offset; - offset = guc_ads_golden_lrc_offset(ads) + - guc_ads_golden_lrc_size(ads); + offset = guc_ads_waklv_offset(ads) + + guc_ads_waklv_size(ads); return PAGE_ALIGN(offset); } @@ -260,6 +282,110 @@ static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads) return total_size; } +static void guc_waklv_enable_one_word(struct xe_guc_ads *ads, + enum xe_guc_klv_ids klv_id, + u32 value, + u32 *offset, u32 *remain) +{ + u32 size; + u32 klv_entry[] = { + /* 16:16 key/length */ + FIELD_PREP(GUC_KLV_0_KEY, klv_id) | + FIELD_PREP(GUC_KLV_0_LEN, 1), + value, + /* 1 dword data */ + }; + + size = sizeof(klv_entry); + + if (*remain < size) { + drm_warn(&ads_to_xe(ads)->drm, + "w/a klv buffer too small to add klv id %d\n", klv_id); + } else { + xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset, + klv_entry, size); + *offset += size; + *remain -= size; + } +} + +static void guc_waklv_enable_simple(struct xe_guc_ads *ads, + enum xe_guc_klv_ids klv_id, u32 *offset, u32 *remain) +{ + u32 klv_entry[] = { + /* 16:16 key/length */ + FIELD_PREP(GUC_KLV_0_KEY, klv_id) | + FIELD_PREP(GUC_KLV_0_LEN, 0), + /* 0 dwords data */ + }; + u32 size; + + size = sizeof(klv_entry); + + if (xe_gt_WARN(ads_to_gt(ads), *remain < size, + "w/a klv buffer too small to add klv id %d\n", klv_id)) + return; + + xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset, + klv_entry, size); + *offset += size; + *remain -= size; +} + +static void guc_waklv_init(struct xe_guc_ads *ads) +{ + struct xe_gt *gt = ads_to_gt(ads); + u64 addr_ggtt; + u32 offset, remain, size; + + offset = guc_ads_waklv_offset(ads); + remain = guc_ads_waklv_size(ads); + + if (XE_WA(gt, 14019882105)) + guc_waklv_enable_simple(ads, + GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED, + &offset, &remain); + if (XE_WA(gt, 18024947630)) + guc_waklv_enable_simple(ads, + GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING, + &offset, &remain); + if (XE_WA(gt, 16022287689)) + guc_waklv_enable_simple(ads, + GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE, + &offset, &remain); + + /* + * On RC6 exit, GuC will write register 0xB04 with the default value provided. As of now, + * the default value for this register is determined to be 0xC40. This could change in the + * future, so GuC depends on KMD to send it the correct value. + */ + if (XE_WA(gt, 13011645652)) + guc_waklv_enable_one_word(ads, + GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE, + 0xC40, + &offset, &remain); + + size = guc_ads_waklv_size(ads) - remain; + if (!size) + return; + + offset = guc_ads_waklv_offset(ads); + addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset; + + ads_blob_write(ads, ads.wa_klv_addr_lo, lower_32_bits(addr_ggtt)); + ads_blob_write(ads, ads.wa_klv_addr_hi, upper_32_bits(addr_ggtt)); + ads_blob_write(ads, ads.wa_klv_size, size); +} + +static int calculate_waklv_size(struct xe_guc_ads *ads) +{ + /* + * A single page is both the minimum size possible and + * is sufficiently large enough for all current platforms. + */ + return SZ_4K; +} + #define MAX_GOLDEN_LRC_SIZE (SZ_4K * 64) int xe_guc_ads_init(struct xe_guc_ads *ads) @@ -271,10 +397,12 @@ int xe_guc_ads_init(struct xe_guc_ads *ads) ads->golden_lrc_size = calculate_golden_lrc_size(ads); ads->regset_size = calculate_regset_size(gt); + ads->ads_waklv_size = calculate_waklv_size(ads); bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE, - XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -597,6 +725,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads) guc_mapping_table_init(gt, &info_map); guc_capture_list_init(ads); guc_doorbell_init(ads); + guc_waklv_init(ads); if (xe->info.has_usm) { guc_um_init_params(ads); diff --git a/drivers/gpu/drm/xe/xe_guc_ads_types.h b/drivers/gpu/drm/xe/xe_guc_ads_types.h index 4afe44bece..2de5decfe0 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads_types.h +++ b/drivers/gpu/drm/xe/xe_guc_ads_types.h @@ -20,6 +20,8 @@ struct xe_guc_ads { size_t golden_lrc_size; /** @regset_size: size of register set passed to GuC for save/restore */ u32 regset_size; + /** @ads_waklv_size: total waklv size supported by platform */ + u32 ads_waklv_size; }; #endif diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 8bbfa45798..0151d29b3c 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -21,6 +21,7 @@ #include "xe_gt.h" #include "xe_gt_pagefault.h" #include "xe_gt_printk.h" +#include "xe_gt_sriov_pf_control.h" #include "xe_gt_tlb_invalidation.h" #include "xe_guc.h" #include "xe_guc_relay.h" @@ -144,7 +145,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) struct xe_bo *bo; int err; - xe_assert(xe, !(guc_ct_size() % PAGE_SIZE)); + xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE)); ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", 0); if (!ct->g2h_wq) @@ -163,8 +164,9 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) primelockdep(ct); bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(), - XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -174,7 +176,7 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) if (err) return err; - xe_assert(xe, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED); ct->state = XE_GUC_CT_STATE_DISABLED; return 0; } @@ -321,9 +323,10 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct, int xe_guc_ct_enable(struct xe_guc_ct *ct) { struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); int err; - xe_assert(xe, !xe_guc_ct_enabled(ct)); + xe_gt_assert(gt, !xe_guc_ct_enabled(ct)); guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap); guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap); @@ -344,12 +347,12 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) smp_mb(); wake_up_all(&ct->wq); - drm_dbg(&xe->drm, "GuC CT communication channel enabled\n"); + xe_gt_dbg(gt, "GuC CT communication channel enabled\n"); return 0; err_out: - drm_err(&xe->drm, "Failed to enable CT (%d)\n", err); + xe_gt_err(gt, "Failed to enable GuC CT (%pe)\n", ERR_PTR(err)); return err; } @@ -430,7 +433,7 @@ static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len) static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h) { - xe_assert(ct_to_xe(ct), g2h_len <= ct->ctbs.g2h.info.space); + xe_gt_assert(ct_to_gt(ct), g2h_len <= ct->ctbs.g2h.info.space); if (g2h_len) { lockdep_assert_held(&ct->fast_lock); @@ -443,8 +446,8 @@ static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h) static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) { lockdep_assert_held(&ct->fast_lock); - xe_assert(ct_to_xe(ct), ct->ctbs.g2h.info.space + g2h_len <= - ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space); + xe_gt_assert(ct_to_gt(ct), ct->ctbs.g2h.info.space + g2h_len <= + ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space); ct->ctbs.g2h.info.space += g2h_len; --ct->g2h_outstanding; @@ -463,6 +466,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 ct_fence_value, bool want_response) { struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); struct guc_ctb *h2g = &ct->ctbs.h2g; u32 cmd[H2G_CT_HEADERS]; u32 tail = h2g->info.tail; @@ -473,8 +477,8 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, full_len = len + GUC_CTB_HDR_LEN; lockdep_assert_held(&ct->lock); - xe_assert(xe, full_len <= GUC_CTB_MSG_MAX_LEN); - xe_assert(xe, tail <= h2g->info.size); + xe_gt_assert(gt, full_len <= GUC_CTB_MSG_MAX_LEN); + xe_gt_assert(gt, tail <= h2g->info.size); /* Command will wrap, zero fill (NOPs), return and check credits again */ if (tail + full_len > h2g->info.size) { @@ -523,7 +527,7 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, /* Update descriptor */ desc_write(xe, h2g, tail, h2g->info.tail); - trace_xe_guc_ctb_h2g(ct_to_gt(ct)->info.id, *(action - 1), full_len, + trace_xe_guc_ctb_h2g(gt->info.id, *(action - 1), full_len, desc_read(xe, h2g, head), h2g->info.tail); return 0; @@ -552,15 +556,15 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence) { - struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt __maybe_unused = ct_to_gt(ct); u16 seqno; int ret; - xe_assert(xe, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); - xe_assert(xe, !g2h_len || !g2h_fence); - xe_assert(xe, !num_g2h || !g2h_fence); - xe_assert(xe, !g2h_len || num_g2h); - xe_assert(xe, g2h_len || !num_g2h); + xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, !g2h_len || !g2h_fence); + xe_gt_assert(gt, !num_g2h || !g2h_fence); + xe_gt_assert(gt, !g2h_len || num_g2h); + xe_gt_assert(gt, g2h_len || !num_g2h); lockdep_assert_held(&ct->lock); if (unlikely(ct->ctbs.h2g.info.broken)) { @@ -578,7 +582,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, goto out; } - xe_assert(xe, xe_guc_ct_enabled(ct)); + xe_gt_assert(gt, xe_guc_ct_enabled(ct)); if (g2h_fence) { g2h_len = GUC_CTB_HXG_MSG_MAX_LEN; @@ -636,12 +640,12 @@ static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence) { - struct drm_device *drm = &ct_to_xe(ct)->drm; - struct drm_printer p = drm_info_printer(drm->dev); + struct xe_gt *gt = ct_to_gt(ct); + struct drm_printer p = xe_gt_info_printer(gt); unsigned int sleep_period_ms = 1; int ret; - xe_assert(ct_to_xe(ct), !g2h_len || !g2h_fence); + xe_gt_assert(gt, !g2h_len || !g2h_fence); lockdep_assert_held(&ct->lock); xe_device_assert_mem_access(ct_to_xe(ct)); @@ -699,7 +703,7 @@ try_again: return ret; broken: - drm_err(drm, "No forward process on H2G, reset required"); + xe_gt_err(gt, "No forward process on H2G, reset required\n"); xe_guc_ct_print(ct, &p, true); ct->ctbs.h2g.info.broken = true; @@ -711,7 +715,7 @@ static int guc_ct_send(struct xe_guc_ct *ct, const u32 *action, u32 len, { int ret; - xe_assert(ct_to_xe(ct), !g2h_len || !g2h_fence); + xe_gt_assert(ct_to_gt(ct), !g2h_len || !g2h_fence); mutex_lock(&ct->lock); ret = guc_ct_send_locked(ct, action, len, g2h_len, num_g2h, g2h_fence); @@ -779,7 +783,7 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret) static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 *response_buffer, bool no_fail) { - struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); struct g2h_fence g2h_fence; int ret = 0; @@ -821,20 +825,20 @@ retry_same_fence: ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ); if (!ret) { - drm_err(&xe->drm, "Timed out wait for G2H, fence %u, action %04x", - g2h_fence.seqno, action[0]); + xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x", + g2h_fence.seqno, action[0]); xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno); return -ETIME; } if (g2h_fence.retry) { - drm_warn(&xe->drm, "Send retry, action 0x%04x, reason %d", - action[0], g2h_fence.reason); + xe_gt_warn(gt, "H2G retry, action 0x%04x, reason %u", + action[0], g2h_fence.reason); goto retry; } if (g2h_fence.fail) { - drm_err(&xe->drm, "Send failed, action 0x%04x, error %d, hint %d", - action[0], g2h_fence.error, g2h_fence.hint); + xe_gt_err(gt, "H2G send failed, action 0x%04x, error %d, hint %u", + action[0], g2h_fence.error, g2h_fence.hint); ret = -EIO; } @@ -903,7 +907,6 @@ static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len) static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) { struct xe_gt *gt = ct_to_gt(ct); - struct xe_device *xe = gt_to_xe(gt); u32 *hxg = msg_to_hxg(msg); u32 hxg_len = msg_len_to_hxg_len(len); u32 fence = FIELD_GET(GUC_CTB_MSG_0_FENCE, msg[0]); @@ -941,7 +944,7 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) return 0; } - xe_assert(xe, fence == g2h_fence->seqno); + xe_gt_assert(gt, fence == g2h_fence->seqno); if (type == GUC_HXG_TYPE_RESPONSE_FAILURE) { g2h_fence->fail = true; @@ -969,7 +972,7 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) { - struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); u32 *hxg = msg_to_hxg(msg); u32 origin, type; int ret; @@ -978,9 +981,8 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) origin = FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg[0]); if (unlikely(origin != GUC_HXG_ORIGIN_GUC)) { - drm_err(&xe->drm, - "G2H channel broken on read, origin=%d, reset required\n", - origin); + xe_gt_err(gt, "G2H channel broken on read, origin=%u, reset required\n", + origin); ct->ctbs.g2h.info.broken = true; return -EPROTO; @@ -997,9 +999,8 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) ret = parse_g2h_response(ct, msg, len); break; default: - drm_err(&xe->drm, - "G2H channel broken on read, type=%d, reset required\n", - type); + xe_gt_err(gt, "G2H channel broken on read, type=%u, reset required\n", + type); ct->ctbs.g2h.info.broken = true; ret = -EOPNOTSUPP; @@ -1010,8 +1011,8 @@ static int parse_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) { - struct xe_device *xe = ct_to_xe(ct); struct xe_guc *guc = ct_to_guc(ct); + struct xe_gt *gt = ct_to_gt(ct); u32 hxg_len = msg_len_to_hxg_len(len); u32 *hxg = msg_to_hxg(msg); u32 action, adj_len; @@ -1067,13 +1068,16 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) case XE_GUC_ACTION_GUC2VF_RELAY_FROM_PF: ret = xe_guc_relay_process_guc2vf(&guc->relay, hxg, hxg_len); break; + case GUC_ACTION_GUC2PF_VF_STATE_NOTIFY: + ret = xe_gt_sriov_pf_control_process_guc2pf(gt, hxg, hxg_len); + break; default: - drm_err(&xe->drm, "unexpected action 0x%04x\n", action); + xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action); } if (ret) - drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n", - action, ret); + xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n", + action, ERR_PTR(ret)); return 0; } @@ -1081,13 +1085,14 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) { struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); struct guc_ctb *g2h = &ct->ctbs.g2h; u32 tail, head, len; s32 avail; u32 action; u32 *hxg; - xe_assert(xe, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); lockdep_assert_held(&ct->fast_lock); if (ct->state == XE_GUC_CT_STATE_DISABLED) @@ -1099,7 +1104,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) if (g2h->info.broken) return -EPIPE; - xe_assert(xe, xe_guc_ct_enabled(ct)); + xe_gt_assert(gt, xe_guc_ct_enabled(ct)); /* Calculate DW available to read */ tail = desc_read(xe, g2h, tail); @@ -1115,9 +1120,8 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) sizeof(u32)); len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN; if (len > avail) { - drm_err(&xe->drm, - "G2H channel broken on read, avail=%d, len=%d, reset required\n", - avail, len); + xe_gt_err(gt, "G2H channel broken on read, avail=%d, len=%d, reset required\n", + avail, len); g2h->info.broken = true; return -EPROTO; @@ -1170,7 +1174,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len) { - struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); struct xe_guc *guc = ct_to_guc(ct); u32 hxg_len = msg_len_to_hxg_len(len); u32 *hxg = msg_to_hxg(msg); @@ -1189,12 +1193,12 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len) adj_len); break; default: - drm_warn(&xe->drm, "NOT_POSSIBLE"); + xe_gt_warn(gt, "NOT_POSSIBLE"); } if (ret) - drm_err(&xe->drm, "action 0x%04x failed processing, ret=%d\n", - action, ret); + xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n", + action, ERR_PTR(ret)); } /** @@ -1211,7 +1215,7 @@ void xe_guc_ct_fast_path(struct xe_guc_ct *ct) bool ongoing; int len; - ongoing = xe_device_mem_access_get_if_ongoing(ct_to_xe(ct)); + ongoing = xe_pm_runtime_get_if_active(ct_to_xe(ct)); if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL) return; @@ -1224,7 +1228,7 @@ void xe_guc_ct_fast_path(struct xe_guc_ct *ct) spin_unlock(&ct->fast_lock); if (ongoing) - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); } /* Returns less than zero on error, 0 on done, 1 on more available */ @@ -1255,6 +1259,7 @@ static int dequeue_one_g2h(struct xe_guc_ct *ct) static void g2h_worker_func(struct work_struct *w) { struct xe_guc_ct *ct = container_of(w, struct xe_guc_ct, g2h_worker); + struct xe_gt *gt = ct_to_gt(ct); bool ongoing; int ret; @@ -1281,7 +1286,7 @@ static void g2h_worker_func(struct work_struct *w) * responses, if the worker here is blocked on those callbacks * completing, creating a deadlock. */ - ongoing = xe_device_mem_access_get_if_ongoing(ct_to_xe(ct)); + ongoing = xe_pm_runtime_get_if_active(ct_to_xe(ct)); if (!ongoing && xe_pm_read_callback_task(ct_to_xe(ct)) == NULL) return; @@ -1291,8 +1296,7 @@ static void g2h_worker_func(struct work_struct *w) mutex_unlock(&ct->lock); if (unlikely(ret == -EPROTO || ret == -EOPNOTSUPP)) { - struct drm_device *drm = &ct_to_xe(ct)->drm; - struct drm_printer p = drm_info_printer(drm->dev); + struct drm_printer p = xe_gt_info_printer(gt); xe_guc_ct_print(ct, &p, false); kick_reset(ct); @@ -1300,7 +1304,7 @@ static void g2h_worker_func(struct work_struct *w) } while (ret == 1); if (ongoing) - xe_device_mem_access_put(ct_to_xe(ct)); + xe_pm_runtime_put(ct_to_xe(ct)); } static void guc_ctb_snapshot_capture(struct xe_device *xe, struct guc_ctb *ctb, @@ -1402,7 +1406,7 @@ struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct, return NULL; } - if (xe_guc_ct_enabled(ct)) { + if (xe_guc_ct_enabled(ct) || ct->state == XE_GUC_CT_STATE_STOPPED) { snapshot->ct_enabled = true; snapshot->g2h_outstanding = READ_ONCE(ct->g2h_outstanding); guc_ctb_snapshot_capture(xe, &ct->ctbs.h2g, diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c index ffd7d53bcc..d3822cbea2 100644 --- a/drivers/gpu/drm/xe/xe_guc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c @@ -14,6 +14,7 @@ #include "xe_guc_ct.h" #include "xe_guc_log.h" #include "xe_macros.h" +#include "xe_pm.h" static struct xe_guc *node_to_guc(struct drm_info_node *node) { @@ -26,9 +27,9 @@ static int guc_info(struct seq_file *m, void *data) struct xe_device *xe = guc_to_xe(guc); struct drm_printer p = drm_seq_file_printer(m); - xe_device_mem_access_get(xe); + xe_pm_runtime_get(xe); xe_guc_print_info(guc, &p); - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return 0; } @@ -39,9 +40,9 @@ static int guc_log(struct seq_file *m, void *data) struct xe_device *xe = guc_to_xe(guc); struct drm_printer p = drm_seq_file_printer(m); - xe_device_mem_access_get(xe); + xe_pm_runtime_get(xe); xe_guc_log_print(&guc->log, &p); - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return 0; } diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index c281fdbfd2..19ee71aeaf 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -14,6 +14,8 @@ #define G2H_LEN_DW_DEREGISTER_CONTEXT 3 #define G2H_LEN_DW_TLB_INVALIDATE 3 +#define GUC_ID_MAX 65535 + #define GUC_CONTEXT_DISABLE 0 #define GUC_CONTEXT_ENABLE 1 @@ -207,7 +209,10 @@ struct guc_ads { u32 capture_instance[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; u32 capture_class[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES]; u32 capture_global[GUC_CAPTURE_LIST_INDEX_MAX]; - u32 reserved[14]; + u32 wa_klv_addr_lo; + u32 wa_klv_addr_hi; + u32 wa_klv_size; + u32 reserved[11]; } __packed; /* Engine usage stats */ diff --git a/drivers/gpu/drm/xe/xe_guc_hwconfig.c b/drivers/gpu/drm/xe/xe_guc_hwconfig.c index ea49f3885c..d9b570a154 100644 --- a/drivers/gpu/drm/xe/xe_guc_hwconfig.c +++ b/drivers/gpu/drm/xe/xe_guc_hwconfig.c @@ -14,7 +14,7 @@ #include "xe_guc.h" #include "xe_map.h" -static int send_get_hwconfig(struct xe_guc *guc, u32 ggtt_addr, u32 size) +static int send_get_hwconfig(struct xe_guc *guc, u64 ggtt_addr, u32 size) { u32 action[] = { XE_GUC_ACTION_GET_HWCONFIG, @@ -78,8 +78,9 @@ int xe_guc_hwconfig_init(struct xe_guc *guc) return -EINVAL; bo = xe_managed_bo_create_pin_map(xe, tile, PAGE_ALIGN(size), - XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(bo)) return PTR_ERR(bo); guc->hwconfig.bo = bo; diff --git a/drivers/gpu/drm/xe/xe_guc_id_mgr.c b/drivers/gpu/drm/xe/xe_guc_id_mgr.c new file mode 100644 index 0000000000..0fb7c6b78c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_id_mgr.c @@ -0,0 +1,279 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include <linux/bitmap.h> +#include <linux/mutex.h> + +#include <drm/drm_managed.h> + +#include "xe_assert.h" +#include "xe_gt_printk.h" +#include "xe_guc.h" +#include "xe_guc_id_mgr.h" +#include "xe_guc_types.h" + +static struct xe_guc *idm_to_guc(struct xe_guc_id_mgr *idm) +{ + return container_of(idm, struct xe_guc, submission_state.idm); +} + +static struct xe_gt *idm_to_gt(struct xe_guc_id_mgr *idm) +{ + return guc_to_gt(idm_to_guc(idm)); +} + +static struct xe_device *idm_to_xe(struct xe_guc_id_mgr *idm) +{ + return gt_to_xe(idm_to_gt(idm)); +} + +#define idm_assert(idm, cond) xe_gt_assert(idm_to_gt(idm), cond) +#define idm_mutex(idm) (&idm_to_guc(idm)->submission_state.lock) + +static void idm_print_locked(struct xe_guc_id_mgr *idm, struct drm_printer *p, int indent); + +static void __fini_idm(struct drm_device *drm, void *arg) +{ + struct xe_guc_id_mgr *idm = arg; + + mutex_lock(idm_mutex(idm)); + + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { + unsigned int weight = bitmap_weight(idm->bitmap, idm->total); + + if (weight) { + struct drm_printer p = xe_gt_info_printer(idm_to_gt(idm)); + + xe_gt_err(idm_to_gt(idm), "GUC ID manager unclean (%u/%u)\n", + weight, idm->total); + idm_print_locked(idm, &p, 1); + } + } + + bitmap_free(idm->bitmap); + idm->bitmap = NULL; + idm->total = 0; + idm->used = 0; + + mutex_unlock(idm_mutex(idm)); +} + +/** + * xe_guc_id_mgr_init() - Initialize GuC context ID Manager. + * @idm: the &xe_guc_id_mgr to initialize + * @limit: number of IDs to manage + * + * The bare-metal or PF driver can pass ~0 as &limit to indicate that all + * context IDs supported by the GuC firmware are available for use. + * + * Only VF drivers will have to provide explicit number of context IDs + * that they can use. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_guc_id_mgr_init(struct xe_guc_id_mgr *idm, unsigned int limit) +{ + int ret; + + idm_assert(idm, !idm->bitmap); + idm_assert(idm, !idm->total); + idm_assert(idm, !idm->used); + + if (limit == ~0) + limit = GUC_ID_MAX; + else if (limit > GUC_ID_MAX) + return -ERANGE; + else if (!limit) + return -EINVAL; + + idm->bitmap = bitmap_zalloc(limit, GFP_KERNEL); + if (!idm->bitmap) + return -ENOMEM; + idm->total = limit; + + ret = drmm_add_action_or_reset(&idm_to_xe(idm)->drm, __fini_idm, idm); + if (ret) + return ret; + + xe_gt_info(idm_to_gt(idm), "using %u GUC ID(s)\n", idm->total); + return 0; +} + +static unsigned int find_last_zero_area(unsigned long *bitmap, + unsigned int total, + unsigned int count) +{ + unsigned int found = total; + unsigned int rs, re, range; + + for_each_clear_bitrange(rs, re, bitmap, total) { + range = re - rs; + if (range < count) + continue; + found = rs + (range - count); + } + return found; +} + +static int idm_reserve_chunk_locked(struct xe_guc_id_mgr *idm, + unsigned int count, unsigned int retain) +{ + int id; + + idm_assert(idm, count); + lockdep_assert_held(idm_mutex(idm)); + + if (!idm->total) + return -ENODATA; + + if (retain) { + /* + * For IDs reservations (used on PF for VFs) we want to make + * sure there will be at least 'retain' available for the PF + */ + if (idm->used + count + retain > idm->total) + return -EDQUOT; + /* + * ... and we want to reserve highest IDs close to the end. + */ + id = find_last_zero_area(idm->bitmap, idm->total, count); + } else { + /* + * For regular IDs reservations (used by submission code) + * we start searching from the lower range of IDs. + */ + id = bitmap_find_next_zero_area(idm->bitmap, idm->total, 0, count, 0); + } + if (id >= idm->total) + return -ENOSPC; + + bitmap_set(idm->bitmap, id, count); + idm->used += count; + + return id; +} + +static void idm_release_chunk_locked(struct xe_guc_id_mgr *idm, + unsigned int start, unsigned int count) +{ + idm_assert(idm, count); + idm_assert(idm, count <= idm->used); + idm_assert(idm, start < idm->total); + idm_assert(idm, start + count - 1 < idm->total); + lockdep_assert_held(idm_mutex(idm)); + + if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) { + unsigned int n; + + for (n = 0; n < count; n++) + idm_assert(idm, test_bit(start + n, idm->bitmap)); + } + bitmap_clear(idm->bitmap, start, count); + idm->used -= count; +} + +/** + * xe_guc_id_mgr_reserve_locked() - Reserve one or more GuC context IDs. + * @idm: the &xe_guc_id_mgr + * @count: number of IDs to allocate (can't be 0) + * + * This function is dedicated for the use by the GuC submission code, + * where submission lock is already taken. + * + * Return: ID of allocated GuC context or a negative error code on failure. + */ +int xe_guc_id_mgr_reserve_locked(struct xe_guc_id_mgr *idm, unsigned int count) +{ + return idm_reserve_chunk_locked(idm, count, 0); +} + +/** + * xe_guc_id_mgr_release_locked() - Release one or more GuC context IDs. + * @idm: the &xe_guc_id_mgr + * @id: the GuC context ID to release + * @count: number of IDs to release (can't be 0) + * + * This function is dedicated for the use by the GuC submission code, + * where submission lock is already taken. + */ +void xe_guc_id_mgr_release_locked(struct xe_guc_id_mgr *idm, unsigned int id, + unsigned int count) +{ + return idm_release_chunk_locked(idm, id, count); +} + +/** + * xe_guc_id_mgr_reserve() - Reserve a range of GuC context IDs. + * @idm: the &xe_guc_id_mgr + * @count: number of GuC context IDs to reserve (can't be 0) + * @retain: number of GuC context IDs to keep available (can't be 0) + * + * This function is dedicated for the use by the PF driver which expects that + * reserved range of IDs will be contiguous and that there will be at least + * &retain IDs still available for the PF after this reservation. + * + * Return: starting ID of the allocated GuC context ID range or + * a negative error code on failure. + */ +int xe_guc_id_mgr_reserve(struct xe_guc_id_mgr *idm, + unsigned int count, unsigned int retain) +{ + int ret; + + idm_assert(idm, count); + idm_assert(idm, retain); + + mutex_lock(idm_mutex(idm)); + ret = idm_reserve_chunk_locked(idm, count, retain); + mutex_unlock(idm_mutex(idm)); + + return ret; +} + +/** + * xe_guc_id_mgr_release() - Release a range of GuC context IDs. + * @idm: the &xe_guc_id_mgr + * @start: the starting ID of GuC context range to release + * @count: number of GuC context IDs to release + */ +void xe_guc_id_mgr_release(struct xe_guc_id_mgr *idm, + unsigned int start, unsigned int count) +{ + mutex_lock(idm_mutex(idm)); + idm_release_chunk_locked(idm, start, count); + mutex_unlock(idm_mutex(idm)); +} + +static void idm_print_locked(struct xe_guc_id_mgr *idm, struct drm_printer *p, int indent) +{ + unsigned int rs, re; + + lockdep_assert_held(idm_mutex(idm)); + + drm_printf_indent(p, indent, "total %u\n", idm->total); + if (!idm->bitmap) + return; + + drm_printf_indent(p, indent, "used %u\n", idm->used); + for_each_set_bitrange(rs, re, idm->bitmap, idm->total) + drm_printf_indent(p, indent, "range %u..%u (%u)\n", rs, re - 1, re - rs); +} + +/** + * xe_guc_id_mgr_print() - Print status of GuC ID Manager. + * @idm: the &xe_guc_id_mgr to print + * @p: the &drm_printer to print to + * @indent: tab indentation level + */ +void xe_guc_id_mgr_print(struct xe_guc_id_mgr *idm, struct drm_printer *p, int indent) +{ + mutex_lock(idm_mutex(idm)); + idm_print_locked(idm, p, indent); + mutex_unlock(idm_mutex(idm)); +} + +#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_guc_id_mgr_test.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_id_mgr.h b/drivers/gpu/drm/xe/xe_guc_id_mgr.h new file mode 100644 index 0000000000..368f8c80e4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_id_mgr.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GUC_ID_MGR_H_ +#define _XE_GUC_ID_MGR_H_ + +struct drm_printer; +struct xe_guc_id_mgr; + +int xe_guc_id_mgr_init(struct xe_guc_id_mgr *idm, unsigned int count); + +int xe_guc_id_mgr_reserve_locked(struct xe_guc_id_mgr *idm, unsigned int count); +void xe_guc_id_mgr_release_locked(struct xe_guc_id_mgr *idm, unsigned int id, unsigned int count); + +int xe_guc_id_mgr_reserve(struct xe_guc_id_mgr *idm, unsigned int count, unsigned int retain); +void xe_guc_id_mgr_release(struct xe_guc_id_mgr *idm, unsigned int start, unsigned int count); + +void xe_guc_id_mgr_print(struct xe_guc_id_mgr *idm, struct drm_printer *p, int indent); + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c new file mode 100644 index 0000000000..ceca949932 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include <linux/bitfield.h> +#include <drm/drm_print.h> + +#include "abi/guc_klvs_abi.h" +#include "xe_guc_klv_helpers.h" + +#define make_u64(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo))) + +/** + * xe_guc_klv_key_to_string - Convert KLV key into friendly name. + * @key: the `GuC KLV`_ key + * + * Return: name of the KLV key. + */ +const char *xe_guc_klv_key_to_string(u16 key) +{ + switch (key) { + /* VGT POLICY keys */ + case GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY: + return "sched_if_idle"; + case GUC_KLV_VGT_POLICY_ADVERSE_SAMPLE_PERIOD_KEY: + return "sample_period"; + case GUC_KLV_VGT_POLICY_RESET_AFTER_VF_SWITCH_KEY: + return "reset_engine"; + /* VF CFG keys */ + case GUC_KLV_VF_CFG_GGTT_START_KEY: + return "ggtt_start"; + case GUC_KLV_VF_CFG_GGTT_SIZE_KEY: + return "ggtt_size"; + case GUC_KLV_VF_CFG_LMEM_SIZE_KEY: + return "lmem_size"; + case GUC_KLV_VF_CFG_NUM_CONTEXTS_KEY: + return "num_contexts"; + case GUC_KLV_VF_CFG_TILE_MASK_KEY: + return "tile_mask"; + case GUC_KLV_VF_CFG_NUM_DOORBELLS_KEY: + return "num_doorbells"; + case GUC_KLV_VF_CFG_EXEC_QUANTUM_KEY: + return "exec_quantum"; + case GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY: + return "preempt_timeout"; + case GUC_KLV_VF_CFG_BEGIN_DOORBELL_ID_KEY: + return "begin_db_id"; + case GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY: + return "begin_ctx_id"; + default: + return "(unknown)"; + } +} + +/** + * xe_guc_klv_print - Print content of the buffer with `GuC KLV`_. + * @klvs: the buffer with KLVs + * @num_dwords: number of dwords (u32) available in the buffer + * @p: the &drm_printer + * + * The buffer may contain more than one KLV. + */ +void xe_guc_klv_print(const u32 *klvs, u32 num_dwords, struct drm_printer *p) +{ + while (num_dwords >= GUC_KLV_LEN_MIN) { + u32 key = FIELD_GET(GUC_KLV_0_KEY, klvs[0]); + u32 len = FIELD_GET(GUC_KLV_0_LEN, klvs[0]); + + klvs += GUC_KLV_LEN_MIN; + num_dwords -= GUC_KLV_LEN_MIN; + + if (num_dwords < len) { + drm_printf(p, "{ key %#06x : truncated %zu of %zu bytes %*ph } # %s\n", + key, num_dwords * sizeof(u32), len * sizeof(u32), + (int)(num_dwords * sizeof(u32)), klvs, + xe_guc_klv_key_to_string(key)); + return; + } + + switch (len) { + case 0: + drm_printf(p, "{ key %#06x : no value } # %s\n", + key, xe_guc_klv_key_to_string(key)); + break; + case 1: + drm_printf(p, "{ key %#06x : 32b value %u } # %s\n", + key, klvs[0], xe_guc_klv_key_to_string(key)); + break; + case 2: + drm_printf(p, "{ key %#06x : 64b value %#llx } # %s\n", + key, make_u64(klvs[1], klvs[0]), + xe_guc_klv_key_to_string(key)); + break; + default: + drm_printf(p, "{ key %#06x : %zu bytes %*ph } # %s\n", + key, len * sizeof(u32), (int)(len * sizeof(u32)), + klvs, xe_guc_klv_key_to_string(key)); + break; + } + + klvs += len; + num_dwords -= len; + } + + /* we don't expect any leftovers, fix if KLV header is ever changed */ + BUILD_BUG_ON(GUC_KLV_LEN_MIN > 1); +} + +/** + * xe_guc_klv_count - Count KLVs present in the buffer. + * @klvs: the buffer with KLVs + * @num_dwords: number of dwords (u32) in the buffer + * + * Return: number of recognized KLVs or + * a negative error code if KLV buffer is truncated. + */ +int xe_guc_klv_count(const u32 *klvs, u32 num_dwords) +{ + int num_klvs = 0; + + while (num_dwords >= GUC_KLV_LEN_MIN) { + u32 len = FIELD_GET(GUC_KLV_0_LEN, klvs[0]); + + if (num_dwords < len + GUC_KLV_LEN_MIN) + break; + + klvs += GUC_KLV_LEN_MIN + len; + num_dwords -= GUC_KLV_LEN_MIN + len; + num_klvs++; + } + + return num_dwords ? -ENODATA : num_klvs; +} diff --git a/drivers/gpu/drm/xe/xe_guc_klv_helpers.h b/drivers/gpu/drm/xe/xe_guc_klv_helpers.h new file mode 100644 index 0000000000..b835e0ebe6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_guc_klv_helpers.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GUC_KLV_HELPERS_H_ +#define _XE_GUC_KLV_HELPERS_H_ + +#include <linux/types.h> + +struct drm_printer; + +const char *xe_guc_klv_key_to_string(u16 key); + +void xe_guc_klv_print(const u32 *klvs, u32 num_dwords, struct drm_printer *p); +int xe_guc_klv_count(const u32 *klvs, u32 num_dwords); + +/** + * PREP_GUC_KLV - Prepare KLV header value based on provided key and len. + * @key: KLV key + * @len: KLV length + * + * Return: value of the KLV header (u32). + */ +#define PREP_GUC_KLV(key, len) \ + (FIELD_PREP(GUC_KLV_0_KEY, (key)) | \ + FIELD_PREP(GUC_KLV_0_LEN, (len))) + +/** + * PREP_GUC_KLV_CONST - Prepare KLV header value based on const key and len. + * @key: const KLV key + * @len: const KLV length + * + * Return: value of the KLV header (u32). + */ +#define PREP_GUC_KLV_CONST(key, len) \ + (FIELD_PREP_CONST(GUC_KLV_0_KEY, (key)) | \ + FIELD_PREP_CONST(GUC_KLV_0_LEN, (len))) + +/** + * PREP_GUC_KLV_TAG - Prepare KLV header value based on unique KLV definition tag. + * @TAG: unique tag of the KLV definition + * + * Combine separate KEY and LEN definitions of the KLV identified by the TAG. + * + * Return: value of the KLV header (u32). + */ +#define PREP_GUC_KLV_TAG(TAG) \ + PREP_GUC_KLV_CONST(GUC_KLV_##TAG##_KEY, GUC_KLV_##TAG##_LEN) + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 45135c3520..a37ee34194 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -84,8 +84,9 @@ int xe_guc_log_init(struct xe_guc_log *log) struct xe_bo *bo; bo = xe_managed_bo_create_pin_map(xe, tile, guc_log_size(), - XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index dd9d65f923..23382ced4e 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -145,25 +145,6 @@ static int pc_action_reset(struct xe_guc_pc *pc) return ret; } -static int pc_action_shutdown(struct xe_guc_pc *pc) -{ - struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; - int ret; - u32 action[] = { - GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, - SLPC_EVENT(SLPC_EVENT_SHUTDOWN, 2), - xe_bo_ggtt_addr(pc->bo), - 0, - }; - - ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); - if (ret) - drm_err(&pc_to_xe(pc)->drm, "GuC PC shutdown %pe", - ERR_PTR(ret)); - - return ret; -} - static int pc_action_query_task_state(struct xe_guc_pc *pc) { struct xe_guc_ct *ct = &pc_to_guc(pc)->ct; @@ -714,24 +695,28 @@ static int pc_adjust_freq_bounds(struct xe_guc_pc *pc) ret = pc_action_query_task_state(pc); if (ret) - return ret; + goto out; /* * GuC defaults to some RPmax that is not actually achievable without * overclocking. Let's adjust it to the Hardware RP0, which is the * regular maximum */ - if (pc_get_max_freq(pc) > pc->rp0_freq) - pc_set_max_freq(pc, pc->rp0_freq); + if (pc_get_max_freq(pc) > pc->rp0_freq) { + ret = pc_set_max_freq(pc, pc->rp0_freq); + if (ret) + goto out; + } /* * Same thing happens for Server platforms where min is listed as * RPMax */ if (pc_get_min_freq(pc) > pc->rp0_freq) - pc_set_min_freq(pc, pc->rp0_freq); + ret = pc_set_min_freq(pc, pc->rp0_freq); - return 0; +out: + return ret; } static int pc_adjust_requested_freq(struct xe_guc_pc *pc) @@ -889,7 +874,6 @@ out: int xe_guc_pc_stop(struct xe_guc_pc *pc) { struct xe_device *xe = pc_to_xe(pc); - int ret; if (xe->info.skip_guc_pc) { xe_gt_idle_disable_c6(pc_to_gt(pc)); @@ -900,15 +884,6 @@ int xe_guc_pc_stop(struct xe_guc_pc *pc) pc->freq_ready = false; mutex_unlock(&pc->freq_lock); - ret = pc_action_shutdown(pc); - if (ret) - return ret; - - if (wait_for_pc_state(pc, SLPC_GLOBAL_STATE_NOT_RUNNING)) { - drm_err(&pc_to_xe(pc)->drm, "GuC PC Shutdown failed\n"); - return -EIO; - } - return 0; } @@ -921,7 +896,7 @@ static void xe_guc_pc_fini(struct drm_device *drm, void *arg) { struct xe_guc_pc *pc = arg; - xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL); + XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL)); XE_WARN_ON(xe_guc_pc_gucrc_disable(pc)); XE_WARN_ON(xe_guc_pc_stop(pc)); xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL); @@ -948,16 +923,13 @@ int xe_guc_pc_init(struct xe_guc_pc *pc) return err; bo = xe_managed_bo_create_pin_map(xe, tile, size, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(bo)) return PTR_ERR(bo); pc->bo = bo; - err = drmm_add_action_or_reset(&xe->drm, xe_guc_pc_fini, pc); - if (err) - return err; - - return 0; + return drmm_add_action_or_reset(&xe->drm, xe_guc_pc_fini, pc); } diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 3757e0dc5c..e4e3658e6a 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -27,6 +27,7 @@ #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_guc_exec_queue_types.h" +#include "xe_guc_id_mgr.h" #include "xe_guc_submit_types.h" #include "xe_hw_engine.h" #include "xe_hw_fence.h" @@ -236,17 +237,9 @@ static void guc_submit_fini(struct drm_device *drm, void *arg) struct xe_guc *guc = arg; xa_destroy(&guc->submission_state.exec_queue_lookup); - ida_destroy(&guc->submission_state.guc_ids); - bitmap_free(guc->submission_state.guc_ids_bitmap); free_submit_wq(guc); - mutex_destroy(&guc->submission_state.lock); } -#define GUC_ID_MAX 65535 -#define GUC_ID_NUMBER_MLRC 4096 -#define GUC_ID_NUMBER_SLRC (GUC_ID_MAX - GUC_ID_NUMBER_MLRC) -#define GUC_ID_START_MLRC GUC_ID_NUMBER_SLRC - static const struct xe_exec_queue_ops guc_exec_queue_ops; static void primelockdep(struct xe_guc *guc) @@ -269,33 +262,28 @@ int xe_guc_submit_init(struct xe_guc *guc) struct xe_gt *gt = guc_to_gt(guc); int err; - guc->submission_state.guc_ids_bitmap = - bitmap_zalloc(GUC_ID_NUMBER_MLRC, GFP_KERNEL); - if (!guc->submission_state.guc_ids_bitmap) - return -ENOMEM; + err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock); + if (err) + return err; + + err = xe_guc_id_mgr_init(&guc->submission_state.idm, ~0); + if (err) + return err; err = alloc_submit_wq(guc); - if (err) { - bitmap_free(guc->submission_state.guc_ids_bitmap); + if (err) return err; - } gt->exec_queue_ops = &guc_exec_queue_ops; - mutex_init(&guc->submission_state.lock); xa_init(&guc->submission_state.exec_queue_lookup); - ida_init(&guc->submission_state.guc_ids); spin_lock_init(&guc->submission_state.suspend.lock); guc->submission_state.suspend.context = dma_fence_context_alloc(1); primelockdep(guc); - err = drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); - if (err) - return err; - - return 0; + return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); } static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count) @@ -307,12 +295,8 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa for (i = 0; i < xa_count; ++i) xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i); - if (xe_exec_queue_is_parallel(q)) - bitmap_release_region(guc->submission_state.guc_ids_bitmap, - q->guc->id - GUC_ID_START_MLRC, - order_base_2(q->width)); - else - ida_free(&guc->submission_state.guc_ids, q->guc->id); + xe_guc_id_mgr_release_locked(&guc->submission_state.idm, + q->guc->id, q->width); } static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) @@ -330,21 +314,12 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) */ lockdep_assert_held(&guc->submission_state.lock); - if (xe_exec_queue_is_parallel(q)) { - void *bitmap = guc->submission_state.guc_ids_bitmap; - - ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC, - order_base_2(q->width)); - } else { - ret = ida_alloc_max(&guc->submission_state.guc_ids, - GUC_ID_NUMBER_SLRC - 1, GFP_NOWAIT); - } + ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm, + q->width); if (ret < 0) return ret; q->guc->id = ret; - if (xe_exec_queue_is_parallel(q)) - q->guc->id += GUC_ID_START_MLRC; for (i = 0; i < q->width; ++i) { ptr = xa_store(&guc->submission_state.exec_queue_lookup, @@ -533,7 +508,7 @@ static void register_engine(struct xe_exec_queue *q) info.flags = CONTEXT_REGISTRATION_FLAG_KMD; if (xe_exec_queue_is_parallel(q)) { - u32 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); + u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); struct iosys_map map = xe_lrc_parallel_map(lrc); info.wq_desc_lo = lower_32_bits(ggtt_addr + @@ -833,7 +808,9 @@ static void simple_error_capture(struct xe_exec_queue *q) } } - xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL); + if (xe_force_wake_get(gt_to_fw(guc_to_gt(guc)), XE_FORCEWAKE_ALL)) + xe_gt_info(guc_to_gt(guc), + "failed to get forcewake for error capture"); xe_guc_ct_print(&guc->ct, &p, true); guc_exec_queue_print(q, &p); for_each_hw_engine(hwe, guc_to_gt(guc), id) { @@ -929,20 +906,26 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) int err = -ETIME; int i = 0; - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { - drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx", - xe_sched_job_seqno(job), q->guc->id, q->flags); - xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL, - "Kernel-submitted job timed out\n"); - xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), - "VM job timed out on non-killed execqueue\n"); + /* + * TDR has fired before free job worker. Common if exec queue + * immediately closed after last fence signaled. + */ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { + guc_exec_queue_free_job(drm_job); - simple_error_capture(q); - xe_devcoredump(job); - } else { - drm_dbg(&xe->drm, "Timedout signaled job: seqno=%u, guc_id=%d, flags=0x%lx", - xe_sched_job_seqno(job), q->guc->id, q->flags); + return DRM_GPU_SCHED_STAT_NOMINAL; } + + drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx", + xe_sched_job_seqno(job), q->guc->id, q->flags); + xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL, + "Kernel-submitted job timed out\n"); + xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), + "VM job timed out on non-killed execqueue\n"); + + simple_error_capture(q); + xe_devcoredump(job); + trace_xe_sched_job_timedout(job); /* Kill the run_job entry point */ @@ -1569,28 +1552,8 @@ static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action)); } -int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) +static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q) { - struct xe_device *xe = guc_to_xe(guc); - struct xe_exec_queue *q; - u32 guc_id = msg[0]; - - if (unlikely(len < 2)) { - drm_err(&xe->drm, "Invalid length %u", len); - return -EPROTO; - } - - q = g2h_exec_queue_lookup(guc, guc_id); - if (unlikely(!q)) - return -EPROTO; - - if (unlikely(!exec_queue_pending_enable(q) && - !exec_queue_pending_disable(q))) { - drm_err(&xe->drm, "Unexpected engine state 0x%04x", - atomic_read(&q->guc->state)); - return -EPROTO; - } - trace_xe_exec_queue_scheduling_done(q); if (exec_queue_pending_enable(q)) { @@ -1610,17 +1573,15 @@ int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) deregister_exec_queue(guc, q); } } - - return 0; } -int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) +int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_device *xe = guc_to_xe(guc); struct xe_exec_queue *q; u32 guc_id = msg[0]; - if (unlikely(len < 1)) { + if (unlikely(len < 2)) { drm_err(&xe->drm, "Invalid length %u", len); return -EPROTO; } @@ -1629,13 +1590,20 @@ int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) if (unlikely(!q)) return -EPROTO; - if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) || - exec_queue_pending_enable(q) || exec_queue_enabled(q)) { + if (unlikely(!exec_queue_pending_enable(q) && + !exec_queue_pending_disable(q))) { drm_err(&xe->drm, "Unexpected engine state 0x%04x", atomic_read(&q->guc->state)); return -EPROTO; } + handle_sched_done(guc, q); + + return 0; +} + +static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) +{ trace_xe_exec_queue_deregister_done(q); clear_exec_queue_registered(q); @@ -1644,6 +1612,31 @@ int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) xe_exec_queue_put(q); else __guc_exec_queue_fini(guc, q); +} + +int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_exec_queue *q; + u32 guc_id = msg[0]; + + if (unlikely(len < 1)) { + drm_err(&xe->drm, "Invalid length %u", len); + return -EPROTO; + } + + q = g2h_exec_queue_lookup(guc, guc_id); + if (unlikely(!q)) + return -EPROTO; + + if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) || + exec_queue_pending_enable(q) || exec_queue_enabled(q)) { + drm_err(&xe->drm, "Unexpected engine state 0x%04x", + atomic_read(&q->guc->state)); + return -EPROTO; + } + + handle_deregister_done(guc, q); return 0; } @@ -1783,7 +1776,7 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps /** * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. - * @job: faulty Xe scheduled job. + * @q: faulty exec queue * * This can be printed out in a later stage like during dev_coredump * analysis. @@ -1792,9 +1785,8 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps * caller, using `xe_guc_exec_queue_snapshot_free`. */ struct xe_guc_submit_exec_queue_snapshot * -xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job) +xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) { - struct xe_exec_queue *q = job->q; struct xe_gpu_scheduler *sched = &q->guc->sched; struct xe_guc_submit_exec_queue_snapshot *snapshot; int i; @@ -1815,21 +1807,14 @@ xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job) snapshot->sched_props.preempt_timeout_us = q->sched_props.preempt_timeout_us; - snapshot->lrc = kmalloc_array(q->width, sizeof(struct lrc_snapshot), + snapshot->lrc = kmalloc_array(q->width, sizeof(struct xe_lrc_snapshot *), GFP_ATOMIC); if (snapshot->lrc) { for (i = 0; i < q->width; ++i) { struct xe_lrc *lrc = q->lrc + i; - snapshot->lrc[i].context_desc = - lower_32_bits(xe_lrc_ggtt_addr(lrc)); - snapshot->lrc[i].head = xe_lrc_ring_head(lrc); - snapshot->lrc[i].tail.internal = lrc->ring.tail; - snapshot->lrc[i].tail.memory = - xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL); - snapshot->lrc[i].start_seqno = xe_lrc_start_seqno(lrc); - snapshot->lrc[i].seqno = xe_lrc_seqno(lrc); + snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc); } } @@ -1868,6 +1853,24 @@ xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job) } /** + * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine. + * @snapshot: Previously captured snapshot of job. + * + * This captures some data that requires taking some locks, so it cannot be done in signaling path. + */ +void +xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot) +{ + int i; + + if (!snapshot || !snapshot->lrc) + return; + + for (i = 0; i < snapshot->width; ++i) + xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]); +} + +/** * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot. * @snapshot: GuC Submit Engine snapshot object. * @p: drm_printer where it will be printed out. @@ -1895,18 +1898,9 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps drm_printf(p, "\tPreempt timeout: %u (us)\n", snapshot->sched_props.preempt_timeout_us); - for (i = 0; snapshot->lrc && i < snapshot->width; ++i) { - drm_printf(p, "\tHW Context Desc: 0x%08x\n", - snapshot->lrc[i].context_desc); - drm_printf(p, "\tLRC Head: (memory) %u\n", - snapshot->lrc[i].head); - drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", - snapshot->lrc[i].tail.internal, - snapshot->lrc[i].tail.memory); - drm_printf(p, "\tStart seqno: (memory) %d\n", - snapshot->lrc[i].start_seqno); - drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->lrc[i].seqno); - } + for (i = 0; snapshot->lrc && i < snapshot->width; ++i) + xe_lrc_snapshot_print(snapshot->lrc[i], p); + drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state); drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags); @@ -1931,10 +1925,16 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps */ void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot) { + int i; + if (!snapshot) return; - kfree(snapshot->lrc); + if (snapshot->lrc) { + for (i = 0; i < snapshot->width; i++) + xe_lrc_snapshot_free(snapshot->lrc[i]); + kfree(snapshot->lrc); + } kfree(snapshot->pending_list); kfree(snapshot); } @@ -1942,28 +1942,10 @@ void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *s static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) { struct xe_guc_submit_exec_queue_snapshot *snapshot; - struct xe_gpu_scheduler *sched = &q->guc->sched; - struct xe_sched_job *job; - bool found = false; - - spin_lock(&sched->base.job_list_lock); - list_for_each_entry(job, &sched->base.pending_list, drm.list) { - if (job->q == q) { - xe_sched_job_get(job); - found = true; - break; - } - } - spin_unlock(&sched->base.job_list_lock); - if (!found) - return; - - snapshot = xe_guc_exec_queue_snapshot_capture(job); + snapshot = xe_guc_exec_queue_snapshot_capture(q); xe_guc_exec_queue_snapshot_print(snapshot, p); xe_guc_exec_queue_snapshot_free(snapshot); - - xe_sched_job_put(job); } /** diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h index 723dc2bd8d..fad0421ead 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.h +++ b/drivers/gpu/drm/xe/xe_guc_submit.h @@ -9,8 +9,8 @@ #include <linux/types.h> struct drm_printer; +struct xe_exec_queue; struct xe_guc; -struct xe_sched_job; int xe_guc_submit_init(struct xe_guc *guc); @@ -27,7 +27,9 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len); struct xe_guc_submit_exec_queue_snapshot * -xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job); +xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q); +void +xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot); void xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_guc_submit_types.h b/drivers/gpu/drm/xe/xe_guc_submit_types.h index 72fc0f42b0..dc7456c345 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit_types.h +++ b/drivers/gpu/drm/xe/xe_guc_submit_types.h @@ -61,17 +61,6 @@ struct guc_submit_parallel_scratch { u32 wq[WQ_SIZE / sizeof(u32)]; }; -struct lrc_snapshot { - u32 context_desc; - u32 head; - struct { - u32 internal; - u32 memory; - } tail; - u32 start_seqno; - u32 seqno; -}; - struct pending_list_snapshot { u32 seqno; bool fence; @@ -109,7 +98,7 @@ struct xe_guc_submit_exec_queue_snapshot { } sched_props; /** @lrc: LRC Snapshot */ - struct lrc_snapshot *lrc; + struct xe_lrc_snapshot **lrc; /** @schedule_state: Schedule State at the moment of Crash */ u32 schedule_state; diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index edcd1a950b..82bd93f786 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -32,6 +32,21 @@ struct xe_guc_db_mgr { }; /** + * struct xe_guc_id_mgr - GuC context ID Manager. + * + * Note: GuC context ID Manager is relying on &xe_guc::submission_state.lock + * to protect its members. + */ +struct xe_guc_id_mgr { + /** @bitmap: bitmap to track allocated IDs */ + unsigned long *bitmap; + /** @total: total number of IDs being managed */ + unsigned int total; + /** @used: number of IDs currently in use */ + unsigned int used; +}; + +/** * struct xe_guc - Graphic micro controller */ struct xe_guc { @@ -49,12 +64,10 @@ struct xe_guc { struct xe_guc_db_mgr dbm; /** @submission_state: GuC submission state */ struct { + /** @submission_state.idm: GuC context ID Manager */ + struct xe_guc_id_mgr idm; /** @submission_state.exec_queue_lookup: Lookup an xe_engine from guc_id */ struct xarray exec_queue_lookup; - /** @submission_state.guc_ids: used to allocate new guc_ids, single-lrc */ - struct ida guc_ids; - /** @submission_state.guc_ids_bitmap: used to allocate new guc_ids, multi-lrc */ - unsigned long *guc_ids_bitmap; /** @submission_state.stopped: submissions are stopped */ atomic_t stopped; /** @submission_state.lock: protects submission state */ diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c new file mode 100644 index 0000000000..2c32dc46f7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hmm.c @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include <linux/scatterlist.h> +#include <linux/mmu_notifier.h> +#include <linux/dma-mapping.h> +#include <linux/memremap.h> +#include <linux/swap.h> +#include <linux/hmm.h> +#include <linux/mm.h> +#include "xe_hmm.h" +#include "xe_vm.h" +#include "xe_bo.h" + +static u64 xe_npages_in_range(unsigned long start, unsigned long end) +{ + return (end - start) >> PAGE_SHIFT; +} + +/* + * xe_mark_range_accessed() - mark a range is accessed, so core mm + * have such information for memory eviction or write back to + * hard disk + * + * @range: the range to mark + * @write: if write to this range, we mark pages in this range + * as dirty + */ +static void xe_mark_range_accessed(struct hmm_range *range, bool write) +{ + struct page *page; + u64 i, npages; + + npages = xe_npages_in_range(range->start, range->end); + for (i = 0; i < npages; i++) { + page = hmm_pfn_to_page(range->hmm_pfns[i]); + if (write) + set_page_dirty_lock(page); + + mark_page_accessed(page); + } +} + +/* + * xe_build_sg() - build a scatter gather table for all the physical pages/pfn + * in a hmm_range. dma-map pages if necessary. dma-address is save in sg table + * and will be used to program GPU page table later. + * + * @xe: the xe device who will access the dma-address in sg table + * @range: the hmm range that we build the sg table from. range->hmm_pfns[] + * has the pfn numbers of pages that back up this hmm address range. + * @st: pointer to the sg table. + * @write: whether we write to this range. This decides dma map direction + * for system pages. If write we map it bi-diretional; otherwise + * DMA_TO_DEVICE + * + * All the contiguous pfns will be collapsed into one entry in + * the scatter gather table. This is for the purpose of efficiently + * programming GPU page table. + * + * The dma_address in the sg table will later be used by GPU to + * access memory. So if the memory is system memory, we need to + * do a dma-mapping so it can be accessed by GPU/DMA. + * + * FIXME: This function currently only support pages in system + * memory. If the memory is GPU local memory (of the GPU who + * is going to access memory), we need gpu dpa (device physical + * address), and there is no need of dma-mapping. This is TBD. + * + * FIXME: dma-mapping for peer gpu device to access remote gpu's + * memory. Add this when you support p2p + * + * This function allocates the storage of the sg table. It is + * caller's responsibility to free it calling sg_free_table. + * + * Returns 0 if successful; -ENOMEM if fails to allocate memory + */ +static int xe_build_sg(struct xe_device *xe, struct hmm_range *range, + struct sg_table *st, bool write) +{ + struct device *dev = xe->drm.dev; + struct page **pages; + u64 i, npages; + int ret; + + npages = xe_npages_in_range(range->start, range->end); + pages = kvmalloc_array(npages, sizeof(*pages), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + for (i = 0; i < npages; i++) { + pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]); + xe_assert(xe, !is_device_private_page(pages[i])); + } + + ret = sg_alloc_table_from_pages_segment(st, pages, npages, 0, npages << PAGE_SHIFT, + xe_sg_segment_size(dev), GFP_KERNEL); + if (ret) + goto free_pages; + + ret = dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING); + if (ret) { + sg_free_table(st); + st = NULL; + } + +free_pages: + kvfree(pages); + return ret; +} + +/* + * xe_hmm_userptr_free_sg() - Free the scatter gather table of userptr + * + * @uvma: the userptr vma which hold the scatter gather table + * + * With function xe_userptr_populate_range, we allocate storage of + * the userptr sg table. This is a helper function to free this + * sg table, and dma unmap the address in the table. + */ +void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma) +{ + struct xe_userptr *userptr = &uvma->userptr; + struct xe_vma *vma = &uvma->vma; + bool write = !xe_vma_read_only(vma); + struct xe_vm *vm = xe_vma_vm(vma); + struct xe_device *xe = vm->xe; + struct device *dev = xe->drm.dev; + + xe_assert(xe, userptr->sg); + dma_unmap_sgtable(dev, userptr->sg, + write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0); + + sg_free_table(userptr->sg); + userptr->sg = NULL; +} + +/** + * xe_hmm_userptr_populate_range() - Populate physical pages of a virtual + * address range + * + * @uvma: userptr vma which has information of the range to populate. + * @is_mm_mmap_locked: True if mmap_read_lock is already acquired by caller. + * + * This function populate the physical pages of a virtual + * address range. The populated physical pages is saved in + * userptr's sg table. It is similar to get_user_pages but call + * hmm_range_fault. + * + * This function also read mmu notifier sequence # ( + * mmu_interval_read_begin), for the purpose of later + * comparison (through mmu_interval_read_retry). + * + * This must be called with mmap read or write lock held. + * + * This function allocates the storage of the userptr sg table. + * It is caller's responsibility to free it calling sg_free_table. + * + * returns: 0 for succuss; negative error no on failure + */ +int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, + bool is_mm_mmap_locked) +{ + unsigned long timeout = + jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); + unsigned long *pfns, flags = HMM_PFN_REQ_FAULT; + struct xe_userptr *userptr; + struct xe_vma *vma = &uvma->vma; + u64 userptr_start = xe_vma_userptr(vma); + u64 userptr_end = userptr_start + xe_vma_size(vma); + struct xe_vm *vm = xe_vma_vm(vma); + struct hmm_range hmm_range; + bool write = !xe_vma_read_only(vma); + unsigned long notifier_seq; + u64 npages; + int ret; + + userptr = &uvma->userptr; + + if (is_mm_mmap_locked) + mmap_assert_locked(userptr->notifier.mm); + + if (vma->gpuva.flags & XE_VMA_DESTROYED) + return 0; + + notifier_seq = mmu_interval_read_begin(&userptr->notifier); + if (notifier_seq == userptr->notifier_seq) + return 0; + + if (userptr->sg) + xe_hmm_userptr_free_sg(uvma); + + npages = xe_npages_in_range(userptr_start, userptr_end); + pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); + if (unlikely(!pfns)) + return -ENOMEM; + + if (write) + flags |= HMM_PFN_REQ_WRITE; + + if (!mmget_not_zero(userptr->notifier.mm)) { + ret = -EFAULT; + goto free_pfns; + } + + hmm_range.default_flags = flags; + hmm_range.hmm_pfns = pfns; + hmm_range.notifier = &userptr->notifier; + hmm_range.start = userptr_start; + hmm_range.end = userptr_end; + hmm_range.dev_private_owner = vm->xe; + + while (true) { + hmm_range.notifier_seq = mmu_interval_read_begin(&userptr->notifier); + + if (!is_mm_mmap_locked) + mmap_read_lock(userptr->notifier.mm); + + ret = hmm_range_fault(&hmm_range); + + if (!is_mm_mmap_locked) + mmap_read_unlock(userptr->notifier.mm); + + if (ret == -EBUSY) { + if (time_after(jiffies, timeout)) + break; + + continue; + } + break; + } + + mmput(userptr->notifier.mm); + + if (ret) + goto free_pfns; + + ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt, write); + if (ret) + goto free_pfns; + + xe_mark_range_accessed(&hmm_range, write); + userptr->sg = &userptr->sgt; + userptr->notifier_seq = hmm_range.notifier_seq; + +free_pfns: + kvfree(pfns); + return ret; +} + diff --git a/drivers/gpu/drm/xe/xe_hmm.h b/drivers/gpu/drm/xe/xe_hmm.h new file mode 100644 index 0000000000..909dc2bdcd --- /dev/null +++ b/drivers/gpu/drm/xe/xe_hmm.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT + * + * Copyright © 2024 Intel Corporation + */ + +#include <linux/types.h> + +struct xe_userptr_vma; + +int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked); +void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma); diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index 6b9b1cbedd..39a484a575 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -58,8 +58,8 @@ static int huc_alloc_gsc_pkt(struct xe_huc *huc) bo = xe_bo_create_pin_map(xe, gt_to_tile(gt), NULL, PXP43_HUC_AUTH_INOUT_SIZE * 2, ttm_bo_type_kernel, - XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT); if (IS_ERR(bo)) return PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_huc_debugfs.c b/drivers/gpu/drm/xe/xe_huc_debugfs.c index 18585a7eeb..3a888a4018 100644 --- a/drivers/gpu/drm/xe/xe_huc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_huc_debugfs.c @@ -12,6 +12,7 @@ #include "xe_gt.h" #include "xe_huc.h" #include "xe_macros.h" +#include "xe_pm.h" static struct xe_gt * huc_to_gt(struct xe_huc *huc) @@ -36,9 +37,9 @@ static int huc_info(struct seq_file *m, void *data) struct xe_device *xe = huc_to_xe(huc); struct drm_printer p = drm_seq_file_printer(m); - xe_device_mem_access_get(xe); + xe_pm_runtime_get(xe); xe_huc_print_info(huc, &p); - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return 0; } diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index b5e83ea172..455f375c1c 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -14,8 +14,10 @@ #include "xe_device.h" #include "xe_execlist.h" #include "xe_force_wake.h" +#include "xe_gsc.h" #include "xe_gt.h" #include "xe_gt_ccs_mode.h" +#include "xe_gt_printk.h" #include "xe_gt_topology.h" #include "xe_hw_fence.h" #include "xe_irq.h" @@ -463,6 +465,32 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, hwe->eclass->sched_props.preempt_timeout_us = XE_HW_ENGINE_PREEMPT_TIMEOUT; hwe->eclass->sched_props.preempt_timeout_min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN; hwe->eclass->sched_props.preempt_timeout_max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX; + + /* + * The GSC engine can accept submissions while the GSC shim is + * being reset, during which time the submission is stalled. In + * the worst case, the shim reset can take up to the maximum GSC + * command execution time (250ms), so the request start can be + * delayed by that much; the request itself can take that long + * without being preemptible, which means worst case it can + * theoretically take up to 500ms for a preemption to go through + * on the GSC engine. Adding to that an extra 100ms as a safety + * margin, we get a minimum recommended timeout of 600ms. + * The preempt_timeout value can't be tuned for OTHER_CLASS + * because the class is reserved for kernel usage, so we just + * need to make sure that the starting value is above that + * threshold; since our default value (640ms) is greater than + * 600ms, the only way we can go below is via a kconfig setting. + * If that happens, log it in dmesg and update the value. + */ + if (hwe->class == XE_ENGINE_CLASS_OTHER) { + const u32 min_preempt_timeout = 600 * 1000; + if (hwe->eclass->sched_props.preempt_timeout_us < min_preempt_timeout) { + hwe->eclass->sched_props.preempt_timeout_us = min_preempt_timeout; + xe_gt_notice(gt, "Increasing preempt_timeout for GSC to 600ms\n"); + } + } + /* Record default props */ hwe->eclass->defaults = hwe->eclass->sched_props; } @@ -490,8 +518,9 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, xe_reg_sr_apply_whitelist(hwe); hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(hwe->hwsp)) { err = PTR_ERR(hwe->hwsp); goto err_name; @@ -509,18 +538,19 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, } } - if (xe_device_uc_enabled(xe)) + if (xe_device_uc_enabled(xe)) { + /* GSCCS has a special interrupt for reset */ + if (hwe->class == XE_ENGINE_CLASS_OTHER) + hwe->irq_handler = xe_gsc_hwe_irq_handler; + xe_hw_engine_enable_ring(hwe); + } /* We reserve the highest BCS instance for USM */ if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY) gt->usm.reserved_bcs_instance = hwe->instance; - err = drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe); - if (err) - return err; - - return 0; + return drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe); err_kernel_lrc: xe_lrc_finish(&hwe->kernel_lrc); diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c index 2345fb42fa..844ec68cbb 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.c @@ -7,8 +7,10 @@ #include <linux/kobject.h> #include <linux/sysfs.h> +#include "xe_device.h" #include "xe_gt.h" #include "xe_hw_engine_class_sysfs.h" +#include "xe_pm.h" #define MAX_ENGINE_CLASS_NAME_LEN 16 static int xe_add_hw_engine_class_defaults(struct xe_device *xe, @@ -70,7 +72,7 @@ static ssize_t job_timeout_max_show(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); - return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_max); + return sysfs_emit(buf, "%u\n", eclass->sched_props.job_timeout_max); } static const struct kobj_attribute job_timeout_max_attr = @@ -106,7 +108,7 @@ static ssize_t job_timeout_min_show(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); - return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_min); + return sysfs_emit(buf, "%u\n", eclass->sched_props.job_timeout_min); } static const struct kobj_attribute job_timeout_min_attr = @@ -139,7 +141,7 @@ static ssize_t job_timeout_show(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); - return sprintf(buf, "%u\n", eclass->sched_props.job_timeout_ms); + return sysfs_emit(buf, "%u\n", eclass->sched_props.job_timeout_ms); } static const struct kobj_attribute job_timeout_attr = @@ -150,7 +152,7 @@ static ssize_t job_timeout_default(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); - return sprintf(buf, "%u\n", eclass->defaults.job_timeout_ms); + return sysfs_emit(buf, "%u\n", eclass->defaults.job_timeout_ms); } static const struct kobj_attribute job_timeout_def = @@ -161,7 +163,7 @@ static ssize_t job_timeout_min_default(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); - return sprintf(buf, "%u\n", eclass->defaults.job_timeout_min); + return sysfs_emit(buf, "%u\n", eclass->defaults.job_timeout_min); } static const struct kobj_attribute job_timeout_min_def = @@ -172,7 +174,7 @@ static ssize_t job_timeout_max_default(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); - return sprintf(buf, "%u\n", eclass->defaults.job_timeout_max); + return sysfs_emit(buf, "%u\n", eclass->defaults.job_timeout_max); } static const struct kobj_attribute job_timeout_max_def = @@ -231,7 +233,7 @@ static ssize_t timeslice_duration_max_show(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); - return sprintf(buf, "%u\n", eclass->sched_props.timeslice_max); + return sysfs_emit(buf, "%u\n", eclass->sched_props.timeslice_max); } static const struct kobj_attribute timeslice_duration_max_attr = @@ -269,7 +271,7 @@ static ssize_t timeslice_duration_min_show(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); - return sprintf(buf, "%u\n", eclass->sched_props.timeslice_min); + return sysfs_emit(buf, "%u\n", eclass->sched_props.timeslice_min); } static const struct kobj_attribute timeslice_duration_min_attr = @@ -281,7 +283,7 @@ static ssize_t timeslice_duration_show(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); - return sprintf(buf, "%u\n", eclass->sched_props.timeslice_us); + return sysfs_emit(buf, "%u\n", eclass->sched_props.timeslice_us); } static const struct kobj_attribute timeslice_duration_attr = @@ -293,7 +295,7 @@ static ssize_t timeslice_default(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); - return sprintf(buf, "%u\n", eclass->defaults.timeslice_us); + return sysfs_emit(buf, "%u\n", eclass->defaults.timeslice_us); } static const struct kobj_attribute timeslice_duration_def = @@ -304,7 +306,7 @@ static ssize_t timeslice_min_default(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); - return sprintf(buf, "%u\n", eclass->defaults.timeslice_min); + return sysfs_emit(buf, "%u\n", eclass->defaults.timeslice_min); } static const struct kobj_attribute timeslice_duration_min_def = @@ -315,7 +317,7 @@ static ssize_t timeslice_max_default(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); - return sprintf(buf, "%u\n", eclass->defaults.timeslice_max); + return sysfs_emit(buf, "%u\n", eclass->defaults.timeslice_max); } static const struct kobj_attribute timeslice_duration_max_def = @@ -348,7 +350,7 @@ static ssize_t preempt_timeout_show(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); - return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_us); + return sysfs_emit(buf, "%u\n", eclass->sched_props.preempt_timeout_us); } static const struct kobj_attribute preempt_timeout_attr = @@ -360,7 +362,7 @@ static ssize_t preempt_timeout_default(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); - return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_us); + return sysfs_emit(buf, "%u\n", eclass->defaults.preempt_timeout_us); } static const struct kobj_attribute preempt_timeout_def = @@ -372,7 +374,7 @@ static ssize_t preempt_timeout_min_default(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); - return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_min); + return sysfs_emit(buf, "%u\n", eclass->defaults.preempt_timeout_min); } static const struct kobj_attribute preempt_timeout_min_def = @@ -384,7 +386,7 @@ static ssize_t preempt_timeout_max_default(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj->parent); - return sprintf(buf, "%u\n", eclass->defaults.preempt_timeout_max); + return sysfs_emit(buf, "%u\n", eclass->defaults.preempt_timeout_max); } static const struct kobj_attribute preempt_timeout_max_def = @@ -420,7 +422,7 @@ static ssize_t preempt_timeout_max_show(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); - return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_max); + return sysfs_emit(buf, "%u\n", eclass->sched_props.preempt_timeout_max); } static const struct kobj_attribute preempt_timeout_max_attr = @@ -457,7 +459,7 @@ static ssize_t preempt_timeout_min_show(struct kobject *kobj, { struct xe_hw_engine_class_intf *eclass = kobj_to_eclass(kobj); - return sprintf(buf, "%u\n", eclass->sched_props.preempt_timeout_min); + return sysfs_emit(buf, "%u\n", eclass->sched_props.preempt_timeout_min); } static const struct kobj_attribute preempt_timeout_min_attr = @@ -498,8 +500,8 @@ static void kobj_xe_hw_engine_class_fini(struct drm_device *drm, void *arg) kobject_put(kobj); } - static struct kobj_eclass * -kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, char *name) +static struct kobj_eclass * +kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, const char *name) { struct kobj_eclass *keclass; int err = 0; @@ -513,13 +515,13 @@ kobj_xe_hw_engine_class(struct xe_device *xe, struct kobject *parent, char *name kobject_put(&keclass->base); return NULL; } + keclass->xe = xe; err = drmm_add_action_or_reset(&xe->drm, kobj_xe_hw_engine_class_fini, &keclass->base); if (err) - drm_warn(&xe->drm, - "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); + return NULL; + return keclass; } @@ -550,13 +552,8 @@ static int xe_add_hw_engine_class_defaults(struct xe_device *xe, if (err) goto err_object; - err = drmm_add_action_or_reset(&xe->drm, hw_engine_class_defaults_fini, - kobj); - if (err) - drm_warn(&xe->drm, - "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); - return err; + return drmm_add_action_or_reset(&xe->drm, hw_engine_class_defaults_fini, kobj); + err_object: kobject_put(kobj); return err; @@ -567,9 +564,51 @@ static void xe_hw_engine_sysfs_kobj_release(struct kobject *kobj) kfree(kobj); } +static ssize_t xe_hw_engine_class_sysfs_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct xe_device *xe = kobj_to_xe(kobj); + struct kobj_attribute *kattr; + ssize_t ret = -EIO; + + kattr = container_of(attr, struct kobj_attribute, attr); + if (kattr->show) { + xe_pm_runtime_get(xe); + ret = kattr->show(kobj, kattr, buf); + xe_pm_runtime_put(xe); + } + + return ret; +} + +static ssize_t xe_hw_engine_class_sysfs_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, + size_t count) +{ + struct xe_device *xe = kobj_to_xe(kobj); + struct kobj_attribute *kattr; + ssize_t ret = -EIO; + + kattr = container_of(attr, struct kobj_attribute, attr); + if (kattr->store) { + xe_pm_runtime_get(xe); + ret = kattr->store(kobj, kattr, buf, count); + xe_pm_runtime_put(xe); + } + + return ret; +} + +static const struct sysfs_ops xe_hw_engine_class_sysfs_ops = { + .show = xe_hw_engine_class_sysfs_attr_show, + .store = xe_hw_engine_class_sysfs_attr_store, +}; + static const struct kobj_type xe_hw_engine_sysfs_kobj_type = { .release = xe_hw_engine_sysfs_kobj_release, - .sysfs_ops = &kobj_sysfs_ops, + .sysfs_ops = &xe_hw_engine_class_sysfs_ops, }; static void hw_engine_class_sysfs_fini(struct drm_device *drm, void *arg) @@ -579,6 +618,24 @@ static void hw_engine_class_sysfs_fini(struct drm_device *drm, void *arg) kobject_put(kobj); } +static const char *xe_hw_engine_class_to_str(enum xe_engine_class class) +{ + switch (class) { + case XE_ENGINE_CLASS_RENDER: + return "rcs"; + case XE_ENGINE_CLASS_VIDEO_DECODE: + return "vcs"; + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + return "vecs"; + case XE_ENGINE_CLASS_COPY: + return "bcs"; + case XE_ENGINE_CLASS_COMPUTE: + return "ccs"; + default: + return NULL; + } +} + /** * xe_hw_engine_class_sysfs_init - Init HW engine classes on GT. * @gt: Xe GT. @@ -608,7 +665,7 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt) goto err_object; for_each_hw_engine(hwe, gt, id) { - char name[MAX_ENGINE_CLASS_NAME_LEN]; + const char *name; struct kobj_eclass *keclass; if (hwe->class == XE_ENGINE_CLASS_OTHER || @@ -619,24 +676,8 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt) continue; class_mask |= 1 << hwe->class; - - switch (hwe->class) { - case XE_ENGINE_CLASS_RENDER: - strcpy(name, "rcs"); - break; - case XE_ENGINE_CLASS_VIDEO_DECODE: - strcpy(name, "vcs"); - break; - case XE_ENGINE_CLASS_VIDEO_ENHANCE: - strcpy(name, "vecs"); - break; - case XE_ENGINE_CLASS_COPY: - strcpy(name, "bcs"); - break; - case XE_ENGINE_CLASS_COMPUTE: - strcpy(name, "ccs"); - break; - default: + name = xe_hw_engine_class_to_str(hwe->class); + if (!name) { err = -EINVAL; goto err_object; } @@ -649,26 +690,16 @@ int xe_hw_engine_class_sysfs_init(struct xe_gt *gt) keclass->eclass = hwe->eclass; err = xe_add_hw_engine_class_defaults(xe, &keclass->base); - if (err) { - drm_warn(&xe->drm, - "Add .defaults to engines failed!, err: %d\n", - err); + if (err) goto err_object; - } err = sysfs_create_files(&keclass->base, files); if (err) goto err_object; } - err = drmm_add_action_or_reset(&xe->drm, hw_engine_class_sysfs_fini, - kobj); - if (err) - drm_warn(&xe->drm, - "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); + return drmm_add_action_or_reset(&xe->drm, hw_engine_class_sysfs_fini, kobj); - return err; err_object: kobject_put(kobj); return err; diff --git a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h index ec5ba673b3..28a0d7c909 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_class_sysfs.h @@ -26,6 +26,8 @@ struct kobj_eclass { struct kobject base; /** @eclass: A pointer to the hw engine class interface */ struct xe_hw_engine_class_intf *eclass; + /** @xe: A pointer to the xe device */ + struct xe_device *xe; }; static inline struct xe_hw_engine_class_intf *kobj_to_eclass(struct kobject *kobj) @@ -33,4 +35,9 @@ static inline struct xe_hw_engine_class_intf *kobj_to_eclass(struct kobject *kob return container_of(kobj, struct kobj_eclass, base)->eclass; } +static inline struct xe_device *kobj_to_xe(struct kobject *kobj) +{ + return container_of(kobj, struct kobj_eclass, base)->xe; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index a5de3e7b0b..f872ef1031 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -130,7 +130,7 @@ void xe_hw_fence_ctx_init(struct xe_hw_fence_ctx *ctx, struct xe_gt *gt, ctx->irq = irq; ctx->dma_fence_ctx = dma_fence_context_alloc(1); ctx->next_seqno = XE_FENCE_INITIAL_SEQNO; - sprintf(ctx->name, "%s", name); + snprintf(ctx->name, sizeof(ctx->name), "%s", name); } void xe_hw_fence_ctx_finish(struct xe_hw_fence_ctx *ctx) diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 9ac7fbe201..453e601ddd 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -18,6 +18,7 @@ #include "xe_pcode.h" #include "xe_pcode_api.h" #include "xe_sriov.h" +#include "xe_pm.h" enum xe_hwmon_reg { REG_PKG_RAPL_LIMIT, @@ -33,6 +34,12 @@ enum xe_hwmon_reg_operation { REG_READ64, }; +enum xe_hwmon_channel { + CHANNEL_CARD, + CHANNEL_PKG, + CHANNEL_MAX, +}; + /* * SF_* - scale factors for particular quantities according to hwmon spec. */ @@ -68,61 +75,61 @@ struct xe_hwmon { int scl_shift_energy; /** @scl_shift_time: pkg time unit */ int scl_shift_time; - /** @ei: Energy info for energy1_input */ - struct xe_hwmon_energy_info ei; + /** @ei: Energy info for energyN_input */ + struct xe_hwmon_energy_info ei[CHANNEL_MAX]; }; -static u32 xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg) +static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, + int channel) { struct xe_device *xe = gt_to_xe(hwmon->gt); - struct xe_reg reg = XE_REG(0); switch (hwmon_reg) { case REG_PKG_RAPL_LIMIT: - if (xe->info.platform == XE_PVC) - reg = PVC_GT0_PACKAGE_RAPL_LIMIT; - else if (xe->info.platform == XE_DG2) - reg = PCU_CR_PACKAGE_RAPL_LIMIT; + if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) + return PVC_GT0_PACKAGE_RAPL_LIMIT; + else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) + return PCU_CR_PACKAGE_RAPL_LIMIT; break; case REG_PKG_POWER_SKU: - if (xe->info.platform == XE_PVC) - reg = PVC_GT0_PACKAGE_POWER_SKU; - else if (xe->info.platform == XE_DG2) - reg = PCU_CR_PACKAGE_POWER_SKU; + if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) + return PVC_GT0_PACKAGE_POWER_SKU; + else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) + return PCU_CR_PACKAGE_POWER_SKU; break; case REG_PKG_POWER_SKU_UNIT: if (xe->info.platform == XE_PVC) - reg = PVC_GT0_PACKAGE_POWER_SKU_UNIT; + return PVC_GT0_PACKAGE_POWER_SKU_UNIT; else if (xe->info.platform == XE_DG2) - reg = PCU_CR_PACKAGE_POWER_SKU_UNIT; + return PCU_CR_PACKAGE_POWER_SKU_UNIT; break; case REG_GT_PERF_STATUS: - if (xe->info.platform == XE_DG2) - reg = GT_PERF_STATUS; + if (xe->info.platform == XE_DG2 && channel == CHANNEL_PKG) + return GT_PERF_STATUS; break; case REG_PKG_ENERGY_STATUS: - if (xe->info.platform == XE_PVC) - reg = PVC_GT0_PLATFORM_ENERGY_STATUS; - else if (xe->info.platform == XE_DG2) - reg = PCU_CR_PACKAGE_ENERGY_STATUS; + if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) + return PVC_GT0_PLATFORM_ENERGY_STATUS; + else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) + return PCU_CR_PACKAGE_ENERGY_STATUS; break; default: drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg); break; } - return reg.raw; + return XE_REG(0); } static void xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, enum xe_hwmon_reg_operation operation, u64 *value, - u32 clr, u32 set) + u32 clr, u32 set, int channel) { struct xe_reg reg; - reg.raw = xe_hwmon_get_reg(hwmon, hwmon_reg); + reg = xe_hwmon_get_reg(hwmon, hwmon_reg, channel); - if (!reg.raw) + if (!xe_reg_is_valid(reg)) return; switch (operation) { @@ -150,13 +157,13 @@ static void xe_hwmon_process_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon * same pattern for sysfs, allow arbitrary PL1 limits to be set but display * clamped values when read. */ -static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, long *value) +static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *value) { u64 reg_val, min, max; mutex_lock(&hwmon->hwmon_lock); - xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, ®_val, 0, 0); + xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, ®_val, 0, 0, channel); /* Check if PL1 limit is disabled */ if (!(reg_val & PKG_PWR_LIM_1_EN)) { *value = PL1_DISABLE; @@ -166,7 +173,7 @@ static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, long *value) reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val); *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); - xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ64, ®_val, 0, 0); + xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ64, ®_val, 0, 0, channel); min = REG_FIELD_GET(PKG_MIN_PWR, reg_val); min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); max = REG_FIELD_GET(PKG_MAX_PWR, reg_val); @@ -178,7 +185,7 @@ unlock: mutex_unlock(&hwmon->hwmon_lock); } -static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value) +static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long value) { int ret = 0; u64 reg_val; @@ -188,9 +195,9 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value) /* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */ if (value == PL1_DISABLE) { xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, ®_val, - PKG_PWR_LIM_1_EN, 0); + PKG_PWR_LIM_1_EN, 0, channel); xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_READ32, ®_val, - PKG_PWR_LIM_1_EN, 0); + PKG_PWR_LIM_1_EN, 0, channel); if (reg_val & PKG_PWR_LIM_1_EN) { ret = -EOPNOTSUPP; @@ -203,17 +210,17 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, long value) reg_val = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, reg_val); xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, ®_val, - PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val); + PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val, channel); unlock: mutex_unlock(&hwmon->hwmon_lock); return ret; } -static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value) +static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, long *value) { u64 reg_val; - xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ32, ®_val, 0, 0); + xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU, REG_READ32, ®_val, 0, 0, channel); reg_val = REG_FIELD_GET(PKG_TDP, reg_val); *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); } @@ -236,16 +243,16 @@ static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, long *value) * the hwmon API. Using x86_64 128 bit arithmetic (see mul_u64_u32_shr()), * a 'long' of 63 bits, SF_ENERGY of 1e6 (~20 bits) and * hwmon->scl_shift_energy of 14 bits we have 57 (63 - 20 + 14) bits before - * energy1_input overflows. This at 1000 W is an overflow duration of 278 years. + * energyN_input overflows. This at 1000 W is an overflow duration of 278 years. */ static void -xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy) +xe_hwmon_energy_get(struct xe_hwmon *hwmon, int channel, long *energy) { - struct xe_hwmon_energy_info *ei = &hwmon->ei; + struct xe_hwmon_energy_info *ei = &hwmon->ei[channel]; u64 reg_val; xe_hwmon_process_reg(hwmon, REG_PKG_ENERGY_STATUS, REG_READ32, - ®_val, 0, 0); + ®_val, 0, 0, channel); if (reg_val >= ei->reg_val_prev) ei->accum_energy += reg_val - ei->reg_val_prev; @@ -259,23 +266,24 @@ xe_hwmon_energy_get(struct xe_hwmon *hwmon, long *energy) } static ssize_t -xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *attr, - char *buf) +xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *attr, + char *buf) { struct xe_hwmon *hwmon = dev_get_drvdata(dev); u32 x, y, x_w = 2; /* 2 bits */ u64 r, tau4, out; + int sensor_index = to_sensor_dev_attr(attr)->index; - xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(gt_to_xe(hwmon->gt)); mutex_lock(&hwmon->hwmon_lock); xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, - REG_READ32, &r, 0, 0); + REG_READ32, &r, 0, 0, sensor_index); mutex_unlock(&hwmon->hwmon_lock); - xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(gt_to_xe(hwmon->gt)); x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r); y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r); @@ -299,14 +307,15 @@ xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *a } static ssize_t -xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct xe_hwmon *hwmon = dev_get_drvdata(dev); u32 x, y, rxy, x_w = 2; /* 2 bits */ u64 tau4, r, max_win; unsigned long val; int ret; + int sensor_index = to_sensor_dev_attr(attr)->index; ret = kstrtoul(buf, 0, &val); if (ret) @@ -325,7 +334,7 @@ xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute * /* * val must be < max in hwmon interface units. The steps below are - * explained in xe_hwmon_power1_max_interval_show() + * explained in xe_hwmon_power_max_interval_show() */ r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT); x = REG_FIELD_GET(PKG_MAX_WIN_X, r); @@ -354,26 +363,31 @@ xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute * rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y); - xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(gt_to_xe(hwmon->gt)); mutex_lock(&hwmon->hwmon_lock); xe_hwmon_process_reg(hwmon, REG_PKG_RAPL_LIMIT, REG_RMW32, (u64 *)&r, - PKG_PWR_LIM_1_TIME, rxy); + PKG_PWR_LIM_1_TIME, rxy, sensor_index); mutex_unlock(&hwmon->hwmon_lock); - xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(gt_to_xe(hwmon->gt)); return count; } static SENSOR_DEVICE_ATTR(power1_max_interval, 0664, - xe_hwmon_power1_max_interval_show, - xe_hwmon_power1_max_interval_store, 0); + xe_hwmon_power_max_interval_show, + xe_hwmon_power_max_interval_store, CHANNEL_CARD); + +static SENSOR_DEVICE_ATTR(power2_max_interval, 0664, + xe_hwmon_power_max_interval_show, + xe_hwmon_power_max_interval_store, CHANNEL_PKG); static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_power1_max_interval.dev_attr.attr, + &sensor_dev_attr_power2_max_interval.dev_attr.attr, NULL }; @@ -384,12 +398,11 @@ static umode_t xe_hwmon_attributes_visible(struct kobject *kobj, struct xe_hwmon *hwmon = dev_get_drvdata(dev); int ret = 0; - xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(gt_to_xe(hwmon->gt)); - if (attr == &sensor_dev_attr_power1_max_interval.dev_attr.attr) - ret = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT) ? attr->mode : 0; + ret = xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, index)) ? attr->mode : 0; - xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(gt_to_xe(hwmon->gt)); return ret; } @@ -405,10 +418,11 @@ static const struct attribute_group *hwmon_groups[] = { }; static const struct hwmon_channel_info * const hwmon_info[] = { - HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT), - HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT), - HWMON_CHANNEL_INFO(in, HWMON_I_INPUT), - HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT), + HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL, + HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT | HWMON_P_LABEL), + HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL), + HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL), + HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT | HWMON_E_LABEL, HWMON_E_INPUT | HWMON_E_LABEL), NULL }; @@ -431,7 +445,8 @@ static int xe_hwmon_pcode_write_i1(struct xe_gt *gt, u32 uval) uval); } -static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, long *value, u32 scale_factor) +static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel, + long *value, u32 scale_factor) { int ret; u32 uval; @@ -449,7 +464,8 @@ unlock: return ret; } -static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, long value, u32 scale_factor) +static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel, + long value, u32 scale_factor) { int ret; u32 uval; @@ -463,117 +479,131 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, long value, u3 return ret; } -static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, long *value) +static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, int channel, long *value) { u64 reg_val; xe_hwmon_process_reg(hwmon, REG_GT_PERF_STATUS, - REG_READ32, ®_val, 0, 0); + REG_READ32, ®_val, 0, 0, channel); /* HW register value in units of 2.5 millivolt */ *value = DIV_ROUND_CLOSEST(REG_FIELD_GET(VOLTAGE_MASK, reg_val) * 2500, SF_VOLTAGE); } static umode_t -xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int chan) +xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) { u32 uval; switch (attr) { case hwmon_power_max: - return xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT) ? 0664 : 0; + return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, + channel)) ? 0664 : 0; case hwmon_power_rated_max: - return xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU) ? 0444 : 0; + return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, + channel)) ? 0444 : 0; case hwmon_power_crit: - return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || - !(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; + if (channel == CHANNEL_PKG) + return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || + !(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; + break; + case hwmon_power_label: + return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, + channel)) ? 0444 : 0; default: return 0; } + return 0; } static int -xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int chan, long *val) +xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) { switch (attr) { case hwmon_power_max: - xe_hwmon_power_max_read(hwmon, val); + xe_hwmon_power_max_read(hwmon, channel, val); return 0; case hwmon_power_rated_max: - xe_hwmon_power_rated_max_read(hwmon, val); + xe_hwmon_power_rated_max_read(hwmon, channel, val); return 0; case hwmon_power_crit: - return xe_hwmon_power_curr_crit_read(hwmon, val, SF_POWER); + return xe_hwmon_power_curr_crit_read(hwmon, channel, val, SF_POWER); default: return -EOPNOTSUPP; } } static int -xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int chan, long val) +xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val) { switch (attr) { case hwmon_power_max: - return xe_hwmon_power_max_write(hwmon, val); + return xe_hwmon_power_max_write(hwmon, channel, val); case hwmon_power_crit: - return xe_hwmon_power_curr_crit_write(hwmon, val, SF_POWER); + return xe_hwmon_power_curr_crit_write(hwmon, channel, val, SF_POWER); default: return -EOPNOTSUPP; } } static umode_t -xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr) +xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr, int channel) { u32 uval; switch (attr) { case hwmon_curr_crit: - return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || - (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; + case hwmon_curr_label: + if (channel == CHANNEL_PKG) + return (xe_hwmon_pcode_read_i1(hwmon->gt, &uval) || + (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; + break; default: return 0; } + return 0; } static int -xe_hwmon_curr_read(struct xe_hwmon *hwmon, u32 attr, long *val) +xe_hwmon_curr_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) { switch (attr) { case hwmon_curr_crit: - return xe_hwmon_power_curr_crit_read(hwmon, val, SF_CURR); + return xe_hwmon_power_curr_crit_read(hwmon, channel, val, SF_CURR); default: return -EOPNOTSUPP; } } static int -xe_hwmon_curr_write(struct xe_hwmon *hwmon, u32 attr, long val) +xe_hwmon_curr_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val) { switch (attr) { case hwmon_curr_crit: - return xe_hwmon_power_curr_crit_write(hwmon, val, SF_CURR); + return xe_hwmon_power_curr_crit_write(hwmon, channel, val, SF_CURR); default: return -EOPNOTSUPP; } } static umode_t -xe_hwmon_in_is_visible(struct xe_hwmon *hwmon, u32 attr) +xe_hwmon_in_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) { switch (attr) { case hwmon_in_input: - return xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS) ? 0444 : 0; + case hwmon_in_label: + return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, + channel)) ? 0444 : 0; default: return 0; } } static int -xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, long *val) +xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) { switch (attr) { case hwmon_in_input: - xe_hwmon_get_voltage(hwmon, val); + xe_hwmon_get_voltage(hwmon, channel, val); return 0; default: return -EOPNOTSUPP; @@ -581,22 +611,24 @@ xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, long *val) } static umode_t -xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr) +xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) { switch (attr) { case hwmon_energy_input: - return xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS) ? 0444 : 0; + case hwmon_energy_label: + return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, + channel)) ? 0444 : 0; default: return 0; } } static int -xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, long *val) +xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) { switch (attr) { case hwmon_energy_input: - xe_hwmon_energy_get(hwmon, val); + xe_hwmon_energy_get(hwmon, channel, val); return 0; default: return -EOPNOTSUPP; @@ -610,27 +642,27 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, struct xe_hwmon *hwmon = (struct xe_hwmon *)drvdata; int ret; - xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(gt_to_xe(hwmon->gt)); switch (type) { case hwmon_power: ret = xe_hwmon_power_is_visible(hwmon, attr, channel); break; case hwmon_curr: - ret = xe_hwmon_curr_is_visible(hwmon, attr); + ret = xe_hwmon_curr_is_visible(hwmon, attr, channel); break; case hwmon_in: - ret = xe_hwmon_in_is_visible(hwmon, attr); + ret = xe_hwmon_in_is_visible(hwmon, attr, channel); break; case hwmon_energy: - ret = xe_hwmon_energy_is_visible(hwmon, attr); + ret = xe_hwmon_energy_is_visible(hwmon, attr, channel); break; default: ret = 0; break; } - xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(gt_to_xe(hwmon->gt)); return ret; } @@ -642,27 +674,27 @@ xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, struct xe_hwmon *hwmon = dev_get_drvdata(dev); int ret; - xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(gt_to_xe(hwmon->gt)); switch (type) { case hwmon_power: ret = xe_hwmon_power_read(hwmon, attr, channel, val); break; case hwmon_curr: - ret = xe_hwmon_curr_read(hwmon, attr, val); + ret = xe_hwmon_curr_read(hwmon, attr, channel, val); break; case hwmon_in: - ret = xe_hwmon_in_read(hwmon, attr, val); + ret = xe_hwmon_in_read(hwmon, attr, channel, val); break; case hwmon_energy: - ret = xe_hwmon_energy_read(hwmon, attr, val); + ret = xe_hwmon_energy_read(hwmon, attr, channel, val); break; default: ret = -EOPNOTSUPP; break; } - xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(gt_to_xe(hwmon->gt)); return ret; } @@ -674,29 +706,49 @@ xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, struct xe_hwmon *hwmon = dev_get_drvdata(dev); int ret; - xe_device_mem_access_get(gt_to_xe(hwmon->gt)); + xe_pm_runtime_get(gt_to_xe(hwmon->gt)); switch (type) { case hwmon_power: ret = xe_hwmon_power_write(hwmon, attr, channel, val); break; case hwmon_curr: - ret = xe_hwmon_curr_write(hwmon, attr, val); + ret = xe_hwmon_curr_write(hwmon, attr, channel, val); break; default: ret = -EOPNOTSUPP; break; } - xe_device_mem_access_put(gt_to_xe(hwmon->gt)); + xe_pm_runtime_put(gt_to_xe(hwmon->gt)); return ret; } +static int xe_hwmon_read_label(struct device *dev, + enum hwmon_sensor_types type, + u32 attr, int channel, const char **str) +{ + switch (type) { + case hwmon_power: + case hwmon_energy: + case hwmon_curr: + case hwmon_in: + if (channel == CHANNEL_CARD) + *str = "card"; + else if (channel == CHANNEL_PKG) + *str = "pkg"; + return 0; + default: + return -EOPNOTSUPP; + } +} + static const struct hwmon_ops hwmon_ops = { .is_visible = xe_hwmon_is_visible, .read = xe_hwmon_read, .write = xe_hwmon_write, + .read_string = xe_hwmon_read_label, }; static const struct hwmon_chip_info hwmon_chip_info = { @@ -710,14 +762,15 @@ xe_hwmon_get_preregistration_info(struct xe_device *xe) struct xe_hwmon *hwmon = xe->hwmon; long energy; u64 val_sku_unit = 0; + int channel; /* * The contents of register PKG_POWER_SKU_UNIT do not change, * so read it once and store the shift values. */ - if (xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT)) { + if (xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0))) { xe_hwmon_process_reg(hwmon, REG_PKG_POWER_SKU_UNIT, - REG_READ32, &val_sku_unit, 0, 0); + REG_READ32, &val_sku_unit, 0, 0, 0); hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit); hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit); hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit); @@ -727,8 +780,9 @@ xe_hwmon_get_preregistration_info(struct xe_device *xe) * Initialize 'struct xe_hwmon_energy_info', i.e. set fields to the * first value of the energy register read */ - if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, 0)) - xe_hwmon_energy_get(hwmon, &energy); + for (channel = 0; channel < CHANNEL_MAX; channel++) + if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, channel)) + xe_hwmon_energy_get(hwmon, channel, &energy); } static void xe_hwmon_mutex_destroy(void *arg) diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 2f5d179e0d..9968063531 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -187,7 +187,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt) * GSCCS interrupts, but it has its own mask register. */ if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) { - gsc_mask = irqs; + gsc_mask = irqs | GSC_ER_COMPLETE; heci_mask = GSC_IRQ_INTF(1); } else if (HAS_HECI_GSCFI(xe)) { gsc_mask = GSC_IRQ_INTF(1); @@ -326,7 +326,6 @@ static void gt_irq_handler(struct xe_tile *tile, xe_heci_gsc_irq_handler(xe, intr_vec); else gt_other_irq_handler(engine_gt, instance, intr_vec); - continue; } } } diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index 0d7c5514e0..418661a889 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -35,7 +35,7 @@ static bool xe_has_multi_level_lmtt(struct xe_device *xe) { - return xe->info.platform == XE_PVC; + return GRAPHICS_VERx100(xe) >= 1260; } static struct xe_tile *lmtt_to_tile(struct xe_lmtt *lmtt) @@ -70,8 +70,8 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) * lmtt->ops->lmtt_pte_num(level)), ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) | - XE_BO_CREATE_PINNED_BIT); + XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) | + XE_BO_NEEDS_64K | XE_BO_FLAG_PINNED); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto out_free_pt; diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 57066faf57..615bbc372a 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -5,8 +5,11 @@ #include "xe_lrc.h" +#include <linux/ascii85.h> + #include "instructions/xe_mi_commands.h" #include "instructions/xe_gfxpipe_commands.h" +#include "instructions/xe_gfx_state_commands.h" #include "regs/xe_engine_regs.h" #include "regs/xe_gpu_commands.h" #include "regs/xe_lrc_layout.h" @@ -23,13 +26,28 @@ #include "xe_sriov.h" #include "xe_vm.h" -#define LRC_VALID (1 << 0) -#define LRC_PRIVILEGE (1 << 8) -#define LRC_ADDRESSING_MODE_SHIFT 3 +#define LRC_VALID BIT_ULL(0) +#define LRC_PRIVILEGE BIT_ULL(8) +#define LRC_ADDRESSING_MODE GENMASK_ULL(4, 3) #define LRC_LEGACY_64B_CONTEXT 3 -#define ENGINE_CLASS_SHIFT 61 -#define ENGINE_INSTANCE_SHIFT 48 +#define LRC_ENGINE_CLASS GENMASK_ULL(63, 61) +#define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48) + +struct xe_lrc_snapshot { + struct xe_bo *lrc_bo; + void *lrc_snapshot; + unsigned long lrc_size, lrc_offset; + + u32 context_desc; + u32 head; + struct { + u32 internal; + u32 memory; + } tail; + u32 start_seqno; + u32 seqno; +}; static struct xe_device * lrc_to_xe(struct xe_lrc *lrc) @@ -634,7 +652,7 @@ static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \ iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \ return map; \ } \ -static inline u32 __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \ +static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \ { \ return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \ } \ @@ -724,8 +742,9 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, lrc->bo = xe_bo_create_pin_map(xe, tile, vm, ring_size + xe_lrc_size(xe, hwe->class), ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); @@ -776,7 +795,7 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); lrc->desc = LRC_VALID; - lrc->desc |= LRC_LEGACY_64B_CONTEXT << LRC_ADDRESSING_MODE_SHIFT; + lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT); /* TODO: Priority */ /* While this appears to have something about privileged batches or @@ -786,8 +805,8 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, lrc->desc |= LRC_PRIVILEGE; if (GRAPHICS_VERx100(xe) < 1250) { - lrc->desc |= (u64)hwe->instance << ENGINE_INSTANCE_SHIFT; - lrc->desc |= (u64)hwe->class << ENGINE_CLASS_SHIFT; + lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance); + lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class); } arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE; @@ -1034,6 +1053,8 @@ static int dump_gfxpipe_command(struct drm_printer *p, MATCH(GPGPU_CSR_BASE_ADDRESS); MATCH(STATE_COMPUTE_MODE); MATCH3D(3DSTATE_BTD); + MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS); + MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS); MATCH3D(3DSTATE_VF_STATISTICS); @@ -1058,6 +1079,7 @@ static int dump_gfxpipe_command(struct drm_printer *p, MATCH3D(3DSTATE_WM); MATCH3D(3DSTATE_CONSTANT_VS); MATCH3D(3DSTATE_CONSTANT_GS); + MATCH3D(3DSTATE_CONSTANT_PS); MATCH3D(3DSTATE_SAMPLE_MASK); MATCH3D(3DSTATE_CONSTANT_HS); MATCH3D(3DSTATE_CONSTANT_DS); @@ -1150,6 +1172,31 @@ static int dump_gfxpipe_command(struct drm_printer *p, } } +static int dump_gfx_state_command(struct drm_printer *p, + struct xe_gt *gt, + u32 *dw, + int remaining_dw) +{ + u32 numdw = instr_dw(*dw); + u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw); + + /* + * Make sure we haven't mis-parsed a number of dwords that exceeds the + * remaining size of the LRC. + */ + if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) + numdw = remaining_dw; + + switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) { + MATCH(STATE_WRITE_INLINE); + + default: + drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n", + *dw, opcode, numdw); + return numdw; + } +} + void xe_lrc_dump_default(struct drm_printer *p, struct xe_gt *gt, enum xe_engine_class hwe_class) @@ -1174,6 +1221,8 @@ void xe_lrc_dump_default(struct drm_printer *p, num_dw = dump_mi_command(p, gt, dw, remaining_dw); } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) { num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw); + } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) { + num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw); } else { num_dw = min(instr_dw(*dw), remaining_dw); drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n", @@ -1297,3 +1346,101 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b bb->len += num_dw; } } + +struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) +{ + struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT); + + if (!snapshot) + return NULL; + + snapshot->context_desc = lower_32_bits(xe_lrc_ggtt_addr(lrc)); + snapshot->head = xe_lrc_ring_head(lrc); + snapshot->tail.internal = lrc->ring.tail; + snapshot->tail.memory = xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL); + snapshot->start_seqno = xe_lrc_start_seqno(lrc); + snapshot->seqno = xe_lrc_seqno(lrc); + snapshot->lrc_bo = xe_bo_get(lrc->bo); + snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); + snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; + snapshot->lrc_snapshot = NULL; + return snapshot; +} + +void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) +{ + struct xe_bo *bo; + struct iosys_map src; + + if (!snapshot) + return; + + bo = snapshot->lrc_bo; + snapshot->lrc_bo = NULL; + + snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL); + if (!snapshot->lrc_snapshot) + goto put_bo; + + dma_resv_lock(bo->ttm.base.resv, NULL); + if (!ttm_bo_vmap(&bo->ttm, &src)) { + xe_map_memcpy_from(xe_bo_device(bo), + snapshot->lrc_snapshot, &src, snapshot->lrc_offset, + snapshot->lrc_size); + ttm_bo_vunmap(&bo->ttm, &src); + } else { + kvfree(snapshot->lrc_snapshot); + snapshot->lrc_snapshot = NULL; + } + dma_resv_unlock(bo->ttm.base.resv); +put_bo: + xe_bo_put(bo); +} + +void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p) +{ + unsigned long i; + + if (!snapshot) + return; + + drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc); + drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head); + drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", + snapshot->tail.internal, snapshot->tail.memory); + drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno); + drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno); + + if (!snapshot->lrc_snapshot) + return; + + drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE); + drm_puts(p, "\t[HWSP].data: "); + for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) { + u32 *val = snapshot->lrc_snapshot + i; + char dumped[ASCII85_BUFSZ]; + + drm_puts(p, ascii85_encode(*val, dumped)); + } + + drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE); + drm_puts(p, "\t[HWCTX].data: "); + for (; i < snapshot->lrc_size; i += sizeof(u32)) { + u32 *val = snapshot->lrc_snapshot + i; + char dumped[ASCII85_BUFSZ]; + + drm_puts(p, ascii85_encode(*val, dumped)); + } + drm_puts(p, "\n"); +} + +void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) +{ + if (!snapshot) + return; + + kvfree(snapshot->lrc_snapshot); + if (snapshot->lrc_bo) + xe_bo_put(snapshot->lrc_bo); + kfree(snapshot); +} diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 28b1d3f404..d32fa31faa 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -55,4 +55,9 @@ void xe_lrc_dump_default(struct drm_printer *p, void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb); +struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc); +void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot); +void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p); +void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot); + #endif diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index 24f20ed66f..b716df0dfb 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -43,4 +43,6 @@ struct xe_lrc { struct xe_hw_fence_ctx fence_ctx; }; +struct xe_lrc_snapshot; + #endif diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c index 76e95535d7..95b6e9d7b7 100644 --- a/drivers/gpu/drm/xe/xe_memirq.c +++ b/drivers/gpu/drm/xe/xe_memirq.c @@ -127,10 +127,11 @@ static int memirq_alloc_pages(struct xe_memirq *memirq) /* XXX: convert to managed bo */ bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, ttm_bo_type_kernel, - XE_BO_CREATE_SYSTEM_BIT | - XE_BO_CREATE_GGTT_BIT | - XE_BO_NEEDS_UC | - XE_BO_NEEDS_CPU_ACCESS); + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_NEEDS_UC | + XE_BO_FLAG_NEEDS_CPU_ACCESS); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto out; diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index bebfdc8897..198f5c2189 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -16,6 +16,7 @@ #include "instructions/xe_mi_commands.h" #include "regs/xe_gpu_commands.h" +#include "regs/xe_gtt_defs.h" #include "tests/xe_test.h" #include "xe_assert.h" #include "xe_bb.h" @@ -154,8 +155,8 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, bo = xe_bo_create_pin_map(vm->xe, tile, vm, num_entries * XE_PAGE_SIZE, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_PINNED_BIT); + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_PINNED); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -982,7 +983,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_res_cursor src_it; struct ttm_resource *src = dst; int err; - int pass = 0; if (!clear_vram) xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it); @@ -1003,8 +1003,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, clear_L0 = xe_migrate_res_sizes(m, &src_it); - drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, clear_L0); - /* Calculate final sizes and batch size.. */ batch_size = 2 + pte_update_size(m, clear_vram, src, &src_it, diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 7ba2477452..334637511e 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -163,6 +163,42 @@ static int xe_determine_lmem_bar_size(struct xe_device *xe) return 0; } +static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size) +{ + struct xe_device *xe = gt_to_xe(gt); + u64 offset; + u32 reg; + + if (GRAPHICS_VER(xe) >= 20) { + u64 ccs_size = tile_size / 512; + u64 offset_hi, offset_lo; + u32 nodes, num_enabled; + + reg = xe_mmio_read32(gt, MIRROR_FUSE3); + nodes = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, reg); + num_enabled = hweight32(nodes); /* Number of enabled l3 nodes */ + + reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER); + offset_lo = REG_FIELD_GET(XE2_FLAT_CCS_BASE_LOWER_ADDR_MASK, reg); + + reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_UPPER); + offset_hi = REG_FIELD_GET(XE2_FLAT_CCS_BASE_UPPER_ADDR_MASK, reg); + + offset = offset_hi << 32; /* HW view bits 39:32 */ + offset |= offset_lo << 6; /* HW view bits 31:6 */ + offset *= num_enabled; /* convert to SW view */ + + /* We don't expect any holes */ + xe_assert_msg(xe, offset == (xe_mmio_read64_2x32(gt, GSMBASE) - ccs_size), + "Hole between CCS and GSM.\n"); + } else { + reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); + offset = (u64)REG_FIELD_GET(XEHP_FLAT_CCS_PTR, reg) * SZ_64K; + } + + return offset; +} + /** * xe_mmio_tile_vram_size() - Collect vram size and offset information * @tile: tile to get info for @@ -207,8 +243,7 @@ static int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, /* minus device usage */ if (xe->info.has_flat_ccs) { - reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); - offset = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K; + offset = get_flat_ccs_offset(gt, *tile_size); } else { offset = xe_mmio_read64_2x32(gt, GSMBASE); } @@ -360,32 +395,9 @@ static void mmio_fini(struct drm_device *drm, void *arg) iounmap(xe->mem.vram.mapping); } -static int xe_verify_lmem_ready(struct xe_device *xe) -{ - struct xe_gt *gt = xe_root_mmio_gt(xe); - - if (!IS_DGFX(xe)) - return 0; - - if (IS_SRIOV_VF(xe)) - return 0; - - /* - * The boot firmware initializes local memory and assesses its health. - * If memory training fails, the punit will have been instructed to - * keep the GT powered down; we won't be able to communicate with it - * and we should not continue with driver initialization. - */ - if (!(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) { - drm_err(&xe->drm, "VRAM not initialized by firmware\n"); - return -ENODEV; - } - - return 0; -} - int xe_mmio_init(struct xe_device *xe) { + struct xe_tile *root_tile = xe_device_get_root_tile(xe); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); const int mmio_bar = 0; @@ -401,23 +413,83 @@ int xe_mmio_init(struct xe_device *xe) return -EIO; } + /* Setup first tile; other tiles (if present) will be setup later. */ + root_tile->mmio.size = SZ_16M; + root_tile->mmio.regs = xe->mmio.regs; + return drmm_add_action_or_reset(&xe->drm, mmio_fini, xe); } -int xe_mmio_root_tile_init(struct xe_device *xe) +u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) { - struct xe_tile *root_tile = xe_device_get_root_tile(xe); - int err; + struct xe_tile *tile = gt_to_tile(gt); - /* Setup first tile; other tiles (if present) will be setup later. */ - root_tile->mmio.size = SZ_16M; - root_tile->mmio.regs = xe->mmio.regs; + if (reg.addr < gt->mmio.adj_limit) + reg.addr += gt->mmio.adj_offset; - err = xe_verify_lmem_ready(xe); - if (err) - return err; + return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); +} - return 0; +u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg) +{ + struct xe_tile *tile = gt_to_tile(gt); + + if (reg.addr < gt->mmio.adj_limit) + reg.addr += gt->mmio.adj_offset; + + return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); +} + +void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val) +{ + struct xe_tile *tile = gt_to_tile(gt); + + if (reg.addr < gt->mmio.adj_limit) + reg.addr += gt->mmio.adj_offset; + + writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); +} + +u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) +{ + struct xe_tile *tile = gt_to_tile(gt); + + if (reg.addr < gt->mmio.adj_limit) + reg.addr += gt->mmio.adj_offset; + + return readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); +} + +u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set) +{ + u32 old, reg_val; + + old = xe_mmio_read32(gt, reg); + reg_val = (old & ~clr) | set; + xe_mmio_write32(gt, reg, reg_val); + + return old; +} + +int xe_mmio_write32_and_verify(struct xe_gt *gt, + struct xe_reg reg, u32 val, u32 mask, u32 eval) +{ + u32 reg_val; + + xe_mmio_write32(gt, reg, val); + reg_val = xe_mmio_read32(gt, reg); + + return (reg_val & mask) != eval ? -EINVAL : 0; +} + +bool xe_mmio_in_range(const struct xe_gt *gt, + const struct xe_mmio_range *range, + struct xe_reg reg) +{ + if (reg.addr < gt->mmio.adj_limit) + reg.addr += gt->mmio.adj_offset; + + return range && reg.addr >= range->start && reg.addr <= range->end; } /** diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h index 98de5c13c8..a3cd7b3036 100644 --- a/drivers/gpu/drm/xe/xe_mmio.h +++ b/drivers/gpu/drm/xe/xe_mmio.h @@ -21,83 +21,15 @@ struct xe_device; #define LMEM_BAR 2 int xe_mmio_init(struct xe_device *xe); -int xe_mmio_root_tile_init(struct xe_device *xe); void xe_mmio_probe_tiles(struct xe_device *xe); -static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) -{ - struct xe_tile *tile = gt_to_tile(gt); - - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - return readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); -} - -static inline u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg) -{ - struct xe_tile *tile = gt_to_tile(gt); - - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - return readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); -} - -static inline void xe_mmio_write32(struct xe_gt *gt, - struct xe_reg reg, u32 val) -{ - struct xe_tile *tile = gt_to_tile(gt); - - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - writel(val, (reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); -} - -static inline u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) -{ - struct xe_tile *tile = gt_to_tile(gt); - - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - return readl((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + reg.addr); -} - -static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, - u32 set) -{ - u32 old, reg_val; - - old = xe_mmio_read32(gt, reg); - reg_val = (old & ~clr) | set; - xe_mmio_write32(gt, reg, reg_val); - - return old; -} - -static inline int xe_mmio_write32_and_verify(struct xe_gt *gt, - struct xe_reg reg, u32 val, - u32 mask, u32 eval) -{ - u32 reg_val; - - xe_mmio_write32(gt, reg, val); - reg_val = xe_mmio_read32(gt, reg); - - return (reg_val & mask) != eval ? -EINVAL : 0; -} - -static inline bool xe_mmio_in_range(const struct xe_gt *gt, - const struct xe_mmio_range *range, - struct xe_reg reg) -{ - if (reg.addr < gt->mmio.adj_limit) - reg.addr += gt->mmio.adj_offset; - - return range && reg.addr >= range->start && reg.addr <= range->end; -} +u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg); +u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg); +void xe_mmio_write32(struct xe_gt *gt, struct xe_reg reg, u32 val); +u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg); +u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr, u32 set); +int xe_mmio_write32_and_verify(struct xe_gt *gt, struct xe_reg reg, u32 val, u32 mask, u32 eval); +bool xe_mmio_in_range(const struct xe_gt *gt, const struct xe_mmio_range *range, struct xe_reg reg); int xe_mmio_probe_vram(struct xe_device *xe); u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg); diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 609d997b3e..1e92f8ee07 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -17,10 +17,10 @@ #include "xe_step_types.h" #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) -#define mocs_dbg drm_dbg +#define mocs_dbg xe_gt_dbg #else __printf(2, 3) -static inline void mocs_dbg(const struct drm_device *dev, +static inline void mocs_dbg(const struct xe_gt *gt, const char *format, ...) { /* noop */ } #endif @@ -72,7 +72,7 @@ struct xe_mocs_info { /* Helper defines */ #define XELP_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ #define PVC_NUM_MOCS_ENTRIES 3 -#define MTL_NUM_MOCS_ENTRIES 16 +#define MTL_NUM_MOCS_ENTRIES 16 #define XE2_NUM_MOCS_ENTRIES 16 /* (e)LLC caching options */ @@ -375,6 +375,7 @@ static unsigned int get_mocs_settings(struct xe_device *xe, switch (xe->info.platform) { case XE_LUNARLAKE: + case XE_BATTLEMAGE: info->size = ARRAY_SIZE(xe2_mocs_table); info->table = xe2_mocs_table; info->n_entries = XE2_NUM_MOCS_ENTRIES; @@ -401,7 +402,11 @@ static unsigned int get_mocs_settings(struct xe_device *xe, info->size = ARRAY_SIZE(dg2_mocs_desc); info->table = dg2_mocs_desc; info->uc_index = 1; - info->n_entries = XELP_NUM_MOCS_ENTRIES; + /* + * Last entry is RO on hardware, don't bother with what was + * written when checking later + */ + info->n_entries = XELP_NUM_MOCS_ENTRIES - 1; info->unused_entries_index = 3; break; case XE_DG1: @@ -462,24 +467,34 @@ static u32 get_entry_control(const struct xe_mocs_info *info, return info->table[info->unused_entries_index].control_value; } -static void __init_mocs_table(struct xe_gt *gt, - const struct xe_mocs_info *info) +static bool regs_are_mcr(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); + if (xe_gt_is_media_type(gt)) + return MEDIA_VER(xe) >= 20; + else + return GRAPHICS_VERx100(xe) >= 1250; +} + +static void __init_mocs_table(struct xe_gt *gt, + const struct xe_mocs_info *info) +{ unsigned int i; u32 mocs; - mocs_dbg(>_to_xe(gt)->drm, "entries:%d\n", info->n_entries); - drm_WARN_ONCE(&xe->drm, !info->unused_entries_index, - "Unused entries index should have been defined\n"); - for (i = 0; - i < info->n_entries ? (mocs = get_entry_control(info, i)), 1 : 0; - i++) { - mocs_dbg(>_to_xe(gt)->drm, "GLOB_MOCS[%d] 0x%x 0x%x\n", i, + xe_gt_WARN_ONCE(gt, !info->unused_entries_index, + "Unused entries index should have been defined\n"); + + mocs_dbg(gt, "mocs entries: %d\n", info->n_entries); + + for (i = 0; i < info->n_entries; i++) { + mocs = get_entry_control(info, i); + + mocs_dbg(gt, "GLOB_MOCS[%d] 0x%x 0x%x\n", i, XELP_GLOBAL_MOCS(i).addr, mocs); - if (GRAPHICS_VERx100(gt_to_xe(gt)) > 1250) + if (regs_are_mcr(gt)) xe_gt_mcr_multicast_write(gt, XEHP_GLOBAL_MOCS(i), mocs); else xe_mmio_write32(gt, XELP_GLOBAL_MOCS(i), mocs); @@ -510,16 +525,16 @@ static void init_l3cc_table(struct xe_gt *gt, unsigned int i; u32 l3cc; - mocs_dbg(>_to_xe(gt)->drm, "entries:%d\n", info->n_entries); - for (i = 0; - i < (info->n_entries + 1) / 2 ? - (l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i), - get_entry_l3cc(info, 2 * i + 1))), 1 : 0; - i++) { - mocs_dbg(>_to_xe(gt)->drm, "LNCFCMOCS[%d] 0x%x 0x%x\n", i, XELP_LNCFCMOCS(i).addr, - l3cc); + mocs_dbg(gt, "l3cc entries: %d\n", info->n_entries); + + for (i = 0; i < (info->n_entries + 1) / 2; i++) { + l3cc = l3cc_combine(get_entry_l3cc(info, 2 * i), + get_entry_l3cc(info, 2 * i + 1)); - if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1250) + mocs_dbg(gt, "LNCFCMOCS[%d] 0x%x 0x%x\n", i, + XELP_LNCFCMOCS(i).addr, l3cc); + + if (regs_are_mcr(gt)) xe_gt_mcr_multicast_write(gt, XEHP_LNCFCMOCS(i), l3cc); else xe_mmio_write32(gt, XELP_LNCFCMOCS(i), l3cc); @@ -552,7 +567,10 @@ void xe_mocs_init(struct xe_gt *gt) * performed by the GuC. */ flags = get_mocs_settings(gt_to_xe(gt), &table); - mocs_dbg(>_to_xe(gt)->drm, "flag:0x%x\n", flags); + mocs_dbg(gt, "flag:0x%x\n", flags); + + if (IS_SRIOV_VF(gt_to_xe(gt))) + return; if (flags & HAS_GLOBAL_MOCS) __init_mocs_table(gt, &table); diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index 110b698646..ceb8345cbc 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -48,6 +48,13 @@ module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400); MODULE_PARM_DESC(force_probe, "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details."); +#ifdef CONFIG_PCI_IOV +module_param_named(max_vfs, xe_modparam.max_vfs, uint, 0400); +MODULE_PARM_DESC(max_vfs, + "Limit number of Virtual Functions (VFs) that could be managed. " + "(0 = no VFs [default]; N = allow up to N VFs)"); +#endif + struct init_funcs { int (*init)(void); void (*exit)(void); diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h index 88ef0e8b2b..b369984f08 100644 --- a/drivers/gpu/drm/xe/xe_module.h +++ b/drivers/gpu/drm/xe/xe_module.h @@ -18,6 +18,9 @@ struct xe_modparam { char *huc_firmware_path; char *gsc_firmware_path; char *force_probe; +#ifdef CONFIG_PCI_IOV + unsigned int max_vfs; +#endif }; extern struct xe_modparam xe_modparam; diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 351ab902eb..d5b516f115 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -142,6 +142,7 @@ static const struct xe_pat_table_entry xe2_pat_table[] = { /* Special PAT values programmed outside the main table */ static const struct xe_pat_table_entry xe2_pat_ats = XE2_PAT( 0, 0, 0, 0, 3, 3 ); +static const struct xe_pat_table_entry xe2_pat_pta = XE2_PAT( 0, 0, 0, 0, 3, 0 ); u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index) { @@ -174,7 +175,6 @@ static void xelp_dump(struct xe_gt *gt, struct drm_printer *p) struct xe_device *xe = gt_to_xe(gt); int i, err; - xe_device_mem_access_get(xe); err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) goto err_fw; @@ -192,7 +192,6 @@ static void xelp_dump(struct xe_gt *gt, struct drm_printer *p) err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); err_fw: xe_assert(xe, !err); - xe_device_mem_access_put(xe); } static const struct xe_pat_ops xelp_pat_ops = { @@ -205,7 +204,6 @@ static void xehp_dump(struct xe_gt *gt, struct drm_printer *p) struct xe_device *xe = gt_to_xe(gt); int i, err; - xe_device_mem_access_get(xe); err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) goto err_fw; @@ -225,7 +223,6 @@ static void xehp_dump(struct xe_gt *gt, struct drm_printer *p) err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); err_fw: xe_assert(xe, !err); - xe_device_mem_access_put(xe); } static const struct xe_pat_ops xehp_pat_ops = { @@ -238,7 +235,6 @@ static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p) struct xe_device *xe = gt_to_xe(gt); int i, err; - xe_device_mem_access_get(xe); err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) goto err_fw; @@ -256,7 +252,6 @@ static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p) err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); err_fw: xe_assert(xe, !err); - xe_device_mem_access_put(xe); } static const struct xe_pat_ops xehpc_pat_ops = { @@ -269,7 +264,6 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p) struct xe_device *xe = gt_to_xe(gt); int i, err; - xe_device_mem_access_get(xe); err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) goto err_fw; @@ -292,7 +286,6 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p) err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); err_fw: xe_assert(xe, !err); - xe_device_mem_access_put(xe); } /* @@ -310,6 +303,9 @@ static void xe2lpg_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry { program_pat_mcr(gt, table, n_entries); xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), xe2_pat_ats.value); + + if (IS_DGFX(gt_to_xe(gt))) + xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe2_pat_pta.value); } static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[], @@ -317,6 +313,9 @@ static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry { program_pat(gt, table, n_entries); xe_mmio_write32(gt, XE_REG(_PAT_ATS), xe2_pat_ats.value); + + if (IS_DGFX(gt_to_xe(gt))) + xe_mmio_write32(gt, XE_REG(_PAT_PTA), xe2_pat_pta.value); } static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) @@ -325,7 +324,6 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) int i, err; u32 pat; - xe_device_mem_access_get(xe); err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); if (err) goto err_fw; @@ -370,7 +368,6 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p) err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); err_fw: xe_assert(xe, !err); - xe_device_mem_access_put(xe); } static const struct xe_pat_ops xe2_pat_ops = { @@ -438,6 +435,10 @@ void xe_pat_init_early(struct xe_device *xe) /* VFs can't program nor dump PAT settings */ if (IS_SRIOV_VF(xe)) xe->pat.ops = NULL; + + xe_assert(xe, !xe->pat.ops || xe->pat.ops->dump); + xe_assert(xe, !xe->pat.ops || xe->pat.ops->program_graphics); + xe_assert(xe, !xe->pat.ops || MEDIA_VER(xe) < 13 || xe->pat.ops->program_media); } void xe_pat_init(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 557f2d88a8..f326dbb1ce 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -174,7 +174,7 @@ static const struct xe_graphics_desc graphics_xelpg = { GENMASK(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0) static const struct xe_graphics_desc graphics_xe2 = { - .name = "Xe2_LPG", + .name = "Xe2_LPG / Xe2_HPG", XE2_GFX_FEATURES, }; @@ -185,8 +185,8 @@ static const struct xe_media_desc media_xem = { .rel = 0, .hw_engine_mask = - BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | - BIT(XE_HW_ENGINE_VECS0), + GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) | + GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0), }; static const struct xe_media_desc media_xehpm = { @@ -195,21 +195,23 @@ static const struct xe_media_desc media_xehpm = { .rel = 55, .hw_engine_mask = - BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | - BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_VECS1), + GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) | + GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0), }; static const struct xe_media_desc media_xelpmp = { .name = "Xe_LPM+", .hw_engine_mask = - BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VCS2) | - BIT(XE_HW_ENGINE_VECS0) | BIT(XE_HW_ENGINE_GSCCS0) + GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) | + GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0) | + BIT(XE_HW_ENGINE_GSCCS0) }; static const struct xe_media_desc media_xe2 = { - .name = "Xe2_LPM", + .name = "Xe2_LPM / Xe2_HPM", .hw_engine_mask = - BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VECS0), /* TODO: GSC0 */ + GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) | + GENMASK(XE_HW_ENGINE_VECS3, XE_HW_ENGINE_VECS0), /* TODO: GSC0 */ }; static const struct xe_device_desc tgl_desc = { @@ -333,6 +335,13 @@ static const struct xe_device_desc mtl_desc = { static const struct xe_device_desc lnl_desc = { PLATFORM(XE_LUNARLAKE), + .has_display = true, + .require_force_probe = true, +}; + +static const struct xe_device_desc bmg_desc __maybe_unused = { + DGFX_FEATURES, + PLATFORM(XE_BATTLEMAGE), .require_force_probe = true, }; @@ -343,12 +352,15 @@ __diag_pop(); static const struct gmdid_map graphics_ip_map[] = { { 1270, &graphics_xelpg }, { 1271, &graphics_xelpg }, + { 1274, &graphics_xelpg }, /* Xe_LPG+ */ + { 2001, &graphics_xe2 }, { 2004, &graphics_xe2 }, }; /* Map of GMD_ID values to media IP */ static const struct gmdid_map media_ip_map[] = { { 1300, &media_xelpmp }, + { 1301, &media_xe2 }, { 2000, &media_xe2 }, }; @@ -737,8 +749,6 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; - xe_sriov_probe_early(xe, desc->has_sriov); - err = xe_device_probe_early(xe); if (err) return err; @@ -774,18 +784,26 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) str_yes_no(xe_device_has_sriov(xe)), xe_sriov_mode_to_string(xe_device_sriov_mode(xe))); - xe_pm_init_early(xe); + err = xe_pm_init_early(xe); + if (err) + return err; err = xe_device_probe(xe); if (err) return err; - xe_pm_init(xe); + err = xe_pm_init(xe); + if (err) + goto err_driver_cleanup; drm_dbg(&xe->drm, "d3cold: capable=%s\n", str_yes_no(xe->d3cold.capable)); return 0; + +err_driver_cleanup: + xe_pci_remove(pdev); + return err; } static void xe_pci_shutdown(struct pci_dev *pdev) diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c index 81f4ae2ea0..a5e7da8cf9 100644 --- a/drivers/gpu/drm/xe/xe_pcode.c +++ b/drivers/gpu/drm/xe/xe_pcode.c @@ -71,7 +71,7 @@ static int __pcode_mailbox_rw(struct xe_gt *gt, u32 mbox, u32 *data0, u32 *data1 xe_mmio_write32(gt, PCODE_MAILBOX, PCODE_READY | mbox); err = xe_mmio_wait32(gt, PCODE_MAILBOX, PCODE_READY, 0, - timeout_ms * 1000, NULL, atomic); + timeout_ms * USEC_PER_MSEC, NULL, atomic); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h index 553f53dbd0..79b7042c45 100644 --- a/drivers/gpu/drm/xe/xe_platform_types.h +++ b/drivers/gpu/drm/xe/xe_platform_types.h @@ -22,6 +22,7 @@ enum xe_platform { XE_PVC, XE_METEORLAKE, XE_LUNARLAKE, + XE_BATTLEMAGE, }; enum xe_subplatform { diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 944cf4d760..37fbeda12d 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -25,23 +25,55 @@ /** * DOC: Xe Power Management * - * Xe PM shall be guided by the simplicity. - * Use the simplest hook options whenever possible. - * Let's not reinvent the runtime_pm references and hooks. - * Shall have a clear separation of display and gt underneath this component. + * Xe PM implements the main routines for both system level suspend states and + * for the opportunistic runtime suspend states. * - * What's next: + * System Level Suspend (S-States) - In general this is OS initiated suspend + * driven by ACPI for achieving S0ix (a.k.a. S2idle, freeze), S3 (suspend to ram), + * S4 (disk). The main functions here are `xe_pm_suspend` and `xe_pm_resume`. They + * are the main point for the suspend to and resume from these states. * - * For now s2idle and s3 are only working in integrated devices. The next step - * is to iterate through all VRAM's BO backing them up into the system memory - * before allowing the system suspend. + * PCI Device Suspend (D-States) - This is the opportunistic PCIe device low power + * state D3, controlled by the PCI subsystem and ACPI with the help from the + * runtime_pm infrastructure. + * PCI D3 is special and can mean D3hot, where Vcc power is on for keeping memory + * alive and quicker low latency resume or D3Cold where Vcc power is off for + * better power savings. + * The Vcc control of PCI hierarchy can only be controlled at the PCI root port + * level, while the device driver can be behind multiple bridges/switches and + * paired with other devices. For this reason, the PCI subsystem cannot perform + * the transition towards D3Cold. The lowest runtime PM possible from the PCI + * subsystem is D3hot. Then, if all these paired devices in the same root port + * are in D3hot, ACPI will assist here and run its own methods (_PR3 and _OFF) + * to perform the transition from D3hot to D3cold. Xe may disallow this + * transition by calling pci_d3cold_disable(root_pdev) before going to runtime + * suspend. It will be based on runtime conditions such as VRAM usage for a + * quick and low latency resume for instance. * - * Also runtime_pm needs to be here from the beginning. + * Runtime PM - This infrastructure provided by the Linux kernel allows the + * device drivers to indicate when the can be runtime suspended, so the device + * could be put at D3 (if supported), or allow deeper package sleep states + * (PC-states), and/or other low level power states. Xe PM component provides + * `xe_pm_runtime_suspend` and `xe_pm_runtime_resume` functions that PCI + * subsystem will call before transition to/from runtime suspend. * - * RC6/RPS are also critical PM features. Let's start with GuCRC and GuC SLPC - * and no wait boost. Frequency optimizations should come on a next stage. + * Also, Xe PM provides get and put functions that Xe driver will use to + * indicate activity. In order to avoid locking complications with the memory + * management, whenever possible, these get and put functions needs to be called + * from the higher/outer levels. + * The main cases that need to be protected from the outer levels are: IOCTL, + * sysfs, debugfs, dma-buf sharing, GPU execution. + * + * This component is not responsible for GT idleness (RC6) nor GT frequency + * management (RPS). */ +#ifdef CONFIG_LOCKDEP +struct lockdep_map xe_pm_runtime_lockdep_map = { + .name = "xe_pm_runtime_lockdep_map" +}; +#endif + /** * xe_pm_suspend - Helper for System suspend, i.e. S0->S3 / S0->S2idle * @xe: xe device instance @@ -182,30 +214,60 @@ static void xe_pm_runtime_init(struct xe_device *xe) pm_runtime_put(dev); } -void xe_pm_init_early(struct xe_device *xe) +int xe_pm_init_early(struct xe_device *xe) { + int err; + INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list); - drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock); + + err = drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock); + if (err) + return err; + + err = drmm_mutex_init(&xe->drm, &xe->d3cold.lock); + if (err) + return err; + + return 0; } -void xe_pm_init(struct xe_device *xe) +/** + * xe_pm_init - Initialize Xe Power Management + * @xe: xe device instance + * + * This component is responsible for System and Device sleep states. + * + * Returns 0 for success, negative error code otherwise. + */ +int xe_pm_init(struct xe_device *xe) { + int err; + /* For now suspend/resume is only allowed with GuC */ if (!xe_device_uc_enabled(xe)) - return; - - drmm_mutex_init(&xe->drm, &xe->d3cold.lock); + return 0; xe->d3cold.capable = xe_pm_pci_d3cold_capable(xe); if (xe->d3cold.capable) { - xe_device_sysfs_init(xe); - xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD); + err = xe_device_sysfs_init(xe); + if (err) + return err; + + err = xe_pm_set_vram_threshold(xe, DEFAULT_VRAM_THRESHOLD); + if (err) + return err; } xe_pm_runtime_init(xe); + + return 0; } +/** + * xe_pm_runtime_fini - Finalize Runtime PM + * @xe: xe device instance + */ void xe_pm_runtime_fini(struct xe_device *xe) { struct device *dev = xe->drm.dev; @@ -235,6 +297,28 @@ struct task_struct *xe_pm_read_callback_task(struct xe_device *xe) return READ_ONCE(xe->pm_callback_task); } +/** + * xe_pm_runtime_suspended - Check if runtime_pm state is suspended + * @xe: xe device instance + * + * This does not provide any guarantee that the device is going to remain + * suspended as it might be racing with the runtime state transitions. + * It can be used only as a non-reliable assertion, to ensure that we are not in + * the sleep state while trying to access some memory for instance. + * + * Returns true if PCI device is suspended, false otherwise. + */ +bool xe_pm_runtime_suspended(struct xe_device *xe) +{ + return pm_runtime_suspended(xe->drm.dev); +} + +/** + * xe_pm_runtime_suspend - Prepare our device for D3hot/D3Cold + * @xe: xe device instance + * + * Returns 0 for success, negative error code otherwise. + */ int xe_pm_runtime_suspend(struct xe_device *xe) { struct xe_bo *bo, *on; @@ -242,18 +326,15 @@ int xe_pm_runtime_suspend(struct xe_device *xe) u8 id; int err = 0; - if (xe->d3cold.allowed && xe_device_mem_access_ongoing(xe)) - return -EBUSY; - /* Disable access_ongoing asserts and prevent recursive pm calls */ xe_pm_write_callback_task(xe, current); /* - * The actual xe_device_mem_access_put() is always async underneath, so + * The actual xe_pm_runtime_put() is always async underneath, so * exactly where that is called should makes no difference to us. However * we still need to be very careful with the locks that this callback * acquires and the locks that are acquired and held by any callers of - * xe_device_mem_access_get(). We already have the matching annotation + * xe_runtime_pm_get(). We already have the matching annotation * on that side, but we also need it here. For example lockdep should be * able to tell us if the following scenario is in theory possible: * @@ -261,15 +342,15 @@ int xe_pm_runtime_suspend(struct xe_device *xe) * lock(A) | * | xe_pm_runtime_suspend() * | lock(A) - * xe_device_mem_access_get() | + * xe_pm_runtime_get() | * * This will clearly deadlock since rpm core needs to wait for * xe_pm_runtime_suspend() to complete, but here we are holding lock(A) * on CPU0 which prevents CPU1 making forward progress. With the - * annotation here and in xe_device_mem_access_get() lockdep will see + * annotation here and in xe_pm_runtime_get() lockdep will see * the potential lock inversion and give us a nice splat. */ - lock_map_acquire(&xe_device_mem_access_lockdep_map); + lock_map_acquire(&xe_pm_runtime_lockdep_map); /* * Applying lock for entire list op as xe_ttm_bo_destroy and xe_bo_move_notify @@ -295,11 +376,17 @@ int xe_pm_runtime_suspend(struct xe_device *xe) xe_irq_suspend(xe); out: - lock_map_release(&xe_device_mem_access_lockdep_map); + lock_map_release(&xe_pm_runtime_lockdep_map); xe_pm_write_callback_task(xe, NULL); return err; } +/** + * xe_pm_runtime_resume - Waking up from D3hot/D3Cold + * @xe: xe device instance + * + * Returns 0 for success, negative error code otherwise. + */ int xe_pm_runtime_resume(struct xe_device *xe) { struct xe_gt *gt; @@ -309,7 +396,7 @@ int xe_pm_runtime_resume(struct xe_device *xe) /* Disable access_ongoing asserts and prevent recursive pm calls */ xe_pm_write_callback_task(xe, current); - lock_map_acquire(&xe_device_mem_access_lockdep_map); + lock_map_acquire(&xe_pm_runtime_lockdep_map); /* * It can be possible that xe has allowed d3cold but other pcie devices @@ -344,27 +431,147 @@ int xe_pm_runtime_resume(struct xe_device *xe) goto out; } out: - lock_map_release(&xe_device_mem_access_lockdep_map); + lock_map_release(&xe_pm_runtime_lockdep_map); xe_pm_write_callback_task(xe, NULL); return err; } -int xe_pm_runtime_get(struct xe_device *xe) +/* + * For places where resume is synchronous it can be quite easy to deadlock + * if we are not careful. Also in practice it might be quite timing + * sensitive to ever see the 0 -> 1 transition with the callers locks + * held, so deadlocks might exist but are hard for lockdep to ever see. + * With this in mind, help lockdep learn about the potentially scary + * stuff that can happen inside the runtime_resume callback by acquiring + * a dummy lock (it doesn't protect anything and gets compiled out on + * non-debug builds). Lockdep then only needs to see the + * xe_pm_runtime_lockdep_map -> runtime_resume callback once, and then can + * hopefully validate all the (callers_locks) -> xe_pm_runtime_lockdep_map. + * For example if the (callers_locks) are ever grabbed in the + * runtime_resume callback, lockdep should give us a nice splat. + */ +static void pm_runtime_lockdep_prime(void) { - return pm_runtime_get_sync(xe->drm.dev); + lock_map_acquire(&xe_pm_runtime_lockdep_map); + lock_map_release(&xe_pm_runtime_lockdep_map); } -int xe_pm_runtime_put(struct xe_device *xe) +/** + * xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously + * @xe: xe device instance + */ +void xe_pm_runtime_get(struct xe_device *xe) { - pm_runtime_mark_last_busy(xe->drm.dev); - return pm_runtime_put(xe->drm.dev); + pm_runtime_get_noresume(xe->drm.dev); + + if (xe_pm_read_callback_task(xe) == current) + return; + + pm_runtime_lockdep_prime(); + pm_runtime_resume(xe->drm.dev); } +/** + * xe_pm_runtime_put - Put the runtime_pm reference back and mark as idle + * @xe: xe device instance + */ +void xe_pm_runtime_put(struct xe_device *xe) +{ + if (xe_pm_read_callback_task(xe) == current) { + pm_runtime_put_noidle(xe->drm.dev); + } else { + pm_runtime_mark_last_busy(xe->drm.dev); + pm_runtime_put(xe->drm.dev); + } +} + +/** + * xe_pm_runtime_get_ioctl - Get a runtime_pm reference before ioctl + * @xe: xe device instance + * + * Returns: Any number greater than or equal to 0 for success, negative error + * code otherwise. + */ +int xe_pm_runtime_get_ioctl(struct xe_device *xe) +{ + if (WARN_ON(xe_pm_read_callback_task(xe) == current)) + return -ELOOP; + + pm_runtime_lockdep_prime(); + return pm_runtime_get_sync(xe->drm.dev); +} + +/** + * xe_pm_runtime_get_if_active - Get a runtime_pm reference if device active + * @xe: xe device instance + * + * Returns: Any number greater than or equal to 0 for success, negative error + * code otherwise. + */ int xe_pm_runtime_get_if_active(struct xe_device *xe) { return pm_runtime_get_if_active(xe->drm.dev); } +/** + * xe_pm_runtime_get_if_in_use - Get a runtime_pm reference and resume if needed + * @xe: xe device instance + * + * Returns: True if device is awake and the reference was taken, false otherwise. + */ +bool xe_pm_runtime_get_if_in_use(struct xe_device *xe) +{ + if (xe_pm_read_callback_task(xe) == current) { + /* The device is awake, grab the ref and move on */ + pm_runtime_get_noresume(xe->drm.dev); + return true; + } + + return pm_runtime_get_if_in_use(xe->drm.dev) > 0; +} + +/** + * xe_pm_runtime_get_noresume - Bump runtime PM usage counter without resuming + * @xe: xe device instance + * + * This function should be used in inner places where it is surely already + * protected by outer-bound callers of `xe_pm_runtime_get`. + * It will warn if not protected. + * The reference should be put back after this function regardless, since it + * will always bump the usage counter, regardless. + */ +void xe_pm_runtime_get_noresume(struct xe_device *xe) +{ + bool ref; + + ref = xe_pm_runtime_get_if_in_use(xe); + + if (drm_WARN(&xe->drm, !ref, "Missing outer runtime PM protection\n")) + pm_runtime_get_noresume(xe->drm.dev); +} + +/** + * xe_pm_runtime_resume_and_get - Resume, then get a runtime_pm ref if awake. + * @xe: xe device instance + * + * Returns: True if device is awake and the reference was taken, false otherwise. + */ +bool xe_pm_runtime_resume_and_get(struct xe_device *xe) +{ + if (xe_pm_read_callback_task(xe) == current) { + /* The device is awake, grab the ref and move on */ + pm_runtime_get_noresume(xe->drm.dev); + return true; + } + + pm_runtime_lockdep_prime(); + return pm_runtime_resume_and_get(xe->drm.dev) >= 0; +} + +/** + * xe_pm_assert_unbounded_bridge - Disable PM on unbounded pcie parent bridge + * @xe: xe device instance + */ void xe_pm_assert_unbounded_bridge(struct xe_device *xe) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -379,6 +586,13 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe) } } +/** + * xe_pm_set_vram_threshold - Set a vram threshold for allowing/blocking D3Cold + * @xe: xe device instance + * @threshold: VRAM size in bites for the D3cold threshold + * + * Returns 0 for success, negative error code otherwise. + */ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold) { struct ttm_resource_manager *man; @@ -403,6 +617,13 @@ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold) return 0; } +/** + * xe_pm_d3cold_allowed_toggle - Check conditions to toggle d3cold.allowed + * @xe: xe device instance + * + * To be called during runtime_pm idle callback. + * Check for all the D3Cold conditions ahead of runtime suspend. + */ void xe_pm_d3cold_allowed_toggle(struct xe_device *xe) { struct ttm_resource_manager *man; diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h index 64a97c6726..18b0613fe5 100644 --- a/drivers/gpu/drm/xe/xe_pm.h +++ b/drivers/gpu/drm/xe/xe_pm.h @@ -20,14 +20,19 @@ struct xe_device; int xe_pm_suspend(struct xe_device *xe); int xe_pm_resume(struct xe_device *xe); -void xe_pm_init_early(struct xe_device *xe); -void xe_pm_init(struct xe_device *xe); +int xe_pm_init_early(struct xe_device *xe); +int xe_pm_init(struct xe_device *xe); void xe_pm_runtime_fini(struct xe_device *xe); +bool xe_pm_runtime_suspended(struct xe_device *xe); int xe_pm_runtime_suspend(struct xe_device *xe); int xe_pm_runtime_resume(struct xe_device *xe); -int xe_pm_runtime_get(struct xe_device *xe); -int xe_pm_runtime_put(struct xe_device *xe); +void xe_pm_runtime_get(struct xe_device *xe); +int xe_pm_runtime_get_ioctl(struct xe_device *xe); +void xe_pm_runtime_put(struct xe_device *xe); int xe_pm_runtime_get_if_active(struct xe_device *xe); +bool xe_pm_runtime_get_if_in_use(struct xe_device *xe); +void xe_pm_runtime_get_noresume(struct xe_device *xe); +bool xe_pm_runtime_resume_and_get(struct xe_device *xe); void xe_pm_assert_unbounded_bridge(struct xe_device *xe); int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold); void xe_pm_d3cold_allowed_toggle(struct xe_device *xe); diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index 4efc8c1a3d..5b7930f46c 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -5,6 +5,7 @@ #include "xe_pt.h" +#include "regs/xe_gtt_defs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_drm_client.h" @@ -108,11 +109,11 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, pt->level = level; bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_IGNORE_MIN_PAGE_SIZE_BIT | - XE_BO_CREATE_PINNED_BIT | - XE_BO_CREATE_NO_RESV_EVICT | - XE_BO_PAGETABLE); + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE | + XE_BO_FLAG_PINNED | + XE_BO_FLAG_NO_RESV_EVICT | + XE_BO_FLAG_PAGETABLE); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto err_kfree; @@ -618,7 +619,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; int ret; - if (vma && (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) && + if ((vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) && (is_devmem || !IS_DGFX(xe))) xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 075f9eaef0..df407d73e5 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -12,6 +12,7 @@ #include <drm/xe_drm.h> #include "regs/xe_engine_regs.h" +#include "regs/xe_gt_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_exec_queue.h" @@ -147,8 +148,8 @@ query_engine_cycles(struct xe_device *xe, if (!hwe) return -EINVAL; - xe_device_mem_access_get(xe); - xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)) + return -EIO; __read_timestamps(gt, RING_TIMESTAMP(hwe->mmio_base), @@ -159,7 +160,6 @@ query_engine_cycles(struct xe_device *xe, cpu_clock); xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_device_mem_access_put(xe); resp.width = 36; /* Only write to the output fields of user query */ @@ -403,6 +403,13 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query BIT(gt_to_tile(gt)->id) << 1; gt_list->gt_list[id].far_mem_regions = xe->info.mem_region_mask ^ gt_list->gt_list[id].near_mem_regions; + + gt_list->gt_list[id].ip_ver_major = + REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid); + gt_list->gt_list[id].ip_ver_minor = + REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid); + gt_list->gt_list[id].ip_ver_rev = + REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid); } if (copy_to_user(query_ptr, gt_list, size)) { @@ -433,9 +440,7 @@ static int query_hwconfig(struct xe_device *xe, if (!hwconfig) return -ENOMEM; - xe_device_mem_access_get(xe); xe_guc_hwconfig_copy(>->uc.guc, hwconfig); - xe_device_mem_access_put(xe); if (copy_to_user(query_ptr, hwconfig, size)) { kfree(hwconfig); @@ -544,14 +549,44 @@ query_uc_fw_version(struct xe_device *xe, struct drm_xe_device_query *query) version = &guc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY]; break; } + case XE_QUERY_UC_TYPE_HUC: { + struct xe_gt *media_gt = NULL; + struct xe_huc *huc; + + if (MEDIA_VER(xe) >= 13) { + struct xe_tile *tile; + u8 gt_id; + + for_each_tile(tile, xe, gt_id) { + if (tile->media_gt) { + media_gt = tile->media_gt; + break; + } + } + } else { + media_gt = xe->tiles[0].primary_gt; + } + + if (!media_gt) + break; + + huc = &media_gt->uc.huc; + if (huc->fw.status == XE_UC_FIRMWARE_RUNNING) + version = &huc->fw.versions.found[XE_UC_FW_VER_RELEASE]; + break; + } default: return -EINVAL; } - resp.branch_ver = 0; - resp.major_ver = version->major; - resp.minor_ver = version->minor; - resp.patch_ver = version->patch; + if (version) { + resp.branch_ver = 0; + resp.major_ver = version->major; + resp.minor_ver = version->minor; + resp.patch_ver = version->patch; + } else { + return -ENODEV; + } if (copy_to_user(query_ptr, &resp, size)) return -EFAULT; diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index 820c8d73c4..aca7a9af6e 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -17,6 +17,7 @@ #include "xe_lrc.h" #include "xe_macros.h" #include "xe_sched_job.h" +#include "xe_sriov.h" #include "xe_vm_types.h" #include "xe_vm.h" #include "xe_wa.h" @@ -381,10 +382,12 @@ static void emit_migration_job_gen12(struct xe_sched_job *job, i = emit_bb_start(job->batch_addr[0], BIT(8), dw, i); - /* XXX: Do we need this? Leaving for now. */ - dw[i++] = preparser_disable(true); - i = emit_flush_invalidate(0, dw, i); - dw[i++] = preparser_disable(false); + if (!IS_SRIOV_VF(gt_to_xe(job->q->gt))) { + /* XXX: Do we need this? Leaving for now. */ + dw[i++] = preparser_disable(true); + i = emit_flush_invalidate(0, dw, i); + dw[i++] = preparser_disable(false); + } i = emit_bb_start(job->batch_addr[1], BIT(8), dw, i); diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c index 2c4632259e..8941522b77 100644 --- a/drivers/gpu/drm/xe/xe_sa.c +++ b/drivers/gpu/drm/xe/xe_sa.c @@ -48,8 +48,9 @@ struct xe_sa_manager *xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u32 sa_manager->bo = NULL; bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, - XE_BO_CREATE_VRAM_IF_DGFX(tile) | - XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); if (IS_ERR(bo)) { drm_err(&xe->drm, "failed to allocate bo for sa manager: %ld\n", PTR_ERR(bo)); diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index b0c7fa4693..cd8a2fba54 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -5,6 +5,7 @@ #include "xe_sched_job.h" +#include <drm/xe_drm.h> #include <linux/dma-fence-array.h> #include <linux/slab.h> @@ -15,6 +16,8 @@ #include "xe_hw_fence.h" #include "xe_lrc.h" #include "xe_macros.h" +#include "xe_pm.h" +#include "xe_sync_types.h" #include "xe_trace.h" #include "xe_vm.h" @@ -157,7 +160,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q, /* All other jobs require a VM to be open which has a ref */ if (unlikely(q->flags & EXEC_QUEUE_FLAG_KERNEL)) - xe_device_mem_access_get(job_to_xe(job)); + xe_pm_runtime_get_noresume(job_to_xe(job)); xe_device_assert_mem_access(job_to_xe(job)); trace_xe_sched_job_create(job); @@ -190,7 +193,7 @@ void xe_sched_job_destroy(struct kref *ref) container_of(ref, struct xe_sched_job, refcount); if (unlikely(job->q->flags & EXEC_QUEUE_FLAG_KERNEL)) - xe_device_mem_access_put(job_to_xe(job)); + xe_pm_runtime_put(job_to_xe(job)); xe_exec_queue_put(job->q); dma_fence_put(job->fence); drm_sched_job_cleanup(&job->drm); @@ -288,6 +291,22 @@ int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm) return drm_sched_job_add_dependency(&job->drm, fence); } +/** + * xe_sched_job_init_user_fence - Initialize user_fence for the job + * @job: job whose user_fence needs an init + * @sync: sync to be use to init user_fence + */ +void xe_sched_job_init_user_fence(struct xe_sched_job *job, + struct xe_sync_entry *sync) +{ + if (sync->type != DRM_XE_SYNC_TYPE_USER_FENCE) + return; + + job->user_fence.used = true; + job->user_fence.addr = sync->addr; + job->user_fence.value = sync->timeline_value; +} + struct xe_sched_job_snapshot * xe_sched_job_snapshot_capture(struct xe_sched_job *job) { diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h index f1a660648c..c75018f466 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.h +++ b/drivers/gpu/drm/xe/xe_sched_job.h @@ -10,6 +10,7 @@ struct drm_printer; struct xe_vm; +struct xe_sync_entry; #define XE_SCHED_HANG_LIMIT 1 #define XE_SCHED_JOB_TIMEOUT LONG_MAX @@ -58,6 +59,8 @@ void xe_sched_job_arm(struct xe_sched_job *job); void xe_sched_job_push(struct xe_sched_job *job); int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm); +void xe_sched_job_init_user_fence(struct xe_sched_job *job, + struct xe_sync_entry *sync); static inline struct xe_sched_job * to_xe_sched_job(struct drm_sched_job *drm) diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index f295d91886..1c3fa84b6a 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -5,8 +5,13 @@ #include <drm/drm_managed.h> +#include "regs/xe_sriov_regs.h" + #include "xe_assert.h" +#include "xe_device.h" +#include "xe_mmio.h" #include "xe_sriov.h" +#include "xe_sriov_pf.h" /** * xe_sriov_mode_to_string - Convert enum value to string. @@ -28,10 +33,16 @@ const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode) } } +static bool test_is_vf(struct xe_device *xe) +{ + u32 value = xe_mmio_read32(xe_root_mmio_gt(xe), VF_CAP_REG); + + return value & VF_CAP; +} + /** * xe_sriov_probe_early - Probe a SR-IOV mode. * @xe: the &xe_device to probe mode on - * @has_sriov: flag indicating hardware support for SR-IOV * * This function should be called only once and as soon as possible during * driver probe to detect whether we are running a SR-IOV Physical Function @@ -40,12 +51,17 @@ const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode) * SR-IOV PF mode detection is based on PCI @dev_is_pf() function. * SR-IOV VF mode detection is based on dedicated MMIO register read. */ -void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov) +void xe_sriov_probe_early(struct xe_device *xe) { enum xe_sriov_mode mode = XE_SRIOV_MODE_NONE; + bool has_sriov = xe->info.has_sriov; - /* TODO: replace with proper mode detection */ - xe_assert(xe, !has_sriov); + if (has_sriov) { + if (test_is_vf(xe)) + mode = XE_SRIOV_MODE_VF; + else if (xe_sriov_pf_readiness(xe)) + mode = XE_SRIOV_MODE_PF; + } xe_assert(xe, !xe->sriov.__mode); xe->sriov.__mode = mode; @@ -78,6 +94,13 @@ int xe_sriov_init(struct xe_device *xe) if (!IS_SRIOV(xe)) return 0; + if (IS_SRIOV_PF(xe)) { + int err = xe_sriov_pf_init_early(xe); + + if (err) + return err; + } + xe_assert(xe, !xe->sriov.wq); xe->sriov.wq = alloc_workqueue("xe-sriov-wq", 0, 0); if (!xe->sriov.wq) @@ -85,3 +108,34 @@ int xe_sriov_init(struct xe_device *xe) return drmm_add_action_or_reset(&xe->drm, fini_sriov, xe); } + +/** + * xe_sriov_print_info - Print basic SR-IOV information. + * @xe: the &xe_device to print info from + * @p: the &drm_printer + * + * Print SR-IOV related information into provided DRM printer. + */ +void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p) +{ + drm_printf(p, "supported: %s\n", str_yes_no(xe_device_has_sriov(xe))); + drm_printf(p, "enabled: %s\n", str_yes_no(IS_SRIOV(xe))); + drm_printf(p, "mode: %s\n", xe_sriov_mode_to_string(xe_device_sriov_mode(xe))); +} + +/** + * xe_sriov_function_name() - Get SR-IOV Function name. + * @n: the Function number (identifier) to get name of + * @buf: the buffer to format to + * @size: size of the buffer (shall be at least 5 bytes) + * + * Return: formatted function name ("PF" or "VF%u"). + */ +const char *xe_sriov_function_name(unsigned int n, char *buf, size_t size) +{ + if (n) + snprintf(buf, size, "VF%u", n); + else + strscpy(buf, "PF", size); + return buf; +} diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h index 1545552162..486bb21c32 100644 --- a/drivers/gpu/drm/xe/xe_sriov.h +++ b/drivers/gpu/drm/xe/xe_sriov.h @@ -10,9 +10,13 @@ #include "xe_device_types.h" #include "xe_sriov_types.h" +struct drm_printer; + const char *xe_sriov_mode_to_string(enum xe_sriov_mode mode); +const char *xe_sriov_function_name(unsigned int n, char *buf, size_t len); -void xe_sriov_probe_early(struct xe_device *xe, bool has_sriov); +void xe_sriov_probe_early(struct xe_device *xe); +void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p); int xe_sriov_init(struct xe_device *xe); static inline enum xe_sriov_mode xe_device_sriov_mode(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c new file mode 100644 index 0000000000..0f721ae17b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "xe_assert.h" +#include "xe_device.h" +#include "xe_module.h" +#include "xe_sriov.h" +#include "xe_sriov_pf.h" +#include "xe_sriov_printk.h" + +static unsigned int wanted_max_vfs(struct xe_device *xe) +{ + return xe_modparam.max_vfs; +} + +static int pf_reduce_totalvfs(struct xe_device *xe, int limit) +{ + struct device *dev = xe->drm.dev; + struct pci_dev *pdev = to_pci_dev(dev); + int err; + + err = pci_sriov_set_totalvfs(pdev, limit); + if (err) + xe_sriov_notice(xe, "Failed to set number of VFs to %d (%pe)\n", + limit, ERR_PTR(err)); + return err; +} + +static bool pf_continue_as_native(struct xe_device *xe, const char *why) +{ + xe_sriov_dbg(xe, "%s, continuing as native\n", why); + pf_reduce_totalvfs(xe, 0); + return false; +} + +/** + * xe_sriov_pf_readiness - Check if PF functionality can be enabled. + * @xe: the &xe_device to check + * + * This function is called as part of the SR-IOV probe to validate if all + * PF prerequisites are satisfied and we can continue with enabling PF mode. + * + * Return: true if the PF mode can be turned on. + */ +bool xe_sriov_pf_readiness(struct xe_device *xe) +{ + struct device *dev = xe->drm.dev; + struct pci_dev *pdev = to_pci_dev(dev); + int totalvfs = pci_sriov_get_totalvfs(pdev); + int newlimit = min_t(u16, wanted_max_vfs(xe), totalvfs); + + xe_assert(xe, totalvfs <= U16_MAX); + + if (!dev_is_pf(dev)) + return false; + + if (!xe_device_uc_enabled(xe)) + return pf_continue_as_native(xe, "Guc submission disabled"); + + if (!newlimit) + return pf_continue_as_native(xe, "all VFs disabled"); + + pf_reduce_totalvfs(xe, newlimit); + + xe->sriov.pf.device_total_vfs = totalvfs; + xe->sriov.pf.driver_max_vfs = newlimit; + + return true; +} + +/** + * xe_sriov_pf_init_early - Initialize SR-IOV PF specific data. + * @xe: the &xe_device to initialize + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_sriov_pf_init_early(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + + return drmm_mutex_init(&xe->drm, &xe->sriov.pf.master_lock); +} + +/** + * xe_sriov_pf_print_vfs_summary - Print SR-IOV PF information. + * @xe: the &xe_device to print info from + * @p: the &drm_printer + * + * Print SR-IOV PF related information into provided DRM printer. + */ +void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + + xe_assert(xe, IS_SRIOV_PF(xe)); + + drm_printf(p, "total: %u\n", xe->sriov.pf.device_total_vfs); + drm_printf(p, "supported: %u\n", xe->sriov.pf.driver_max_vfs); + drm_printf(p, "enabled: %u\n", pci_num_vf(pdev)); +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.h b/drivers/gpu/drm/xe/xe_sriov_pf.h new file mode 100644 index 0000000000..d1220e70e1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_H_ +#define _XE_SRIOV_PF_H_ + +#include <linux/types.h> + +struct drm_printer; +struct xe_device; + +#ifdef CONFIG_PCI_IOV +bool xe_sriov_pf_readiness(struct xe_device *xe); +int xe_sriov_pf_init_early(struct xe_device *xe); +void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p); +#else +static inline bool xe_sriov_pf_readiness(struct xe_device *xe) +{ + return false; +} + +static inline int xe_sriov_pf_init_early(struct xe_device *xe) +{ + return 0; +} +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h new file mode 100644 index 0000000000..7d156ba824 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_HELPERS_H_ +#define _XE_SRIOV_PF_HELPERS_H_ + +#include "xe_assert.h" +#include "xe_device_types.h" +#include "xe_sriov.h" +#include "xe_sriov_types.h" + +/** + * xe_sriov_pf_assert_vfid() - warn if &id is not a supported VF number when debugging. + * @xe: the PF &xe_device to assert on + * @vfid: the VF number to assert + * + * Assert that &xe represents the Physical Function (PF) device and provided &vfid + * is within a range of supported VF numbers (up to maximum number of VFs that + * driver can support, including VF0 that represents the PF itself). + * + * Note: Effective only on debug builds. See `Xe ASSERTs`_ for more information. + */ +#define xe_sriov_pf_assert_vfid(xe, vfid) \ + xe_assert((xe), (vfid) <= xe_sriov_pf_get_totalvfs(xe)) + +/** + * xe_sriov_pf_get_totalvfs() - Get maximum number of VFs that driver can support. + * @xe: the &xe_device to query (shall be PF) + * + * Return: Maximum number of VFs that this PF driver supports. + */ +static inline int xe_sriov_pf_get_totalvfs(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + return xe->sriov.pf.driver_max_vfs; +} + +static inline struct mutex *xe_sriov_pf_master_mutex(struct xe_device *xe) +{ + xe_assert(xe, IS_SRIOV_PF(xe)); + return &xe->sriov.pf.master_lock; +} + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h index 1a138108d1..c7b7ad4af5 100644 --- a/drivers/gpu/drm/xe/xe_sriov_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_types.h @@ -7,6 +7,8 @@ #define _XE_SRIOV_TYPES_H_ #include <linux/build_bug.h> +#include <linux/mutex.h> +#include <linux/types.h> /** * VFID - Virtual Function Identifier @@ -37,4 +39,21 @@ enum xe_sriov_mode { }; static_assert(XE_SRIOV_MODE_NONE); +/** + * struct xe_device_pf - Xe PF related data + * + * The data in this structure is valid only if driver is running in the + * @XE_SRIOV_MODE_PF mode. + */ +struct xe_device_pf { + /** @device_total_vfs: Maximum number of VFs supported by the device. */ + u16 device_total_vfs; + + /** @driver_max_vfs: Maximum number of VFs supported by the driver. */ + u16 driver_max_vfs; + + /** @master_lock: protects all VFs configurations across GTs */ + struct mutex master_lock; +}; + #endif diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 02c9577fe4..65f1f16282 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -224,8 +224,7 @@ int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job) return 0; } -void xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job, - struct dma_fence *fence) +void xe_sync_entry_signal(struct xe_sync_entry *sync, struct dma_fence *fence) { if (!(sync->flags & DRM_XE_SYNC_FLAG_SIGNAL)) return; @@ -254,10 +253,6 @@ void xe_sync_entry_signal(struct xe_sync_entry *sync, struct xe_sched_job *job, user_fence_put(sync->ufence); dma_fence_put(fence); } - } else if (sync->type == DRM_XE_SYNC_TYPE_USER_FENCE) { - job->user_fence.used = true; - job->user_fence.addr = sync->addr; - job->user_fence.value = sync->timeline_value; } } diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index 0fd0d51208..3e03396af2 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -26,7 +26,6 @@ int xe_sync_entry_wait(struct xe_sync_entry *sync); int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job); void xe_sync_entry_signal(struct xe_sync_entry *sync, - struct xe_sched_job *job, struct dma_fence *fence); void xe_sync_entry_cleanup(struct xe_sync_entry *sync); struct dma_fence * diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 0650b2fa75..15ea0a942f 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -160,24 +160,19 @@ int xe_tile_init_noalloc(struct xe_tile *tile) { int err; - xe_device_mem_access_get(tile_to_xe(tile)); - err = tile_ttm_mgr_init(tile); if (err) - goto err_mem_access; + return err; tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16); - if (IS_ERR(tile->mem.kernel_bb_pool)) { - err = PTR_ERR(tile->mem.kernel_bb_pool); - goto err_mem_access; - } + if (IS_ERR(tile->mem.kernel_bb_pool)) + return PTR_ERR(tile->mem.kernel_bb_pool); + xe_wa_apply_tile_workarounds(tile); - xe_tile_sysfs_init(tile); + err = xe_tile_sysfs_init(tile); -err_mem_access: - xe_device_mem_access_put(tile_to_xe(tile)); - return err; + return 0; } void xe_tile_migrate_wait(struct xe_tile *tile) diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c index 0662968d7b..64661403af 100644 --- a/drivers/gpu/drm/xe/xe_tile_sysfs.c +++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c @@ -7,6 +7,7 @@ #include <linux/sysfs.h> #include <drm/drm_managed.h> +#include "xe_pm.h" #include "xe_tile.h" #include "xe_tile_sysfs.h" #include "xe_vram_freq.h" @@ -28,7 +29,7 @@ static void tile_sysfs_fini(struct drm_device *drm, void *arg) kobject_put(tile->sysfs); } -void xe_tile_sysfs_init(struct xe_tile *tile) +int xe_tile_sysfs_init(struct xe_tile *tile) { struct xe_device *xe = tile_to_xe(tile); struct device *dev = xe->drm.dev; @@ -37,7 +38,7 @@ void xe_tile_sysfs_init(struct xe_tile *tile) kt = kzalloc(sizeof(*kt), GFP_KERNEL); if (!kt) - return; + return -ENOMEM; kobject_init(&kt->base, &xe_tile_sysfs_kobj_type); kt->tile = tile; @@ -45,16 +46,14 @@ void xe_tile_sysfs_init(struct xe_tile *tile) err = kobject_add(&kt->base, &dev->kobj, "tile%d", tile->id); if (err) { kobject_put(&kt->base); - drm_warn(&xe->drm, "failed to register TILE sysfs directory, err: %d\n", err); - return; + return err; } tile->sysfs = &kt->base; - xe_vram_freq_sysfs_init(tile); - - err = drmm_add_action_or_reset(&xe->drm, tile_sysfs_fini, tile); + err = xe_vram_freq_sysfs_init(tile); if (err) - drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); + return err; + + return drmm_add_action_or_reset(&xe->drm, tile_sysfs_fini, tile); } diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.h b/drivers/gpu/drm/xe/xe_tile_sysfs.h index e4f065039e..54a2ba8ba5 100644 --- a/drivers/gpu/drm/xe/xe_tile_sysfs.h +++ b/drivers/gpu/drm/xe/xe_tile_sysfs.h @@ -8,7 +8,7 @@ #include "xe_tile_sysfs_types.h" -void xe_tile_sysfs_init(struct xe_tile *tile); +int xe_tile_sysfs_init(struct xe_tile *tile); static inline struct xe_tile * kobj_to_tile(struct kobject *kobj) diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 846f14507d..2d56cfc09e 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -258,7 +258,7 @@ DECLARE_EVENT_CLASS(xe_sched_job, __field(u32, guc_state) __field(u32, flags) __field(int, error) - __field(u64, fence) + __field(struct dma_fence *, fence) __field(u64, batch_addr) ), @@ -269,11 +269,11 @@ DECLARE_EVENT_CLASS(xe_sched_job, atomic_read(&job->q->guc->state); __entry->flags = job->q->flags; __entry->error = job->fence->error; - __entry->fence = (unsigned long)job->fence; + __entry->fence = job->fence; __entry->batch_addr = (u64)job->batch_addr[0]; ), - TP_printk("fence=0x%016llx, seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d", + TP_printk("fence=%p, seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d", __entry->fence, __entry->seqno, __entry->guc_id, __entry->batch_addr, __entry->guc_state, __entry->flags, __entry->error) diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index fb35e46d68..f773673297 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -204,7 +204,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) { struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - u64 stolen_size, io_size, pgsize; + u64 stolen_size, io_size; int err; if (!mgr) { @@ -226,10 +226,6 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) return; } - pgsize = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K; - if (pgsize < PAGE_SIZE) - pgsize = PAGE_SIZE; - /* * We don't try to attempt partial visible support for stolen vram, * since stolen is always at the end of vram, and the BAR size is pretty @@ -240,7 +236,7 @@ void xe_ttm_stolen_mgr_init(struct xe_device *xe) io_size = stolen_size; err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size, - io_size, pgsize); + io_size, PAGE_SIZE); if (err) { drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err); return; @@ -303,7 +299,7 @@ static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe, XE_WARN_ON(IS_DGFX(xe)); /* XXX: Require BO to be mapped to GGTT? */ - if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_CREATE_GGTT_BIT))) + if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_GGTT))) return -EIO; /* GGTT is always contiguously mapped */ diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c index 3e1fa0c832..9844a8edbf 100644 --- a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c @@ -73,7 +73,10 @@ static void xe_ttm_sys_mgr_del(struct ttm_resource_manager *man, static void xe_ttm_sys_mgr_debug(struct ttm_resource_manager *man, struct drm_printer *printer) { - + /* + * This function is called by debugfs entry and would require + * pm_runtime_{get,put} wrappers around any operation. + */ } static const struct ttm_resource_manager_func xe_ttm_sys_mgr_func = { diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 0678faf832..fe3779fdba 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -196,7 +196,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, return 0; error_free_blocks: - drm_buddy_free_list(mm, &vres->blocks); + drm_buddy_free_list(mm, &vres->blocks, 0); mutex_unlock(&mgr->lock); error_fini: ttm_resource_fini(man, &vres->base); @@ -214,7 +214,7 @@ static void xe_ttm_vram_mgr_del(struct ttm_resource_manager *man, struct drm_buddy *mm = &mgr->mm; mutex_lock(&mgr->lock); - drm_buddy_free_list(mm, &vres->blocks); + drm_buddy_free_list(mm, &vres->blocks, 0); mgr->visible_avail += vres->used_visible_size; mutex_unlock(&mgr->lock); @@ -478,3 +478,15 @@ void xe_ttm_vram_get_used(struct ttm_resource_manager *man, *used_visible = mgr->visible_size - mgr->visible_avail; mutex_unlock(&mgr->lock); } + +u64 xe_ttm_vram_get_avail(struct ttm_resource_manager *man) +{ + struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); + u64 avail; + + mutex_lock(&mgr->lock); + avail = mgr->mm.avail; + mutex_unlock(&mgr->lock); + + return avail; +} diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h index d184e19a92..cc76050e37 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h @@ -25,6 +25,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, void xe_ttm_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir, struct sg_table *sgt); +u64 xe_ttm_vram_get_avail(struct ttm_resource_manager *man); u64 xe_ttm_vram_get_cpu_visible_size(struct ttm_resource_manager *man); void xe_ttm_vram_get_used(struct ttm_resource_manager *man, u64 *used, u64 *used_visible); diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 5c83c75bc4..d4e6fa9189 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -28,7 +28,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { /* Xe2 */ { XE_RTP_NAME("Tuning: L3 cache"), - XE_RTP_RULES(GRAPHICS_VERSION(2004)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) }, @@ -38,11 +38,11 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) }, { XE_RTP_NAME("Tuning: Compression Overfetch"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2004, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX)), }, { XE_RTP_NAME("Tuning: Enable compressible partial write overfetch in L3"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2004, XE_RTP_END_VERSION_UNDEFINED)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)), XE_RTP_ACTIONS(SET(L3SQCREG3, COMPPWOVERFETCHEN)) }, {} @@ -50,7 +50,7 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { static const struct xe_rtp_entry_sr engine_tunings[] = { { XE_RTP_NAME("Tuning: Set Indirect State Override"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1271), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1200, 1274), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE)) }, @@ -88,7 +88,7 @@ static const struct xe_rtp_entry_sr lrc_tunings[] = { /* Xe_LPG */ { XE_RTP_NAME("Tuning: L3 cache"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), ENGINE_CLASS(RENDER)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(FIELD_SET(XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f))) }, diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 7033f8c1b4..4feb35c95a 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -32,11 +32,8 @@ uc_to_xe(struct xe_uc *uc) /* Should be called once at driver load only */ int xe_uc_init(struct xe_uc *uc) { - struct xe_device *xe = uc_to_xe(uc); int ret; - xe_device_mem_access_get(xe); - /* * We call the GuC/HuC/GSC init functions even if GuC submission is off * to correctly move our tracking of the FW state to "disabled". @@ -65,16 +62,8 @@ int xe_uc_init(struct xe_uc *uc) goto err; ret = xe_guc_db_mgr_init(&uc->guc.dbm, ~0); - if (ret) - goto err; - - xe_device_mem_access_put(xe); - - return 0; err: - xe_device_mem_access_put(xe); - return ret; } diff --git a/drivers/gpu/drm/xe/xe_uc_debugfs.c b/drivers/gpu/drm/xe/xe_uc_debugfs.c index 0a39ec5a6e..78eb8db737 100644 --- a/drivers/gpu/drm/xe/xe_uc_debugfs.c +++ b/drivers/gpu/drm/xe/xe_uc_debugfs.c @@ -3,6 +3,8 @@ * Copyright © 2022 Intel Corporation */ +#include <linux/debugfs.h> + #include <drm/drm_debugfs.h> #include "xe_gt.h" diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index a9d25b3fa6..186f81640c 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -17,6 +17,7 @@ #include "xe_map.h" #include "xe_mmio.h" #include "xe_module.h" +#include "xe_sriov.h" #include "xe_uc_fw.h" /* @@ -296,36 +297,28 @@ static void uc_fw_fini(struct drm_device *drm, void *arg) xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED); } -static void guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) +static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css) { struct xe_gt *gt = uc_fw_to_gt(uc_fw); struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE]; struct xe_uc_fw_version *compatibility = &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY]; xe_gt_assert(gt, uc_fw->type == XE_UC_FW_TYPE_GUC); - xe_gt_assert(gt, release->major >= 70); - - if (release->major > 70 || release->minor >= 6) { - /* v70.6.0 adds CSS header support */ - compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, - css->submission_version); - compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, - css->submission_version); - compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, - css->submission_version); - } else if (release->minor >= 3) { - /* v70.3.0 introduced v1.1.0 */ - compatibility->major = 1; - compatibility->minor = 1; - compatibility->patch = 0; - } else { - /* v70.0.0 introduced v1.0.0 */ - compatibility->major = 1; - compatibility->minor = 0; - compatibility->patch = 0; + + /* We don't support GuC releases older than 70.19 */ + if (release->major < 70 || (release->major == 70 && release->minor < 19)) { + xe_gt_err(gt, "Unsupported GuC v%u.%u! v70.19 or newer is required\n", + release->major, release->minor); + return -EINVAL; } + compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->submission_version); + compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->submission_version); + compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->submission_version); + uc_fw->private_data_size = css->private_data_size; + + return 0; } int xe_uc_fw_check_version_requirements(struct xe_uc_fw *uc_fw) @@ -424,7 +417,7 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->sw_version); if (uc_fw->type == XE_UC_FW_TYPE_GUC) - guc_read_css_info(uc_fw, css); + return guc_read_css_info(uc_fw, css); return 0; } @@ -658,7 +651,17 @@ static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmwar xe_assert(xe, !uc_fw->path); uc_fw_auto_select(xe, uc_fw); + + if (IS_SRIOV_VF(xe)) { + /* VF will support only firmwares that driver can autoselect */ + xe_uc_fw_change_status(uc_fw, uc_fw->path ? + XE_UC_FIRMWARE_PRELOADED : + XE_UC_FIRMWARE_NOT_SUPPORTED); + return 0; + } + uc_fw_override(uc_fw); + xe_uc_fw_change_status(uc_fw, uc_fw->path ? XE_UC_FIRMWARE_SELECTED : XE_UC_FIRMWARE_NOT_SUPPORTED); @@ -771,7 +774,8 @@ int xe_uc_fw_init(struct xe_uc_fw *uc_fw) return 0; err = uc_fw_copy(uc_fw, fw->data, fw->size, - XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_GGTT_BIT); + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE); uc_fw_release(fw); @@ -787,7 +791,8 @@ static int uc_fw_xfer(struct xe_uc_fw *uc_fw, u32 offset, u32 dma_flags) { struct xe_device *xe = uc_fw_to_xe(uc_fw); struct xe_gt *gt = uc_fw_to_gt(uc_fw); - u32 src_offset, dma_ctrl; + u64 src_offset; + u32 dma_ctrl; int ret; xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT); diff --git a/drivers/gpu/drm/xe/xe_uc_fw.h b/drivers/gpu/drm/xe/xe_uc_fw.h index 85c20795d1..3507803879 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.h +++ b/drivers/gpu/drm/xe/xe_uc_fw.h @@ -59,6 +59,8 @@ const char *xe_uc_fw_status_repr(enum xe_uc_fw_status status) return "TRANSFERRED"; case XE_UC_FIRMWARE_RUNNING: return "RUNNING"; + case XE_UC_FIRMWARE_PRELOADED: + return "PRELOADED"; } return "<invalid>"; } @@ -85,6 +87,7 @@ static inline int xe_uc_fw_status_to_error(enum xe_uc_fw_status status) case XE_UC_FIRMWARE_LOADABLE: case XE_UC_FIRMWARE_TRANSFERRED: case XE_UC_FIRMWARE_RUNNING: + case XE_UC_FIRMWARE_PRELOADED: return 0; } return -EINVAL; @@ -134,7 +137,8 @@ static inline bool xe_uc_fw_is_available(struct xe_uc_fw *uc_fw) static inline bool xe_uc_fw_is_loadable(struct xe_uc_fw *uc_fw) { - return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_LOADABLE; + return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_LOADABLE && + __xe_uc_fw_status(uc_fw) != XE_UC_FIRMWARE_PRELOADED; } static inline bool xe_uc_fw_is_loaded(struct xe_uc_fw *uc_fw) @@ -144,7 +148,7 @@ static inline bool xe_uc_fw_is_loaded(struct xe_uc_fw *uc_fw) static inline bool xe_uc_fw_is_running(struct xe_uc_fw *uc_fw) { - return __xe_uc_fw_status(uc_fw) == XE_UC_FIRMWARE_RUNNING; + return __xe_uc_fw_status(uc_fw) >= XE_UC_FIRMWARE_RUNNING; } static inline bool xe_uc_fw_is_overridden(const struct xe_uc_fw *uc_fw) diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h index bc800b6968..0d8caa0e73 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw_types.h +++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h @@ -50,7 +50,8 @@ enum xe_uc_fw_status { XE_UC_FIRMWARE_LOADABLE, /* all fw-required objects are ready */ XE_UC_FIRMWARE_LOAD_FAIL, /* failed to xfer or init/auth the fw */ XE_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */ - XE_UC_FIRMWARE_RUNNING /* init/auth done */ + XE_UC_FIRMWARE_RUNNING, /* init/auth done */ + XE_UC_FIRMWARE_PRELOADED, /* preloaded by the PF driver */ }; enum xe_uc_fw_type { diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 32cd0c978a..4aa3943e6f 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -21,12 +21,12 @@ #include <generated/xe_wa_oob.h> +#include "regs/xe_gtt_defs.h" #include "xe_assert.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_drm_client.h" #include "xe_exec_queue.h" -#include "xe_gt.h" #include "xe_gt_pagefault.h" #include "xe_gt_tlb_invalidation.h" #include "xe_migrate.h" @@ -38,6 +38,7 @@ #include "xe_sync.h" #include "xe_trace.h" #include "xe_wa.h" +#include "xe_hmm.h" static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) { @@ -65,113 +66,14 @@ int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) { - struct xe_userptr *userptr = &uvma->userptr; struct xe_vma *vma = &uvma->vma; struct xe_vm *vm = xe_vma_vm(vma); struct xe_device *xe = vm->xe; - const unsigned long num_pages = xe_vma_size(vma) >> PAGE_SHIFT; - struct page **pages; - bool in_kthread = !current->mm; - unsigned long notifier_seq; - int pinned, ret, i; - bool read_only = xe_vma_read_only(vma); lockdep_assert_held(&vm->lock); xe_assert(xe, xe_vma_is_userptr(vma)); -retry: - if (vma->gpuva.flags & XE_VMA_DESTROYED) - return 0; - - notifier_seq = mmu_interval_read_begin(&userptr->notifier); - if (notifier_seq == userptr->notifier_seq) - return 0; - - pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL); - if (!pages) - return -ENOMEM; - - if (userptr->sg) { - dma_unmap_sgtable(xe->drm.dev, - userptr->sg, - read_only ? DMA_TO_DEVICE : - DMA_BIDIRECTIONAL, 0); - sg_free_table(userptr->sg); - userptr->sg = NULL; - } - - pinned = ret = 0; - if (in_kthread) { - if (!mmget_not_zero(userptr->notifier.mm)) { - ret = -EFAULT; - goto mm_closed; - } - kthread_use_mm(userptr->notifier.mm); - } - - while (pinned < num_pages) { - ret = get_user_pages_fast(xe_vma_userptr(vma) + - pinned * PAGE_SIZE, - num_pages - pinned, - read_only ? 0 : FOLL_WRITE, - &pages[pinned]); - if (ret < 0) - break; - - pinned += ret; - ret = 0; - } - - if (in_kthread) { - kthread_unuse_mm(userptr->notifier.mm); - mmput(userptr->notifier.mm); - } -mm_closed: - if (ret) - goto out; - - ret = sg_alloc_table_from_pages_segment(&userptr->sgt, pages, - pinned, 0, - (u64)pinned << PAGE_SHIFT, - xe_sg_segment_size(xe->drm.dev), - GFP_KERNEL); - if (ret) { - userptr->sg = NULL; - goto out; - } - userptr->sg = &userptr->sgt; - - ret = dma_map_sgtable(xe->drm.dev, userptr->sg, - read_only ? DMA_TO_DEVICE : - DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC | - DMA_ATTR_NO_KERNEL_MAPPING); - if (ret) { - sg_free_table(userptr->sg); - userptr->sg = NULL; - goto out; - } - - for (i = 0; i < pinned; ++i) { - if (!read_only) { - lock_page(pages[i]); - set_page_dirty(pages[i]); - unlock_page(pages[i]); - } - - mark_page_accessed(pages[i]); - } - -out: - release_pages(pages, pinned); - kvfree(pages); - - if (!(ret < 0)) { - userptr->notifier_seq = notifier_seq; - if (xe_vma_userptr_check_repin(uvma) == -EAGAIN) - goto retry; - } - return ret < 0 ? ret : 0; + return xe_hmm_userptr_populate_range(uvma, false); } static bool preempt_fences_waiting(struct xe_vm *vm) @@ -682,6 +584,10 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, if (!mmu_notifier_range_blockable(range)) return false; + vm_dbg(&xe_vma_vm(vma)->xe->drm, + "NOTIFIER: addr=0x%016llx, range=0x%016llx", + xe_vma_start(vma), xe_vma_size(vma)); + down_write(&vm->userptr.notifier_lock); mmu_interval_set_seq(mni, cur_seq); @@ -951,8 +857,6 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, static void xe_vma_destroy_late(struct xe_vma *vma) { struct xe_vm *vm = xe_vma_vm(vma); - struct xe_device *xe = vm->xe; - bool read_only = xe_vma_read_only(vma); if (vma->ufence) { xe_sync_ufence_put(vma->ufence); @@ -960,16 +864,11 @@ static void xe_vma_destroy_late(struct xe_vma *vma) } if (xe_vma_is_userptr(vma)) { - struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; + struct xe_userptr_vma *uvma = to_userptr_vma(vma); + struct xe_userptr *userptr = &uvma->userptr; - if (userptr->sg) { - dma_unmap_sgtable(xe->drm.dev, - userptr->sg, - read_only ? DMA_TO_DEVICE : - DMA_BIDIRECTIONAL, 0); - sg_free_table(userptr->sg); - userptr->sg = NULL; - } + if (userptr->sg) + xe_hmm_userptr_free_sg(uvma); /* * Since userptr pages are not pinned, we can't remove @@ -1274,8 +1173,6 @@ static const struct xe_pt_ops xelp_pt_ops = { .pde_encode_bo = xelp_pde_encode_bo, }; -static void vm_destroy_work_func(struct work_struct *w); - /** * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the * given tile and vm. @@ -1355,8 +1252,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) init_rwsem(&vm->userptr.notifier_lock); spin_lock_init(&vm->userptr.invalidated_lock); - INIT_WORK(&vm->destroy_work, vm_destroy_work_func); - INIT_LIST_HEAD(&vm->preempt.exec_queues); vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ @@ -1366,7 +1261,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) vm->pt_ops = &xelp_pt_ops; if (!(flags & XE_VM_FLAG_MIGRATION)) - xe_device_mem_access_get(xe); + xe_pm_runtime_get_noresume(xe); vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); if (!vm_resv_obj) { @@ -1411,9 +1306,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) vm->batch_invalidate_tlb = true; } - if (flags & XE_VM_FLAG_LR_MODE) { + if (vm->flags & XE_VM_FLAG_LR_MODE) { INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); - vm->flags |= XE_VM_FLAG_LR_MODE; vm->batch_invalidate_tlb = false; } @@ -1477,7 +1371,7 @@ err_no_resv: xe_range_fence_tree_fini(&vm->rftree[id]); kfree(vm); if (!(flags & XE_VM_FLAG_MIGRATION)) - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); return ERR_PTR(err); } @@ -1595,10 +1489,9 @@ void xe_vm_close_and_put(struct xe_vm *vm) xe_vm_put(vm); } -static void vm_destroy_work_func(struct work_struct *w) +static void xe_vm_free(struct drm_gpuvm *gpuvm) { - struct xe_vm *vm = - container_of(w, struct xe_vm, destroy_work); + struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); struct xe_device *xe = vm->xe; struct xe_tile *tile; u8 id; @@ -1612,7 +1505,7 @@ static void vm_destroy_work_func(struct work_struct *w) mutex_destroy(&vm->snap_mutex); if (!(vm->flags & XE_VM_FLAG_MIGRATION)) - xe_device_mem_access_put(xe); + xe_pm_runtime_put(xe); for_each_tile(tile, xe, id) XE_WARN_ON(vm->pt_root[id]); @@ -1621,14 +1514,6 @@ static void vm_destroy_work_func(struct work_struct *w) kfree(vm); } -static void xe_vm_free(struct drm_gpuvm *gpuvm) -{ - struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); - - /* To destroy the VM we need to be able to sleep */ - queue_work(system_unbound_wq, &vm->destroy_work); -} - struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) { struct xe_vm *vm; @@ -1725,7 +1610,7 @@ next: xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence; if (last_op) { for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], NULL, fence); + xe_sync_entry_signal(&syncs[i], fence); } return fence; @@ -1799,7 +1684,7 @@ next: if (last_op) { for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], NULL, + xe_sync_entry_signal(&syncs[i], cf ? &cf->base : fence); } @@ -1860,7 +1745,7 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm); if (last_op) { for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], NULL, fence); + xe_sync_entry_signal(&syncs[i], fence); } } @@ -2061,7 +1946,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q); int err; - xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); + xe_assert(vm->xe, region < ARRAY_SIZE(region_to_mem_type)); if (!xe_vma_has_no_bo(vma)) { err = xe_bo_migrate(xe_vma_bo(vma), region_to_mem_type[region]); @@ -2081,7 +1966,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma, struct dma_fence *fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm); - xe_sync_entry_signal(&syncs[i], NULL, fence); + xe_sync_entry_signal(&syncs[i], fence); dma_fence_put(fence); } } @@ -2213,6 +2098,10 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, struct xe_vma_op *op = gpuva_op_to_vma_op(__op); if (__op->op == DRM_GPUVA_OP_MAP) { + op->map.immediate = + flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; + op->map.read_only = + flags & DRM_XE_VM_BIND_FLAG_READONLY; op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; op->map.pat_index = pat_index; @@ -2407,6 +2296,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q, switch (op->base.op) { case DRM_GPUVA_OP_MAP: { + flags |= op->map.read_only ? + VMA_CREATE_FLAG_READ_ONLY : 0; flags |= op->map.is_null ? VMA_CREATE_FLAG_IS_NULL : 0; flags |= op->map.dumpable ? @@ -2551,7 +2442,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm, case DRM_GPUVA_OP_MAP: err = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma), op->syncs, op->num_syncs, - !xe_vm_in_fault_mode(vm), + op->map.immediate || !xe_vm_in_fault_mode(vm), op->flags & XE_VMA_OP_FIRST, op->flags & XE_VMA_OP_LAST); break; @@ -2826,7 +2717,10 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, return 0; } -#define SUPPORTED_FLAGS (DRM_XE_VM_BIND_FLAG_NULL | \ +#define SUPPORTED_FLAGS \ + (DRM_XE_VM_BIND_FLAG_READONLY | \ + DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ + DRM_XE_VM_BIND_FLAG_NULL | \ DRM_XE_VM_BIND_FLAG_DUMPABLE) #define XE_64K_PAGE_MASK 0xffffull #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) @@ -2959,7 +2853,7 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, return PTR_ERR(fence); for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], NULL, fence); + xe_sync_entry_signal(&syncs[i], fence); xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, fence); @@ -3070,7 +2964,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto put_obj; } - if (bos[i]->flags & XE_BO_INTERNAL_64K) { + if (bos[i]->flags & XE_BO_FLAG_INTERNAL_64K) { if (XE_IOCTL_DBG(xe, obj_offset & XE_64K_PAGE_MASK) || XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || @@ -3262,6 +3156,10 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) xe_assert(xe, !xe_vma_is_null(vma)); trace_xe_vma_invalidate(vma); + vm_dbg(&xe_vma_vm(vma)->xe->drm, + "INVALIDATE: addr=0x%016llx, range=0x%016llx", + xe_vma_start(vma), xe_vma_size(vma)); + /* Check that we don't race with page-table updates */ if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { if (xe_vma_is_userptr(vma)) { @@ -3380,8 +3278,10 @@ struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) if (num_snaps) snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); - if (!snap) + if (!snap) { + snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV); goto out_unlock; + } snap->num_snaps = num_snaps; i = 0; @@ -3421,6 +3321,9 @@ out_unlock: void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) { + if (IS_ERR_OR_NULL(snap)) + return; + for (int i = 0; i < snap->num_snaps; i++) { struct xe_bo *bo = snap->snap[i].bo; struct iosys_map src; @@ -3475,13 +3378,21 @@ void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) { unsigned long i, j; - for (i = 0; i < snap->num_snaps; i++) { - if (IS_ERR(snap->snap[i].data)) - goto uncaptured; + if (IS_ERR_OR_NULL(snap)) { + drm_printf(p, "[0].error: %li\n", PTR_ERR(snap)); + return; + } + for (i = 0; i < snap->num_snaps; i++) { drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); - drm_printf(p, "[%llx].data: ", - snap->snap[i].ofs); + + if (IS_ERR(snap->snap[i].data)) { + drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs, + PTR_ERR(snap->snap[i].data)); + continue; + } + + drm_printf(p, "[%llx].data: ", snap->snap[i].ofs); for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { u32 *val = snap->snap[i].data + j; @@ -3491,12 +3402,6 @@ void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) } drm_puts(p, "\n"); - continue; - -uncaptured: - drm_printf(p, "Unable to capture range [%llx-%llx]: %li\n", - snap->snap[i].ofs, snap->snap[i].ofs + snap->snap[i].len - 1, - PTR_ERR(snap->snap[i].data)); } } @@ -3504,7 +3409,7 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) { unsigned long i; - if (!snap) + if (IS_ERR_OR_NULL(snap)) return; for (i = 0; i < snap->num_snaps; i++) { diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index badf394508..72a100671e 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -178,13 +178,6 @@ struct xe_vm { struct list_head rebind_list; /** - * @destroy_work: worker to destroy VM, needed as a dma_fence signaling - * from an irq context can be last put and the destroy needs to be able - * to sleep. - */ - struct work_struct destroy_work; - - /** * @rftree: range fence tree to track updates to page table structure. * Used to implement conflict tracking between independent bind engines. */ @@ -276,6 +269,10 @@ struct xe_vm { struct xe_vma_op_map { /** @vma: VMA to map */ struct xe_vma *vma; + /** @immediate: Immediate bind */ + bool immediate; + /** @read_only: Read only */ + bool read_only; /** @is_null: is NULL binding */ bool is_null; /** @dumpable: whether BO is dumped on GPU hang */ diff --git a/drivers/gpu/drm/xe/xe_vram_freq.c b/drivers/gpu/drm/xe/xe_vram_freq.c index c5f6b5a5d1..3e21ddc6e6 100644 --- a/drivers/gpu/drm/xe/xe_vram_freq.c +++ b/drivers/gpu/drm/xe/xe_vram_freq.c @@ -100,31 +100,27 @@ static void vram_freq_sysfs_fini(struct drm_device *drm, void *arg) * @tile: Xe Tile object * * It needs to be initialized after the main tile component is ready + * + * Returns: 0 on success, negative error code on error. */ -void xe_vram_freq_sysfs_init(struct xe_tile *tile) +int xe_vram_freq_sysfs_init(struct xe_tile *tile) { struct xe_device *xe = tile_to_xe(tile); struct kobject *kobj; int err; if (xe->info.platform != XE_PVC) - return; + return 0; kobj = kobject_create_and_add("memory", tile->sysfs); - if (!kobj) { - drm_warn(&xe->drm, "failed to add memory directory, err: %d\n", -ENOMEM); - return; - } + if (!kobj) + return -ENOMEM; err = sysfs_create_group(kobj, &freq_group_attrs); if (err) { kobject_put(kobj); - drm_warn(&xe->drm, "failed to register vram freq sysfs, err: %d\n", err); - return; + return err; } - err = drmm_add_action_or_reset(&xe->drm, vram_freq_sysfs_fini, kobj); - if (err) - drm_warn(&xe->drm, "%s: drmm_add_action_or_reset failed, err: %d\n", - __func__, err); + return drmm_add_action_or_reset(&xe->drm, vram_freq_sysfs_fini, kobj); } diff --git a/drivers/gpu/drm/xe/xe_vram_freq.h b/drivers/gpu/drm/xe/xe_vram_freq.h index cbe8c12fbd..bf726bc588 100644 --- a/drivers/gpu/drm/xe/xe_vram_freq.h +++ b/drivers/gpu/drm/xe/xe_vram_freq.h @@ -8,6 +8,6 @@ struct xe_tile; -void xe_vram_freq_sysfs_init(struct xe_tile *tile); +int xe_vram_freq_sysfs_init(struct xe_tile *tile); #endif /* _XE_VRAM_FREQ_H_ */ diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index a0264eedd4..dd214d95e4 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -173,11 +173,11 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(CLR(MISCCPCTL, DOP_CLOCK_GATE_RENDER_ENABLE)) }, { XE_RTP_NAME("14018575942"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274)), XE_RTP_ACTIONS(SET(COMP_MOD_CTRL, FORCE_MISS_FTLB)) }, { XE_RTP_NAME("22016670082"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274)), XE_RTP_ACTIONS(SET(SQCNT1, ENFORCE_RAR)) }, @@ -228,6 +228,28 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, + /* Xe2_HPM */ + + { XE_RTP_NAME("16021867713"), + XE_RTP_RULES(MEDIA_VERSION(1301), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, + { XE_RTP_NAME("14020316580"), + XE_RTP_RULES(MEDIA_VERSION(1301)), + XE_RTP_ACTIONS(CLR(PG_ENABLE, + VD0_HCP_POWERGATE_ENABLE | + VD0_MFXVDENC_POWERGATE_ENABLE | + VD2_HCP_POWERGATE_ENABLE | + VD2_MFXVDENC_POWERGATE_ENABLE)), + }, + { XE_RTP_NAME("14019449301"), + XE_RTP_RULES(MEDIA_VERSION(1301), ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, + {} }; @@ -328,12 +350,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE)) }, - { XE_RTP_NAME("16015675438"), - XE_RTP_RULES(PLATFORM(DG2), - FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2(RENDER_RING_BASE), - PERF_FIX_BALANCING_CFE_DISABLE)) - }, { XE_RTP_NAME("18028616096"), XE_RTP_RULES(PLATFORM(DG2), FUNC(xe_rtp_match_first_render_or_compute)), @@ -383,10 +399,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE)) }, - { XE_RTP_NAME("16015675438"), - XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute)), - XE_RTP_ACTIONS(SET(FF_SLICE_CS_CHICKEN2(RENDER_RING_BASE), - PERF_FIX_BALANCING_CFE_DISABLE)) + { XE_RTP_NAME("18020744125"), + XE_RTP_RULES(PLATFORM(PVC), FUNC(xe_rtp_match_first_render_or_compute), + ENGINE_CLASS(COMPUTE)), + XE_RTP_ACTIONS(SET(RING_HWSTAM(RENDER_RING_BASE), ~0)) }, { XE_RTP_NAME("14014999345"), XE_RTP_RULES(PLATFORM(PVC), ENGINE_CLASS(COMPUTE), @@ -397,7 +413,7 @@ static const struct xe_rtp_entry_sr engine_was[] = { /* Xe_LPG */ { XE_RTP_NAME("14017856879"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN3, DIS_FIX_EOT1_FLUSH)) }, @@ -407,6 +423,11 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(XEHP_HDC_CHICKEN0, DIS_ATOMIC_CHAINING_TYPED_WRITES, XE_RTP_NOCHECK)) }, + { XE_RTP_NAME("14020495402"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN2, DISABLE_TDL_SVHS_GATING)) + }, /* Xe2_LPG */ @@ -424,8 +445,12 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN5, DISABLE_SAMPLE_G_PERFORMANCE)) }, - { XE_RTP_NAME("16021540221"), - XE_RTP_RULES(GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0), + { XE_RTP_NAME("14020338487"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS)) + }, + { XE_RTP_NAME("18034896535, 16021540221"), /* 16021540221: GRAPHICS_STEP(A0, B0) */ + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) }, @@ -460,6 +485,65 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, SLM_WMTP_RESTORE)) }, + + /* Xe2_HPG */ + + { XE_RTP_NAME("16018712365"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS)) + }, + { XE_RTP_NAME("16018737384"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS)) + }, + { XE_RTP_NAME("14019988906"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD)) + }, + { XE_RTP_NAME("14019877138"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT)) + }, + { XE_RTP_NAME("14020338487"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS)) + }, + { XE_RTP_NAME("18032247524"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE)) + }, + { XE_RTP_NAME("14018471104"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL)) + }, + /* + * Although this workaround isn't required for the RCS, disabling these + * reports has no impact for our driver or the GuC, so we go ahead and + * apply this to all engines for simplicity. + */ + { XE_RTP_NAME("16021639441"), + XE_RTP_RULES(GRAPHICS_VERSION(2001)), + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), + GHWSP_CSB_REPORT_DIS | + PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, + { XE_RTP_NAME("14019811474"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, WR_REQ_CHAINING_DIS)) + }, + + /* Xe2_HPM */ + + { XE_RTP_NAME("16021639441"), + XE_RTP_RULES(MEDIA_VERSION(1301)), + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), + GHWSP_CSB_REPORT_DIS | + PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, + {} }; @@ -537,7 +621,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = { /* Xe_LPG */ { XE_RTP_NAME("18019271663"), - XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1271)), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(1270, 1274)), XE_RTP_ACTIONS(SET(CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE)) }, { XE_RTP_NAME("14019877138"), @@ -580,6 +664,24 @@ static const struct xe_rtp_entry_sr lrc_was[] = { ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(INSTPM(RENDER_RING_BASE), ENABLE_SEMAPHORE_POLL_BIT)) }, + { XE_RTP_NAME("18033852989"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST)) + }, + + /* Xe2_HPG */ + { XE_RTP_NAME("15010599737"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN)) + }, + { XE_RTP_NAME("14019386621"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE)) + }, + { XE_RTP_NAME("14020756599"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(WM_CHICKEN3, HIZ_PLANE_COMPRESSION_DIS)) + }, {} }; diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index b138cbd51b..12fe88796a 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -4,9 +4,6 @@ 22011391025 PLATFORM(DG2) 22012727170 SUBPLATFORM(DG2, G11) 22012727685 SUBPLATFORM(DG2, G11) -16015675438 PLATFORM(PVC) - SUBPLATFORM(DG2, G10) - SUBPLATFORM(DG2, G12) 18020744125 PLATFORM(PVC) 1509372804 PLATFORM(PVC), GRAPHICS_STEP(A0, C0) 1409600907 GRAPHICS_VERSION_RANGE(1200, 1250) @@ -22,3 +19,11 @@ GRAPHICS_VERSION_RANGE(1270, 1274) MEDIA_VERSION(1300) PLATFORM(DG2) +14018094691 GRAPHICS_VERSION(2004) +14019882105 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0) +18024947630 GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(2004) + MEDIA_VERSION(2000) +16022287689 GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(2004) +13011645652 GRAPHICS_VERSION(2004) |