From 50ba0232fd5312410f1b65247e774244f89a628e Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Sat, 18 May 2024 20:50:36 +0200
Subject: Merging upstream version 6.8.9.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 drivers/accel/drm_accel.c                          |   1 +
 drivers/accel/habanalabs/common/device.c           |   8 +-
 drivers/accel/habanalabs/common/firmware_if.c      | 123 ++--
 drivers/accel/habanalabs/common/habanalabs.h       |  15 +-
 drivers/accel/habanalabs/common/habanalabs_drv.c   |  34 ++
 drivers/accel/habanalabs/common/habanalabs_ioctl.c |  53 ++
 drivers/accel/habanalabs/common/hwmon.c            |   4 -
 drivers/accel/habanalabs/common/memory.c           |   7 +-
 drivers/accel/habanalabs/common/sysfs.c            |  39 +-
 drivers/accel/habanalabs/gaudi2/gaudi2.c           |  70 +--
 .../include/gaudi2/asic_reg/gaudi2_regs.h          |  13 +-
 drivers/accel/ivpu/Kconfig                         |  11 +-
 drivers/accel/ivpu/ivpu_debugfs.c                  |  73 ++-
 drivers/accel/ivpu/ivpu_drv.c                      | 193 +++---
 drivers/accel/ivpu/ivpu_drv.h                      |  23 +-
 drivers/accel/ivpu/ivpu_fw.c                       |  78 ++-
 drivers/accel/ivpu/ivpu_fw.h                       |   1 +
 drivers/accel/ivpu/ivpu_gem.c                      | 662 ++++++---------------
 drivers/accel/ivpu/ivpu_gem.h                      |  76 +--
 drivers/accel/ivpu/ivpu_hw.h                       |  26 +
 drivers/accel/ivpu/ivpu_hw_37xx.c                  | 124 ++--
 drivers/accel/ivpu/ivpu_hw_37xx_reg.h              |   2 +
 drivers/accel/ivpu/ivpu_hw_40xx.c                  | 104 +++-
 drivers/accel/ivpu/ivpu_ipc.c                      | 265 +++++----
 drivers/accel/ivpu/ivpu_ipc.h                      |  33 +-
 drivers/accel/ivpu/ivpu_job.c                      | 255 ++++----
 drivers/accel/ivpu/ivpu_job.h                      |   7 +-
 drivers/accel/ivpu/ivpu_jsm_msg.c                  |  38 ++
 drivers/accel/ivpu/ivpu_jsm_msg.h                  |   1 +
 drivers/accel/ivpu/ivpu_mmu.c                      |  99 ++-
 drivers/accel/ivpu/ivpu_mmu.h                      |   1 +
 drivers/accel/ivpu/ivpu_mmu_context.c              | 162 ++---
 drivers/accel/ivpu/ivpu_mmu_context.h              |  11 +-
 drivers/accel/ivpu/ivpu_pm.c                       | 154 +++--
 drivers/accel/ivpu/ivpu_pm.h                       |   9 +-
 drivers/accel/ivpu/vpu_boot_api.h                  |  90 ++-
 drivers/accel/ivpu/vpu_jsm_api.h                   | 309 +++++++++-
 drivers/accel/qaic/Makefile                        |   3 +-
 drivers/accel/qaic/mhi_controller.c                |  44 +-
 drivers/accel/qaic/mhi_controller.h                |   2 +-
 drivers/accel/qaic/qaic.h                          |  21 +-
 drivers/accel/qaic/qaic_control.c                  |   7 +-
 drivers/accel/qaic/qaic_data.c                     | 147 +++--
 drivers/accel/qaic/qaic_drv.c                      |  98 +--
 drivers/accel/qaic/qaic_timesync.c                 | 395 ++++++++++++
 drivers/accel/qaic/qaic_timesync.h                 |  11 +
 46 files changed, 2474 insertions(+), 1428 deletions(-)
 create mode 100644 drivers/accel/qaic/qaic_timesync.c
 create mode 100644 drivers/accel/qaic/qaic_timesync.h

(limited to 'drivers/accel')

diff --git a/drivers/accel/drm_accel.c b/drivers/accel/drm_accel.c
index 294b572a9c..24cac4c027 100644
--- a/drivers/accel/drm_accel.c
+++ b/drivers/accel/drm_accel.c
@@ -11,6 +11,7 @@
 #include <linux/idr.h>
 
 #include <drm/drm_accel.h>
+#include <drm/drm_auth.h>
 #include <drm/drm_debugfs.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_file.h>
diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index 83821e1757..a73bd4be94 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -1051,10 +1051,12 @@ static int hl_device_eq_heartbeat_check(struct hl_device *hdev)
 	if (!prop->cpucp_info.eq_health_check_supported)
 		return 0;
 
-	if (hdev->eq_heartbeat_received)
+	if (hdev->eq_heartbeat_received) {
 		hdev->eq_heartbeat_received = false;
-	else
+	} else {
+		dev_err(hdev->dev, "EQ heartbeat event was not received!\n");
 		return -EIO;
+	}
 
 	return 0;
 }
@@ -2038,7 +2040,7 @@ device_reset:
 	if (ctx)
 		hl_ctx_put(ctx);
 
-	return hl_device_reset(hdev, flags);
+	return hl_device_reset(hdev, flags | HL_DRV_RESET_HARD);
 }
 
 static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event_mask)
diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c
index 47e8384134..3558a6a8e1 100644
--- a/drivers/accel/habanalabs/common/firmware_if.c
+++ b/drivers/accel/habanalabs/common/firmware_if.c
@@ -646,39 +646,27 @@ int hl_fw_send_heartbeat(struct hl_device *hdev)
 	return rc;
 }
 
-static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
-								u32 sts_val)
+static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val, u32 sts_val)
 {
 	bool err_exists = false;
 
 	if (!(err_val & CPU_BOOT_ERR0_ENABLED))
 		return false;
 
-	if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL) {
-		dev_err(hdev->dev,
-			"Device boot error - DRAM initialization failed\n");
-		err_exists = true;
-	}
+	if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
+		dev_err(hdev->dev, "Device boot error - DRAM initialization failed\n");
 
-	if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED) {
+	if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
 		dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
-		err_exists = true;
-	}
 
-	if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL) {
-		dev_err(hdev->dev,
-			"Device boot error - Thermal Sensor initialization failed\n");
-		err_exists = true;
-	}
+	if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
+		dev_err(hdev->dev, "Device boot error - Thermal Sensor initialization failed\n");
 
 	if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) {
 		if (hdev->bmc_enable) {
-			dev_err(hdev->dev,
-				"Device boot error - Skipped waiting for BMC\n");
-			err_exists = true;
+			dev_err(hdev->dev, "Device boot error - Skipped waiting for BMC\n");
 		} else {
-			dev_info(hdev->dev,
-				"Device boot message - Skipped waiting for BMC\n");
+			dev_info(hdev->dev, "Device boot message - Skipped waiting for BMC\n");
 			/* This is an info so we don't want it to disable the
 			 * device
 			 */
@@ -686,48 +674,29 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
 		}
 	}
 
-	if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY) {
-		dev_err(hdev->dev,
-			"Device boot error - Serdes data from BMC not available\n");
-		err_exists = true;
-	}
+	if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
+		dev_err(hdev->dev, "Device boot error - Serdes data from BMC not available\n");
 
-	if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL) {
-		dev_err(hdev->dev,
-			"Device boot error - NIC F/W initialization failed\n");
-		err_exists = true;
-	}
+	if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
+		dev_err(hdev->dev, "Device boot error - NIC F/W initialization failed\n");
 
-	if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) {
-		dev_err(hdev->dev,
-			"Device boot warning - security not ready\n");
-		err_exists = true;
-	}
+	if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY)
+		dev_err(hdev->dev, "Device boot warning - security not ready\n");
 
-	if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL) {
+	if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL)
 		dev_err(hdev->dev, "Device boot error - security failure\n");
-		err_exists = true;
-	}
 
-	if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL) {
+	if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL)
 		dev_err(hdev->dev, "Device boot error - eFuse failure\n");
-		err_exists = true;
-	}
 
-	if (err_val & CPU_BOOT_ERR0_SEC_IMG_VER_FAIL) {
+	if (err_val & CPU_BOOT_ERR0_SEC_IMG_VER_FAIL)
 		dev_err(hdev->dev, "Device boot error - Failed to load preboot secondary image\n");
-		err_exists = true;
-	}
 
-	if (err_val & CPU_BOOT_ERR0_PLL_FAIL) {
+	if (err_val & CPU_BOOT_ERR0_PLL_FAIL)
 		dev_err(hdev->dev, "Device boot error - PLL failure\n");
-		err_exists = true;
-	}
 
-	if (err_val & CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL) {
+	if (err_val & CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL)
 		dev_err(hdev->dev, "Device boot error - Failed to set threshold for temperature sensor\n");
-		err_exists = true;
-	}
 
 	if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) {
 		/* Ignore this bit, don't prevent driver loading */
@@ -735,52 +704,32 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
 		err_val &= ~CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL;
 	}
 
-	if (err_val & CPU_BOOT_ERR0_BINNING_FAIL) {
+	if (err_val & CPU_BOOT_ERR0_BINNING_FAIL)
 		dev_err(hdev->dev, "Device boot error - binning failure\n");
-		err_exists = true;
-	}
 
 	if (sts_val & CPU_BOOT_DEV_STS0_ENABLED)
 		dev_dbg(hdev->dev, "Device status0 %#x\n", sts_val);
 
+	if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
+		dev_err(hdev->dev, "Device boot warning - Skipped DRAM initialization\n");
+
+	if (err_val & CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL)
+		dev_err(hdev->dev, "Device boot error - ARC memory scrub failed\n");
+
+	/* All warnings should go here in order not to reach the unknown error validation */
 	if (err_val & CPU_BOOT_ERR0_EEPROM_FAIL) {
 		dev_err(hdev->dev, "Device boot error - EEPROM failure detected\n");
 		err_exists = true;
 	}
 
-	/* All warnings should go here in order not to reach the unknown error validation */
-	if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) {
-		dev_warn(hdev->dev,
-			"Device boot warning - Skipped DRAM initialization\n");
-		/* This is a warning so we don't want it to disable the
-		 * device
-		 */
-		err_val &= ~CPU_BOOT_ERR0_DRAM_SKIPPED;
-	}
+	if (err_val & CPU_BOOT_ERR0_PRI_IMG_VER_FAIL)
+		dev_warn(hdev->dev, "Device boot warning - Failed to load preboot primary image\n");
 
-	if (err_val & CPU_BOOT_ERR0_PRI_IMG_VER_FAIL) {
-		dev_warn(hdev->dev,
-			"Device boot warning - Failed to load preboot primary image\n");
-		/* This is a warning so we don't want it to disable the
-		 * device as we have a secondary preboot image
-		 */
-		err_val &= ~CPU_BOOT_ERR0_PRI_IMG_VER_FAIL;
-	}
-
-	if (err_val & CPU_BOOT_ERR0_TPM_FAIL) {
-		dev_warn(hdev->dev,
-			"Device boot warning - TPM failure\n");
-		/* This is a warning so we don't want it to disable the
-		 * device
-		 */
-		err_val &= ~CPU_BOOT_ERR0_TPM_FAIL;
-	}
+	if (err_val & CPU_BOOT_ERR0_TPM_FAIL)
+		dev_warn(hdev->dev, "Device boot warning - TPM failure\n");
 
-	if (!err_exists && (err_val & ~CPU_BOOT_ERR0_ENABLED)) {
-		dev_err(hdev->dev,
-			"Device boot error - unknown ERR0 error 0x%08x\n", err_val);
+	if (err_val & CPU_BOOT_ERR_FATAL_MASK)
 		err_exists = true;
-	}
 
 	/* return error only if it's in the predefined mask */
 	if (err_exists && ((err_val & ~CPU_BOOT_ERR0_ENABLED) &
@@ -3295,6 +3244,14 @@ int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_in
 					HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC);
 }
 
+int hl_fw_get_dev_info_signed(struct hl_device *hdev,
+			      struct cpucp_dev_info_signed *dev_info_signed, u32 nonce)
+{
+	return hl_fw_get_sec_attest_data(hdev, CPUCP_PACKET_INFO_SIGNED_GET, dev_info_signed,
+					 sizeof(struct cpucp_dev_info_signed), nonce,
+					 HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC);
+}
+
 int hl_fw_send_generic_request(struct hl_device *hdev, enum hl_passthrough_type sub_opcode,
 						dma_addr_t buff, u32 *size)
 {
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index d0fd77bb6a..41c7aac2ff 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -2547,7 +2547,7 @@ struct hl_state_dump_specs {
  * DEVICES
  */
 
-#define HL_STR_MAX	32
+#define HL_STR_MAX	64
 
 #define HL_DEV_STS_MAX (HL_DEVICE_STATUS_LAST + 1)
 
@@ -3521,6 +3521,9 @@ struct hl_device {
 	u8				heartbeat;
 };
 
+/* Retrieve PCI device name in case of a PCI device or dev name in simulator */
+#define HL_DEV_NAME(hdev)	\
+		((hdev)->pdev ? dev_name(&(hdev)->pdev->dev) : "NA-DEVICE")
 
 /**
  * struct hl_cs_encaps_sig_handle - encapsulated signals handle structure
@@ -3596,6 +3599,14 @@ static inline bool hl_is_fw_sw_ver_below(struct hl_device *hdev, u32 fw_sw_major
 	return false;
 }
 
+static inline bool hl_is_fw_sw_ver_equal_or_greater(struct hl_device *hdev, u32 fw_sw_major,
+							u32 fw_sw_minor)
+{
+	return (hdev->fw_sw_major_ver > fw_sw_major ||
+			(hdev->fw_sw_major_ver == fw_sw_major &&
+					hdev->fw_sw_minor_ver >= fw_sw_minor));
+}
+
 /*
  * Kernel module functions that can be accessed by entire module
  */
@@ -3956,6 +3967,8 @@ long hl_fw_get_max_power(struct hl_device *hdev);
 void hl_fw_set_max_power(struct hl_device *hdev);
 int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_info *sec_attest_info,
 				u32 nonce);
+int hl_fw_get_dev_info_signed(struct hl_device *hdev,
+			      struct cpucp_dev_info_signed *dev_info_signed, u32 nonce);
 int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value);
 int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value);
 int hl_set_power(struct hl_device *hdev, int sensor_index, u32 attr, long value);
diff --git a/drivers/accel/habanalabs/common/habanalabs_drv.c b/drivers/accel/habanalabs/common/habanalabs_drv.c
index 51fb04bbe3..e542fd40e1 100644
--- a/drivers/accel/habanalabs/common/habanalabs_drv.c
+++ b/drivers/accel/habanalabs/common/habanalabs_drv.c
@@ -673,6 +673,38 @@ static pci_ers_result_t hl_pci_err_slot_reset(struct pci_dev *pdev)
 	return PCI_ERS_RESULT_RECOVERED;
 }
 
+static void hl_pci_reset_prepare(struct pci_dev *pdev)
+{
+	struct hl_device *hdev;
+
+	hdev = pci_get_drvdata(pdev);
+	if (!hdev)
+		return;
+
+	hdev->disabled = true;
+}
+
+static void hl_pci_reset_done(struct pci_dev *pdev)
+{
+	struct hl_device *hdev;
+	u32 flags;
+
+	hdev = pci_get_drvdata(pdev);
+	if (!hdev)
+		return;
+
+	/*
+	 * Schedule a thread to trigger hard reset.
+	 * The reason for this handler, is for rare cases where the driver is up
+	 * and FLR occurs. This is valid only when working with no VM, so FW handles FLR
+	 * and resets the device. FW will go back preboot stage, so driver needs to perform
+	 * hard reset in order to load FW fit again.
+	 */
+	flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW;
+
+	hl_device_reset(hdev, flags);
+}
+
 static const struct dev_pm_ops hl_pm_ops = {
 	.suspend = hl_pmops_suspend,
 	.resume = hl_pmops_resume,
@@ -682,6 +714,8 @@ static const struct pci_error_handlers hl_pci_err_handler = {
 	.error_detected = hl_pci_err_detected,
 	.slot_reset = hl_pci_err_slot_reset,
 	.resume = hl_pci_err_resume,
+	.reset_prepare = hl_pci_reset_prepare,
+	.reset_done = hl_pci_reset_done,
 };
 
 static struct pci_driver hl_pci_driver = {
diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
index a7cd625d82..1dd6e23172 100644
--- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c
@@ -19,6 +19,9 @@
 
 #include <asm/msr.h>
 
+/* make sure there is space for all the signed info */
+static_assert(sizeof(struct cpucp_info) <= SEC_DEV_INFO_BUF_SZ);
+
 static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
 	[HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),
 	[HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf),
@@ -719,6 +722,53 @@ free_sec_attest_info:
 	return rc;
 }
 
+static int dev_info_signed(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+	struct cpucp_dev_info_signed *dev_info_signed;
+	struct hl_info_signed *info;
+	u32 max_size = args->return_size;
+	int rc;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	dev_info_signed = kzalloc(sizeof(*dev_info_signed), GFP_KERNEL);
+	if (!dev_info_signed)
+		return -ENOMEM;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		rc = -ENOMEM;
+		goto free_dev_info_signed;
+	}
+
+	rc = hl_fw_get_dev_info_signed(hpriv->hdev,
+					dev_info_signed, args->sec_attest_nonce);
+	if (rc)
+		goto free_info;
+
+	info->nonce = le32_to_cpu(dev_info_signed->nonce);
+	info->info_sig_len = dev_info_signed->info_sig_len;
+	info->pub_data_len = le16_to_cpu(dev_info_signed->pub_data_len);
+	info->certificate_len = le16_to_cpu(dev_info_signed->certificate_len);
+	info->dev_info_len = sizeof(struct cpucp_info);
+	memcpy(&info->info_sig, &dev_info_signed->info_sig, sizeof(info->info_sig));
+	memcpy(&info->public_data, &dev_info_signed->public_data, sizeof(info->public_data));
+	memcpy(&info->certificate, &dev_info_signed->certificate, sizeof(info->certificate));
+	memcpy(&info->dev_info, &dev_info_signed->info, info->dev_info_len);
+
+	rc = copy_to_user(out, info, min_t(size_t, max_size, sizeof(*info))) ? -EFAULT : 0;
+
+free_info:
+	kfree(info);
+free_dev_info_signed:
+	kfree(dev_info_signed);
+
+	return rc;
+}
+
+
 static int eventfd_register(struct hl_fpriv *hpriv, struct hl_info_args *args)
 {
 	int rc;
@@ -1089,6 +1139,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_FW_GENERIC_REQ:
 		return send_fw_generic_request(hdev, args);
 
+	case HL_INFO_DEV_SIGNED:
+		return dev_info_signed(hpriv, args);
+
 	default:
 		dev_err(dev, "Invalid request %d\n", args->op);
 		rc = -EINVAL;
diff --git a/drivers/accel/habanalabs/common/hwmon.c b/drivers/accel/habanalabs/common/hwmon.c
index 8598056216..1ee2ee07e9 100644
--- a/drivers/accel/habanalabs/common/hwmon.c
+++ b/drivers/accel/habanalabs/common/hwmon.c
@@ -578,10 +578,6 @@ int hl_get_temperature(struct hl_device *hdev,
 				CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.sensor_index = __cpu_to_le16(sensor_index);
 	pkt.type = __cpu_to_le16(attr);
-
-	dev_dbg(hdev->dev, "get temp, ctl 0x%x, sensor %d, type %d\n",
-		pkt.ctl, pkt.sensor_index, pkt.type);
-
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
 						0, &result);
 
diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c
index 0b8689fe0b..3348ad12c2 100644
--- a/drivers/accel/habanalabs/common/memory.c
+++ b/drivers/accel/habanalabs/common/memory.c
@@ -955,8 +955,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 				(i + 1) == phys_pg_pack->npages);
 		if (rc) {
 			dev_err(hdev->dev,
-				"map failed for handle %u, npages: %llu, mapped: %llu",
-				phys_pg_pack->handle, phys_pg_pack->npages,
+				"map failed (%d) for handle %u, npages: %llu, mapped: %llu\n",
+				rc, phys_pg_pack->handle, phys_pg_pack->npages,
 				mapped_pg_cnt);
 			goto err;
 		}
@@ -1186,7 +1186,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device
 
 	rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
 	if (rc) {
-		dev_err(hdev->dev, "mapping page pack failed for handle %u\n", handle);
+		dev_err(hdev->dev, "mapping page pack failed (%d) for handle %u\n",
+			rc, handle);
 		mutex_unlock(&hdev->mmu_lock);
 		goto map_err;
 	}
diff --git a/drivers/accel/habanalabs/common/sysfs.c b/drivers/accel/habanalabs/common/sysfs.c
index 2786063730..8a9f988321 100644
--- a/drivers/accel/habanalabs/common/sysfs.c
+++ b/drivers/accel/habanalabs/common/sysfs.c
@@ -8,6 +8,7 @@
 #include "habanalabs.h"
 
 #include <linux/pci.h>
+#include <linux/types.h>
 
 static ssize_t clk_max_freq_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
@@ -80,12 +81,27 @@ static ssize_t vrm_ver_show(struct device *dev, struct device_attribute *attr, c
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
 	struct cpucp_info *cpucp_info;
+	u32 infineon_second_stage_version;
+	u32 infineon_second_stage_first_instance;
+	u32 infineon_second_stage_second_instance;
+	u32 infineon_second_stage_third_instance;
+	u32 mask = 0xff;
 
 	cpucp_info = &hdev->asic_prop.cpucp_info;
 
+	infineon_second_stage_version = le32_to_cpu(cpucp_info->infineon_second_stage_version);
+	infineon_second_stage_first_instance = infineon_second_stage_version & mask;
+	infineon_second_stage_second_instance =
+					(infineon_second_stage_version >> 8) & mask;
+	infineon_second_stage_third_instance =
+					(infineon_second_stage_version >> 16) & mask;
+
 	if (cpucp_info->infineon_second_stage_version)
-		return sprintf(buf, "%#04x %#04x\n", le32_to_cpu(cpucp_info->infineon_version),
-				le32_to_cpu(cpucp_info->infineon_second_stage_version));
+		return sprintf(buf, "%#04x %#04x:%#04x:%#04x\n",
+				le32_to_cpu(cpucp_info->infineon_version),
+				infineon_second_stage_first_instance,
+				infineon_second_stage_second_instance,
+				infineon_second_stage_third_instance);
 	else
 		return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
 }
@@ -386,6 +402,21 @@ static ssize_t security_enabled_show(struct device *dev,
 	return sprintf(buf, "%d\n", hdev->asic_prop.fw_security_enabled);
 }
 
+static ssize_t module_id_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%u\n", le32_to_cpu(hdev->asic_prop.cpucp_info.card_location));
+}
+
+static ssize_t parent_device_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%s\n", HL_DEV_NAME(hdev));
+}
+
 static DEVICE_ATTR_RO(armcp_kernel_ver);
 static DEVICE_ATTR_RO(armcp_ver);
 static DEVICE_ATTR_RO(cpld_ver);
@@ -405,6 +436,8 @@ static DEVICE_ATTR_RO(thermal_ver);
 static DEVICE_ATTR_RO(uboot_ver);
 static DEVICE_ATTR_RO(fw_os_ver);
 static DEVICE_ATTR_RO(security_enabled);
+static DEVICE_ATTR_RO(module_id);
+static DEVICE_ATTR_RO(parent_device);
 
 static struct bin_attribute bin_attr_eeprom = {
 	.attr = {.name = "eeprom", .mode = (0444)},
@@ -430,6 +463,8 @@ static struct attribute *hl_dev_attrs[] = {
 	&dev_attr_uboot_ver.attr,
 	&dev_attr_fw_os_ver.attr,
 	&dev_attr_security_enabled.attr,
+	&dev_attr_module_id.attr,
+	&dev_attr_parent_device.attr,
 	NULL,
 };
 
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index bc6e338ef2..e0e5615ef9 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -7858,39 +7858,44 @@ static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
 	return !!ecc_data->is_critical;
 }
 
-static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u64 event_mask)
+static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u32 engine_id)
 {
-	u32 lo, hi, cq_ptr_size, arc_cq_ptr_size;
-	u64 cq_ptr, arc_cq_ptr, cp_current_inst;
+	struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
+	u64 cq_ptr, cp_current_inst;
+	u32 lo, hi, cq_size, cp_sts;
+	bool is_arc_cq;
 
-	lo = RREG32(qman_base + QM_CQ_PTR_LO_4_OFFSET);
-	hi = RREG32(qman_base + QM_CQ_PTR_HI_4_OFFSET);
-	cq_ptr = ((u64) hi) << 32 | lo;
-	cq_ptr_size = RREG32(qman_base + QM_CQ_TSIZE_4_OFFSET);
+	cp_sts = RREG32(qman_base + QM_CP_STS_4_OFFSET);
+	is_arc_cq = FIELD_GET(PDMA0_QM_CP_STS_CUR_CQ_MASK, cp_sts); /* 0 - legacy CQ, 1 - ARC_CQ */
 
-	lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_OFFSET);
-	hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_OFFSET);
-	arc_cq_ptr = ((u64) hi) << 32 | lo;
-	arc_cq_ptr_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_OFFSET);
+	if (is_arc_cq) {
+		lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_STS_OFFSET);
+		hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_STS_OFFSET);
+		cq_ptr = ((u64) hi) << 32 | lo;
+		cq_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_STS_OFFSET);
+	} else {
+		lo = RREG32(qman_base + QM_CQ_PTR_LO_STS_4_OFFSET);
+		hi = RREG32(qman_base + QM_CQ_PTR_HI_STS_4_OFFSET);
+		cq_ptr = ((u64) hi) << 32 | lo;
+		cq_size = RREG32(qman_base + QM_CQ_TSIZE_STS_4_OFFSET);
+	}
 
 	lo = RREG32(qman_base + QM_CP_CURRENT_INST_LO_4_OFFSET);
 	hi = RREG32(qman_base + QM_CP_CURRENT_INST_HI_4_OFFSET);
 	cp_current_inst = ((u64) hi) << 32 | lo;
 
 	dev_info(hdev->dev,
-		"LowerQM. CQ: {ptr %#llx, size %u}, ARC_CQ: {ptr %#llx, size %u}, CP: {instruction %#llx}\n",
-		cq_ptr, cq_ptr_size, arc_cq_ptr, arc_cq_ptr_size, cp_current_inst);
+		"LowerQM. %sCQ: {ptr %#llx, size %u}, CP: {instruction %#018llx}\n",
+		is_arc_cq ? "ARC_" : "", cq_ptr, cq_size, cp_current_inst);
 
-	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
-		if (arc_cq_ptr) {
-			hdev->captured_err_info.undef_opcode.cq_addr = arc_cq_ptr;
-			hdev->captured_err_info.undef_opcode.cq_size = arc_cq_ptr_size;
-		} else {
-			hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
-			hdev->captured_err_info.undef_opcode.cq_size = cq_ptr_size;
-		}
-
-		hdev->captured_err_info.undef_opcode.stream_id = QMAN_STREAMS;
+	if (undef_opcode->write_enable) {
+		memset(undef_opcode, 0, sizeof(*undef_opcode));
+		undef_opcode->timestamp = ktime_get();
+		undef_opcode->cq_addr = cq_ptr;
+		undef_opcode->cq_size = cq_size;
+		undef_opcode->engine_id = engine_id;
+		undef_opcode->stream_id = QMAN_STREAMS;
+		undef_opcode->write_enable = 0;
 	}
 }
 
@@ -7929,19 +7934,12 @@ static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type
 				error_count++;
 			}
 
-		/* check for undefined opcode */
-		if (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK) {
+		/* Check for undefined opcode error in lower QM */
+		if ((i == QMAN_STREAMS) &&
+				(glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK)) {
+			handle_lower_qman_data_on_err(hdev, qman_base,
+							gaudi2_queue_id_to_engine_id[qid_base]);
 			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
-			if (hdev->captured_err_info.undef_opcode.write_enable) {
-				memset(&hdev->captured_err_info.undef_opcode, 0,
-						sizeof(hdev->captured_err_info.undef_opcode));
-				hdev->captured_err_info.undef_opcode.timestamp = ktime_get();
-				hdev->captured_err_info.undef_opcode.engine_id =
-							gaudi2_queue_id_to_engine_id[qid_base];
-			}
-
-			if (i == QMAN_STREAMS)
-				handle_lower_qman_data_on_err(hdev, qman_base, *event_mask);
 		}
 	}
 
@@ -10005,6 +10003,8 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
+		if (hl_is_fw_sw_ver_equal_or_greater(hdev, 1, 13))
+			is_critical = true;
 		break;
 
 	case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
diff --git a/drivers/accel/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h b/drivers/accel/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h
index a08378d080..d21fcd3880 100644
--- a/drivers/accel/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h
+++ b/drivers/accel/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h
@@ -242,14 +242,15 @@
 #define QM_FENCE2_OFFSET		(mmPDMA0_QM_CP_FENCE2_RDATA_0 - mmPDMA0_QM_BASE)
 #define QM_SEI_STATUS_OFFSET		(mmPDMA0_QM_SEI_STATUS - mmPDMA0_QM_BASE)
 
-#define QM_CQ_PTR_LO_4_OFFSET		(mmPDMA0_QM_CQ_PTR_LO_4 - mmPDMA0_QM_BASE)
-#define QM_CQ_PTR_HI_4_OFFSET		(mmPDMA0_QM_CQ_PTR_HI_4 - mmPDMA0_QM_BASE)
-#define QM_CQ_TSIZE_4_OFFSET		(mmPDMA0_QM_CQ_TSIZE_4 - mmPDMA0_QM_BASE)
+#define QM_CQ_TSIZE_STS_4_OFFSET	(mmPDMA0_QM_CQ_TSIZE_STS_4 - mmPDMA0_QM_BASE)
+#define QM_CQ_PTR_LO_STS_4_OFFSET	(mmPDMA0_QM_CQ_PTR_LO_STS_4 - mmPDMA0_QM_BASE)
+#define QM_CQ_PTR_HI_STS_4_OFFSET	(mmPDMA0_QM_CQ_PTR_HI_STS_4 - mmPDMA0_QM_BASE)
 
-#define QM_ARC_CQ_PTR_LO_OFFSET		(mmPDMA0_QM_ARC_CQ_PTR_LO - mmPDMA0_QM_BASE)
-#define QM_ARC_CQ_PTR_HI_OFFSET		(mmPDMA0_QM_ARC_CQ_PTR_HI - mmPDMA0_QM_BASE)
-#define QM_ARC_CQ_TSIZE_OFFSET		(mmPDMA0_QM_ARC_CQ_TSIZE - mmPDMA0_QM_BASE)
+#define QM_ARC_CQ_TSIZE_STS_OFFSET	(mmPDMA0_QM_ARC_CQ_TSIZE_STS - mmPDMA0_QM_BASE)
+#define QM_ARC_CQ_PTR_LO_STS_OFFSET	(mmPDMA0_QM_ARC_CQ_PTR_LO_STS - mmPDMA0_QM_BASE)
+#define QM_ARC_CQ_PTR_HI_STS_OFFSET	(mmPDMA0_QM_ARC_CQ_PTR_HI_STS - mmPDMA0_QM_BASE)
 
+#define QM_CP_STS_4_OFFSET		(mmPDMA0_QM_CP_STS_4 - mmPDMA0_QM_BASE)
 #define QM_CP_CURRENT_INST_LO_4_OFFSET	(mmPDMA0_QM_CP_CURRENT_INST_LO_4 - mmPDMA0_QM_BASE)
 #define QM_CP_CURRENT_INST_HI_4_OFFSET	(mmPDMA0_QM_CP_CURRENT_INST_HI_4 - mmPDMA0_QM_BASE)
 
diff --git a/drivers/accel/ivpu/Kconfig b/drivers/accel/ivpu/Kconfig
index 1a4c4ed9d1..682c532452 100644
--- a/drivers/accel/ivpu/Kconfig
+++ b/drivers/accel/ivpu/Kconfig
@@ -1,16 +1,17 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
 config DRM_ACCEL_IVPU
-	tristate "Intel VPU for Meteor Lake and newer"
+	tristate "Intel NPU (Neural Processing Unit)"
 	depends on DRM_ACCEL
 	depends on X86_64 && !UML
 	depends on PCI && PCI_MSI
 	select FW_LOADER
-	select SHMEM
+	select DRM_GEM_SHMEM_HELPER
 	select GENERIC_ALLOCATOR
 	help
-	  Choose this option if you have a system that has an 14th generation Intel CPU
-	  or newer. VPU stands for Versatile Processing Unit and it's a CPU-integrated
-	  inference accelerator for Computer Vision and Deep Learning applications.
+	  Choose this option if you have a system with an 14th generation
+	  Intel CPU (Meteor Lake) or newer. Intel NPU (formerly called Intel VPU)
+	  is a CPU-integrated inference accelerator for Computer Vision
+	  and Deep Learning applications.
 
 	  If "M" is selected, the module will be called intel_vpu.
diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
index ea453b985b..7cb962e214 100644
--- a/drivers/accel/ivpu/ivpu_debugfs.c
+++ b/drivers/accel/ivpu/ivpu_debugfs.c
@@ -14,6 +14,7 @@
 #include "ivpu_fw.h"
 #include "ivpu_fw_log.h"
 #include "ivpu_gem.h"
+#include "ivpu_hw.h"
 #include "ivpu_jsm_msg.h"
 #include "ivpu_pm.h"
 
@@ -101,7 +102,7 @@ static int reset_pending_show(struct seq_file *s, void *v)
 {
 	struct ivpu_device *vdev = seq_to_ivpu(s);
 
-	seq_printf(s, "%d\n", atomic_read(&vdev->pm->in_reset));
+	seq_printf(s, "%d\n", atomic_read(&vdev->pm->reset_pending));
 	return 0;
 }
 
@@ -115,6 +116,33 @@ static const struct drm_debugfs_info vdev_debugfs_list[] = {
 	{"reset_pending", reset_pending_show, 0},
 };
 
+static ssize_t
+dvfs_mode_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
+{
+	struct ivpu_device *vdev = file->private_data;
+	struct ivpu_fw_info *fw = vdev->fw;
+	u32 dvfs_mode;
+	int ret;
+
+	ret = kstrtou32_from_user(user_buf, size, 0, &dvfs_mode);
+	if (ret < 0)
+		return ret;
+
+	fw->dvfs_mode = dvfs_mode;
+
+	ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev));
+	if (ret)
+		return ret;
+
+	return size;
+}
+
+static const struct file_operations dvfs_mode_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.write = dvfs_mode_fops_write,
+};
+
 static int fw_log_show(struct seq_file *s, void *v)
 {
 	struct ivpu_device *vdev = s->private;
@@ -151,6 +179,33 @@ static const struct file_operations fw_log_fops = {
 	.release = single_release,
 };
 
+static ssize_t
+fw_profiling_freq_fops_write(struct file *file, const char __user *user_buf,
+			     size_t size, loff_t *pos)
+{
+	struct ivpu_device *vdev = file->private_data;
+	bool enable;
+	int ret;
+
+	ret = kstrtobool_from_user(user_buf, size, &enable);
+	if (ret < 0)
+		return ret;
+
+	ivpu_hw_profiling_freq_drive(vdev, enable);
+
+	ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev));
+	if (ret)
+		return ret;
+
+	return size;
+}
+
+static const struct file_operations fw_profiling_freq_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.write = fw_profiling_freq_fops_write,
+};
+
 static ssize_t
 fw_trace_destination_mask_fops_write(struct file *file, const char __user *user_buf,
 				     size_t size, loff_t *pos)
@@ -251,11 +306,18 @@ static ssize_t
 ivpu_force_recovery_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
 {
 	struct ivpu_device *vdev = file->private_data;
+	int ret;
 
 	if (!size)
 		return -EINVAL;
 
-	ivpu_pm_schedule_recovery(vdev);
+	ret = ivpu_rpm_get(vdev);
+	if (ret)
+		return ret;
+
+	ivpu_pm_trigger_recovery(vdev, "debugfs");
+	flush_work(&vdev->pm->recovery_work);
+	ivpu_rpm_put(vdev);
 	return size;
 }
 
@@ -280,6 +342,9 @@ void ivpu_debugfs_init(struct ivpu_device *vdev)
 	debugfs_create_file("force_recovery", 0200, debugfs_root, vdev,
 			    &ivpu_force_recovery_fops);
 
+	debugfs_create_file("dvfs_mode", 0200, debugfs_root, vdev,
+			    &dvfs_mode_fops);
+
 	debugfs_create_file("fw_log", 0644, debugfs_root, vdev,
 			    &fw_log_fops);
 	debugfs_create_file("fw_trace_destination_mask", 0200, debugfs_root, vdev,
@@ -291,4 +356,8 @@ void ivpu_debugfs_init(struct ivpu_device *vdev)
 
 	debugfs_create_file("reset_engine", 0200, debugfs_root, vdev,
 			    &ivpu_reset_engine_fops);
+
+	if (ivpu_hw_gen(vdev) >= IVPU_HW_40XX)
+		debugfs_create_file("fw_profiling_freq_drive", 0200,
+				    debugfs_root, vdev, &fw_profiling_freq_fops);
 }
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index c856c417a1..b35c7aedca 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -6,6 +6,7 @@
 #include <linux/firmware.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/pm_runtime.h>
 
 #include <drm/drm_accel.h>
 #include <drm/drm_file.h>
@@ -17,6 +18,7 @@
 #include "ivpu_debugfs.h"
 #include "ivpu_drv.h"
 #include "ivpu_fw.h"
+#include "ivpu_fw_log.h"
 #include "ivpu_gem.h"
 #include "ivpu_hw.h"
 #include "ivpu_ipc.h"
@@ -31,8 +33,6 @@
 			   __stringify(DRM_IVPU_DRIVER_MINOR) "."
 #endif
 
-static const struct drm_driver driver;
-
 static struct lock_class_key submitted_jobs_xa_lock_class_key;
 
 int ivpu_dbg_mask;
@@ -41,7 +41,7 @@ MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros.");
 
 int ivpu_test_mode;
 module_param_named_unsafe(test_mode, ivpu_test_mode, int, 0644);
-MODULE_PARM_DESC(test_mode, "Test mode: 0 - normal operation, 1 - fw unit test, 2 - null hw");
+MODULE_PARM_DESC(test_mode, "Test mode mask. See IVPU_TEST_MODE_* macros.");
 
 u8 ivpu_pll_min_ratio;
 module_param_named(pll_min_ratio, ivpu_pll_min_ratio, byte, 0644);
@@ -67,22 +67,20 @@ struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv)
 	return file_priv;
 }
 
-struct ivpu_file_priv *ivpu_file_priv_get_by_ctx_id(struct ivpu_device *vdev, unsigned long id)
+static void file_priv_unbind(struct ivpu_device *vdev, struct ivpu_file_priv *file_priv)
 {
-	struct ivpu_file_priv *file_priv;
-
-	xa_lock_irq(&vdev->context_xa);
-	file_priv = xa_load(&vdev->context_xa, id);
-	/* file_priv may still be in context_xa during file_priv_release() */
-	if (file_priv && !kref_get_unless_zero(&file_priv->ref))
-		file_priv = NULL;
-	xa_unlock_irq(&vdev->context_xa);
-
-	if (file_priv)
-		ivpu_dbg(vdev, KREF, "file_priv get by id: ctx %u refcount %u\n",
-			 file_priv->ctx.id, kref_read(&file_priv->ref));
-
-	return file_priv;
+	mutex_lock(&file_priv->lock);
+	if (file_priv->bound) {
+		ivpu_dbg(vdev, FILE, "file_priv unbind: ctx %u\n", file_priv->ctx.id);
+
+		ivpu_cmdq_release_all_locked(file_priv);
+		ivpu_jsm_context_release(vdev, file_priv->ctx.id);
+		ivpu_bo_unbind_all_bos_from_context(vdev, &file_priv->ctx);
+		ivpu_mmu_user_context_fini(vdev, &file_priv->ctx);
+		file_priv->bound = false;
+		drm_WARN_ON(&vdev->drm, !xa_erase_irq(&vdev->context_xa, file_priv->ctx.id));
+	}
+	mutex_unlock(&file_priv->lock);
 }
 
 static void file_priv_release(struct kref *ref)
@@ -90,13 +88,15 @@ static void file_priv_release(struct kref *ref)
 	struct ivpu_file_priv *file_priv = container_of(ref, struct ivpu_file_priv, ref);
 	struct ivpu_device *vdev = file_priv->vdev;
 
-	ivpu_dbg(vdev, FILE, "file_priv release: ctx %u\n", file_priv->ctx.id);
+	ivpu_dbg(vdev, FILE, "file_priv release: ctx %u bound %d\n",
+		 file_priv->ctx.id, (bool)file_priv->bound);
+
+	pm_runtime_get_sync(vdev->drm.dev);
+	mutex_lock(&vdev->context_list_lock);
+	file_priv_unbind(vdev, file_priv);
+	mutex_unlock(&vdev->context_list_lock);
+	pm_runtime_put_autosuspend(vdev->drm.dev);
 
-	ivpu_cmdq_release_all(file_priv);
-	ivpu_bo_remove_all_bos_from_context(&file_priv->ctx);
-	ivpu_jsm_context_release(vdev, file_priv->ctx.id);
-	ivpu_mmu_user_context_fini(vdev, &file_priv->ctx);
-	drm_WARN_ON(&vdev->drm, xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv);
 	mutex_destroy(&file_priv->lock);
 	kfree(file_priv);
 }
@@ -131,22 +131,6 @@ static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param
 	return 0;
 }
 
-static int ivpu_get_core_clock_rate(struct ivpu_device *vdev, u64 *clk_rate)
-{
-	int ret;
-
-	ret = ivpu_rpm_get_if_active(vdev);
-	if (ret < 0)
-		return ret;
-
-	*clk_rate = ret ? ivpu_hw_reg_pll_freq_get(vdev) : 0;
-
-	if (ret)
-		ivpu_rpm_put(vdev);
-
-	return 0;
-}
-
 static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
 	struct ivpu_file_priv *file_priv = file->driver_priv;
@@ -170,7 +154,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
 		args->value = vdev->platform;
 		break;
 	case DRM_IVPU_PARAM_CORE_CLOCK_RATE:
-		ret = ivpu_get_core_clock_rate(vdev, &args->value);
+		args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio);
 		break;
 	case DRM_IVPU_PARAM_NUM_CONTEXTS:
 		args->value = ivpu_get_context_count(vdev);
@@ -178,9 +162,6 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
 	case DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS:
 		args->value = vdev->hw->ranges.user.start;
 		break;
-	case DRM_IVPU_PARAM_CONTEXT_PRIORITY:
-		args->value = file_priv->priority;
-		break;
 	case DRM_IVPU_PARAM_CONTEXT_ID:
 		args->value = file_priv->ctx.id;
 		break;
@@ -220,17 +201,10 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
 
 static int ivpu_set_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
-	struct ivpu_file_priv *file_priv = file->driver_priv;
 	struct drm_ivpu_param *args = data;
 	int ret = 0;
 
 	switch (args->param) {
-	case DRM_IVPU_PARAM_CONTEXT_PRIORITY:
-		if (args->value <= DRM_IVPU_CONTEXT_PRIORITY_REALTIME)
-			file_priv->priority = args->value;
-		else
-			ret = -EINVAL;
-		break;
 	default:
 		ret = -EINVAL;
 	}
@@ -243,50 +217,53 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file)
 	struct ivpu_device *vdev = to_ivpu_device(dev);
 	struct ivpu_file_priv *file_priv;
 	u32 ctx_id;
-	void *old;
-	int ret;
+	int idx, ret;
 
-	ret = xa_alloc_irq(&vdev->context_xa, &ctx_id, NULL, vdev->context_xa_limit, GFP_KERNEL);
-	if (ret) {
-		ivpu_err(vdev, "Failed to allocate context id: %d\n", ret);
-		return ret;
-	}
+	if (!drm_dev_enter(dev, &idx))
+		return -ENODEV;
 
 	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
 	if (!file_priv) {
 		ret = -ENOMEM;
-		goto err_xa_erase;
+		goto err_dev_exit;
 	}
 
 	file_priv->vdev = vdev;
-	file_priv->priority = DRM_IVPU_CONTEXT_PRIORITY_NORMAL;
+	file_priv->bound = true;
 	kref_init(&file_priv->ref);
 	mutex_init(&file_priv->lock);
 
+	mutex_lock(&vdev->context_list_lock);
+
+	ret = xa_alloc_irq(&vdev->context_xa, &ctx_id, file_priv,
+			   vdev->context_xa_limit, GFP_KERNEL);
+	if (ret) {
+		ivpu_err(vdev, "Failed to allocate context id: %d\n", ret);
+		goto err_unlock;
+	}
+
 	ret = ivpu_mmu_user_context_init(vdev, &file_priv->ctx, ctx_id);
 	if (ret)
-		goto err_mutex_destroy;
+		goto err_xa_erase;
 
-	old = xa_store_irq(&vdev->context_xa, ctx_id, file_priv, GFP_KERNEL);
-	if (xa_is_err(old)) {
-		ret = xa_err(old);
-		ivpu_err(vdev, "Failed to store context %u: %d\n", ctx_id, ret);
-		goto err_ctx_fini;
-	}
+	mutex_unlock(&vdev->context_list_lock);
+	drm_dev_exit(idx);
+
+	file->driver_priv = file_priv;
 
 	ivpu_dbg(vdev, FILE, "file_priv create: ctx %u process %s pid %d\n",
 		 ctx_id, current->comm, task_pid_nr(current));
 
-	file->driver_priv = file_priv;
 	return 0;
 
-err_ctx_fini:
-	ivpu_mmu_user_context_fini(vdev, &file_priv->ctx);
-err_mutex_destroy:
-	mutex_destroy(&file_priv->lock);
-	kfree(file_priv);
 err_xa_erase:
 	xa_erase_irq(&vdev->context_xa, ctx_id);
+err_unlock:
+	mutex_unlock(&vdev->context_list_lock);
+	mutex_destroy(&file_priv->lock);
+	kfree(file_priv);
+err_dev_exit:
+	drm_dev_exit(idx);
 	return ret;
 }
 
@@ -317,16 +294,14 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev)
 	unsigned long timeout;
 	int ret;
 
-	if (ivpu_test_mode == IVPU_TEST_MODE_FW_TEST)
+	if (ivpu_test_mode & IVPU_TEST_MODE_FW_TEST)
 		return 0;
 
-	ivpu_ipc_consumer_add(vdev, &cons, IVPU_IPC_CHAN_BOOT_MSG);
+	ivpu_ipc_consumer_add(vdev, &cons, IVPU_IPC_CHAN_BOOT_MSG, NULL);
 
 	timeout = jiffies + msecs_to_jiffies(vdev->timeout.boot);
 	while (1) {
-		ret = ivpu_ipc_irq_handler(vdev);
-		if (ret)
-			break;
+		ivpu_ipc_irq_handler(vdev, NULL);
 		ret = ivpu_ipc_receive(vdev, &cons, &ipc_hdr, NULL, 0);
 		if (ret != -ETIMEDOUT || time_after_eq(jiffies, timeout))
 			break;
@@ -344,8 +319,6 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev)
 
 	if (!ret)
 		ivpu_dbg(vdev, PM, "VPU ready message received successfully\n");
-	else
-		ivpu_hw_diagnose_failure(vdev);
 
 	return ret;
 }
@@ -362,7 +335,7 @@ int ivpu_boot(struct ivpu_device *vdev)
 	int ret;
 
 	/* Update boot params located at first 4KB of FW memory */
-	ivpu_fw_boot_params_setup(vdev, vdev->fw->mem->kvaddr);
+	ivpu_fw_boot_params_setup(vdev, ivpu_bo_vaddr(vdev->fw->mem));
 
 	ret = ivpu_hw_boot_fw(vdev);
 	if (ret) {
@@ -373,6 +346,9 @@ int ivpu_boot(struct ivpu_device *vdev)
 	ret = ivpu_wait_for_ready(vdev);
 	if (ret) {
 		ivpu_err(vdev, "Failed to boot the firmware: %d\n", ret);
+		ivpu_hw_diagnose_failure(vdev);
+		ivpu_mmu_evtq_dump(vdev);
+		ivpu_fw_log_dump(vdev);
 		return ret;
 	}
 
@@ -414,7 +390,9 @@ static const struct drm_driver driver = {
 
 	.open = ivpu_open,
 	.postclose = ivpu_postclose,
-	.gem_prime_import = ivpu_gem_prime_import,
+
+	.gem_create_object = ivpu_gem_create_object,
+	.gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table,
 
 	.ioctls = ivpu_drm_ioctls,
 	.num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls),
@@ -427,6 +405,13 @@ static const struct drm_driver driver = {
 	.minor = DRM_IVPU_DRIVER_MINOR,
 };
 
+static irqreturn_t ivpu_irq_thread_handler(int irq, void *arg)
+{
+	struct ivpu_device *vdev = arg;
+
+	return ivpu_ipc_irq_thread_handler(vdev);
+}
+
 static int ivpu_irq_init(struct ivpu_device *vdev)
 {
 	struct pci_dev *pdev = to_pci_dev(vdev->drm.dev);
@@ -440,8 +425,8 @@ static int ivpu_irq_init(struct ivpu_device *vdev)
 
 	vdev->irq = pci_irq_vector(pdev, 0);
 
-	ret = devm_request_irq(vdev->drm.dev, vdev->irq, vdev->hw->ops->irq_handler,
-			       IRQF_NO_AUTOEN, DRIVER_NAME, vdev);
+	ret = devm_request_threaded_irq(vdev->drm.dev, vdev->irq, vdev->hw->ops->irq_handler,
+					ivpu_irq_thread_handler, IRQF_NO_AUTOEN, DRIVER_NAME, vdev);
 	if (ret)
 		ivpu_err(vdev, "Failed to request an IRQ %d\n", ret);
 
@@ -529,9 +514,18 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
 	vdev->context_xa_limit.min = IVPU_USER_CONTEXT_MIN_SSID;
 	vdev->context_xa_limit.max = IVPU_USER_CONTEXT_MAX_SSID;
 	atomic64_set(&vdev->unique_id_counter, 0);
-	xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC);
+	xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
 	xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
 	lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
+	INIT_LIST_HEAD(&vdev->bo_list);
+
+	ret = drmm_mutex_init(&vdev->drm, &vdev->context_list_lock);
+	if (ret)
+		goto err_xa_destroy;
+
+	ret = drmm_mutex_init(&vdev->drm, &vdev->bo_list_lock);
+	if (ret)
+		goto err_xa_destroy;
 
 	ret = ivpu_pci_init(vdev);
 	if (ret)
@@ -549,7 +543,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
 	/* Power up early so the rest of init code can access VPU registers */
 	ret = ivpu_hw_power_up(vdev);
 	if (ret)
-		goto err_xa_destroy;
+		goto err_power_down;
 
 	ret = ivpu_mmu_global_context_init(vdev);
 	if (ret)
@@ -573,20 +567,15 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
 
 	ivpu_pm_init(vdev);
 
-	ret = ivpu_job_done_thread_init(vdev);
-	if (ret)
-		goto err_ipc_fini;
-
 	ret = ivpu_boot(vdev);
 	if (ret)
-		goto err_job_done_thread_fini;
+		goto err_ipc_fini;
 
+	ivpu_job_done_consumer_init(vdev);
 	ivpu_pm_enable(vdev);
 
 	return 0;
 
-err_job_done_thread_fini:
-	ivpu_job_done_thread_fini(vdev);
 err_ipc_fini:
 	ivpu_ipc_fini(vdev);
 err_fw_fini:
@@ -605,14 +594,30 @@ err_xa_destroy:
 	return ret;
 }
 
+static void ivpu_bo_unbind_all_user_contexts(struct ivpu_device *vdev)
+{
+	struct ivpu_file_priv *file_priv;
+	unsigned long ctx_id;
+
+	mutex_lock(&vdev->context_list_lock);
+
+	xa_for_each(&vdev->context_xa, ctx_id, file_priv)
+		file_priv_unbind(vdev, file_priv);
+
+	mutex_unlock(&vdev->context_list_lock);
+}
+
 static void ivpu_dev_fini(struct ivpu_device *vdev)
 {
 	ivpu_pm_disable(vdev);
 	ivpu_shutdown(vdev);
 	if (IVPU_WA(d3hot_after_power_off))
 		pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
-	ivpu_job_done_thread_fini(vdev);
+
+	ivpu_jobs_abort_all(vdev);
+	ivpu_job_done_consumer_fini(vdev);
 	ivpu_pm_cancel_recovery(vdev);
+	ivpu_bo_unbind_all_user_contexts(vdev);
 
 	ivpu_ipc_fini(vdev);
 	ivpu_fw_fini(vdev);
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 417ddeca85..069ace4adb 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -17,9 +17,10 @@
 #include <uapi/drm/ivpu_accel.h>
 
 #include "ivpu_mmu_context.h"
+#include "ivpu_ipc.h"
 
 #define DRIVER_NAME "intel_vpu"
-#define DRIVER_DESC "Driver for Intel Versatile Processing Unit (VPU)"
+#define DRIVER_DESC "Driver for Intel NPU (Neural Processing Unit)"
 #define DRIVER_DATE "20230117"
 
 #define PCI_DEVICE_ID_MTL   0x7d1d
@@ -55,6 +56,7 @@
 #define IVPU_DBG_JSM	 BIT(10)
 #define IVPU_DBG_KREF	 BIT(11)
 #define IVPU_DBG_RPM	 BIT(12)
+#define IVPU_DBG_MMU_MAP BIT(13)
 
 #define ivpu_err(vdev, fmt, ...) \
 	drm_err(&(vdev)->drm, "%s(): " fmt, __func__, ##__VA_ARGS__)
@@ -88,6 +90,7 @@ struct ivpu_wa_table {
 	bool d3hot_after_power_off;
 	bool interrupt_clear_with_0;
 	bool disable_clock_relinquish;
+	bool disable_d0i3_msg;
 };
 
 struct ivpu_hw_info;
@@ -112,11 +115,15 @@ struct ivpu_device {
 
 	struct ivpu_mmu_context gctx;
 	struct ivpu_mmu_context rctx;
+	struct mutex context_list_lock; /* Protects user context addition/removal */
 	struct xarray context_xa;
 	struct xa_limit context_xa_limit;
 
+	struct mutex bo_list_lock; /* Protects bo_list */
+	struct list_head bo_list;
+
 	struct xarray submitted_jobs_xa;
-	struct task_struct *job_done_thread;
+	struct ivpu_ipc_consumer job_done_consumer;
 
 	atomic64_t unique_id_counter;
 
@@ -126,6 +133,7 @@ struct ivpu_device {
 		int tdr;
 		int reschedule_suspend;
 		int autosuspend;
+		int d0i3_entry_msg;
 	} timeout;
 };
 
@@ -139,8 +147,8 @@ struct ivpu_file_priv {
 	struct mutex lock; /* Protects cmdq */
 	struct ivpu_cmdq *cmdq[IVPU_NUM_ENGINES];
 	struct ivpu_mmu_context ctx;
-	u32 priority;
 	bool has_mmu_faults;
+	bool bound;
 };
 
 extern int ivpu_dbg_mask;
@@ -148,13 +156,14 @@ extern u8 ivpu_pll_min_ratio;
 extern u8 ivpu_pll_max_ratio;
 extern bool ivpu_disable_mmu_cont_pages;
 
-#define IVPU_TEST_MODE_DISABLED  0
-#define IVPU_TEST_MODE_FW_TEST   1
-#define IVPU_TEST_MODE_NULL_HW   2
+#define IVPU_TEST_MODE_FW_TEST            BIT(0)
+#define IVPU_TEST_MODE_NULL_HW            BIT(1)
+#define IVPU_TEST_MODE_NULL_SUBMISSION    BIT(2)
+#define IVPU_TEST_MODE_D0I3_MSG_DISABLE   BIT(4)
+#define IVPU_TEST_MODE_D0I3_MSG_ENABLE    BIT(5)
 extern int ivpu_test_mode;
 
 struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv);
-struct ivpu_file_priv *ivpu_file_priv_get_by_ctx_id(struct ivpu_device *vdev, unsigned long id);
 void ivpu_file_priv_put(struct ivpu_file_priv **link);
 
 int ivpu_boot(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index 691da521dd..5fa8bd4603 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -33,12 +33,17 @@
 
 #define ADDR_TO_L2_CACHE_CFG(addr) ((addr) >> 31)
 
-#define IVPU_FW_CHECK_API(vdev, fw_hdr, name, min_major) \
+/* Check if FW API is compatible with the driver */
+#define IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, name, min_major) \
 	ivpu_fw_check_api(vdev, fw_hdr, #name, \
 			  VPU_##name##_API_VER_INDEX, \
 			  VPU_##name##_API_VER_MAJOR, \
 			  VPU_##name##_API_VER_MINOR, min_major)
 
+/* Check if API version is lower that the given version */
+#define IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, name, major, minor) \
+	ivpu_fw_check_api_ver_lt(vdev, fw_hdr, #name, VPU_##name##_API_VER_INDEX, major, minor)
+
 static char *ivpu_firmware;
 module_param_named_unsafe(firmware, ivpu_firmware, charp, 0644);
 MODULE_PARM_DESC(firmware, "VPU firmware binary in /lib/firmware/..");
@@ -105,6 +110,19 @@ ivpu_fw_check_api(struct ivpu_device *vdev, const struct vpu_firmware_header *fw
 	return 0;
 }
 
+static bool
+ivpu_fw_check_api_ver_lt(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr,
+			 const char *str, int index, u16 major, u16 minor)
+{
+	u16 fw_major = (u16)(fw_hdr->api_version[index] >> 16);
+	u16 fw_minor = (u16)(fw_hdr->api_version[index]);
+
+	if (fw_major < major || (fw_major == major && fw_minor < minor))
+		return true;
+
+	return false;
+}
+
 static int ivpu_fw_parse(struct ivpu_device *vdev)
 {
 	struct ivpu_fw_info *fw = vdev->fw;
@@ -164,9 +182,9 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
 	ivpu_info(vdev, "Firmware: %s, version: %s", fw->name,
 		  (const char *)fw_hdr + VPU_FW_HEADER_SIZE);
 
-	if (IVPU_FW_CHECK_API(vdev, fw_hdr, BOOT, 3))
+	if (IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, BOOT, 3))
 		return -EINVAL;
-	if (IVPU_FW_CHECK_API(vdev, fw_hdr, JSM, 3))
+	if (IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, JSM, 3))
 		return -EINVAL;
 
 	fw->runtime_addr = runtime_addr;
@@ -182,6 +200,8 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
 	fw->trace_destination_mask = VPU_TRACE_DESTINATION_VERBOSE_TRACING;
 	fw->trace_hw_component_mask = -1;
 
+	fw->dvfs_mode = 0;
+
 	ivpu_dbg(vdev, FW_BOOT, "Size: file %lu image %u runtime %u shavenn %u\n",
 		 fw->file->size, fw->image_size, fw->runtime_size, fw->shave_nn_size);
 	ivpu_dbg(vdev, FW_BOOT, "Address: runtime 0x%llx, load 0x%llx, entry point 0x%llx\n",
@@ -195,6 +215,23 @@ static void ivpu_fw_release(struct ivpu_device *vdev)
 	release_firmware(vdev->fw->file);
 }
 
+/* Initialize workarounds that depend on FW version */
+static void
+ivpu_fw_init_wa(struct ivpu_device *vdev)
+{
+	const struct vpu_firmware_header *fw_hdr = (const void *)vdev->fw->file->data;
+
+	if (IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, BOOT, 3, 17) ||
+	    (ivpu_test_mode & IVPU_TEST_MODE_D0I3_MSG_DISABLE))
+		vdev->wa.disable_d0i3_msg = true;
+
+	/* Force enable the feature for testing purposes */
+	if (ivpu_test_mode & IVPU_TEST_MODE_D0I3_MSG_ENABLE)
+		vdev->wa.disable_d0i3_msg = false;
+
+	IVPU_PRINT_WA(disable_d0i3_msg);
+}
+
 static int ivpu_fw_update_global_range(struct ivpu_device *vdev)
 {
 	struct ivpu_fw_info *fw = vdev->fw;
@@ -248,7 +285,7 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev)
 
 	if (fw->shave_nn_size) {
 		fw->mem_shave_nn = ivpu_bo_alloc_internal(vdev, vdev->hw->ranges.shave.start,
-							  fw->shave_nn_size, DRM_IVPU_BO_UNCACHED);
+							  fw->shave_nn_size, DRM_IVPU_BO_WC);
 		if (!fw->mem_shave_nn) {
 			ivpu_err(vdev, "Failed to allocate shavenn buffer\n");
 			ret = -ENOMEM;
@@ -297,6 +334,8 @@ int ivpu_fw_init(struct ivpu_device *vdev)
 	if (ret)
 		goto err_fw_release;
 
+	ivpu_fw_init_wa(vdev);
+
 	ret = ivpu_fw_mem_init(vdev);
 	if (ret)
 		goto err_fw_release;
@@ -422,14 +461,31 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_
 		 boot_params->punit_telemetry_sram_size);
 	ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_telemetry_enable = 0x%x\n",
 		 boot_params->vpu_telemetry_enable);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.dvfs_mode = %u\n",
+		 boot_params->dvfs_mode);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_delayed_entry = %d\n",
+		 boot_params->d0i3_delayed_entry);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_residency_time_us = %lld\n",
+		 boot_params->d0i3_residency_time_us);
+	ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n",
+		 boot_params->d0i3_entry_vpu_ts);
 }
 
 void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params)
 {
 	struct ivpu_bo *ipc_mem_rx = vdev->ipc->mem_rx;
 
-	/* In case of warm boot we only have to reset the entrypoint addr */
+	/* In case of warm boot only update variable params */
 	if (!ivpu_fw_is_cold_boot(vdev)) {
+		boot_params->d0i3_residency_time_us =
+			ktime_us_delta(ktime_get_boottime(), vdev->hw->d0i3_entry_host_ts);
+		boot_params->d0i3_entry_vpu_ts = vdev->hw->d0i3_entry_vpu_ts;
+
+		ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_residency_time_us = %lld\n",
+			 boot_params->d0i3_residency_time_us);
+		ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n",
+			 boot_params->d0i3_entry_vpu_ts);
+
 		boot_params->save_restore_ret_address = 0;
 		vdev->pm->is_warmboot = true;
 		wmb(); /* Flush WC buffers after writing save_restore_ret_address */
@@ -442,6 +498,13 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
 	boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number;
 	boot_params->frequency = ivpu_hw_reg_pll_freq_get(vdev);
 
+	/*
+	 * This param is a debug firmware feature.  It switches default clock
+	 * to higher resolution one for fine-grained and more accurate firmware
+	 * task profiling.
+	 */
+	boot_params->perf_clk_frequency = ivpu_hw_profiling_freq_get(vdev);
+
 	/*
 	 * Uncached region of VPU address space, covers IPC buffers, job queues
 	 * and log buffers, programmable to L2$ Uncached by VPU MTRR
@@ -493,6 +556,11 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
 	boot_params->punit_telemetry_sram_base = ivpu_hw_reg_telemetry_offset_get(vdev);
 	boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev);
 	boot_params->vpu_telemetry_enable = ivpu_hw_reg_telemetry_enable_get(vdev);
+	boot_params->dvfs_mode = vdev->fw->dvfs_mode;
+	if (!IVPU_WA(disable_d0i3_msg))
+		boot_params->d0i3_delayed_entry = 1;
+	boot_params->d0i3_residency_time_us = 0;
+	boot_params->d0i3_entry_vpu_ts = 0;
 
 	wmb(); /* Flush WC buffers after writing bootparams */
 
diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h
index 10ae2847f0..66b60fa161 100644
--- a/drivers/accel/ivpu/ivpu_fw.h
+++ b/drivers/accel/ivpu/ivpu_fw.h
@@ -27,6 +27,7 @@ struct ivpu_fw_info {
 	u32 trace_level;
 	u32 trace_destination_mask;
 	u64 trace_hw_component_mask;
+	u32 dvfs_mode;
 };
 
 int ivpu_fw_init(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index c91852f2ed..e9ddbe9f50 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -20,215 +20,15 @@
 #include "ivpu_mmu.h"
 #include "ivpu_mmu_context.h"
 
-MODULE_IMPORT_NS(DMA_BUF);
-
 static const struct drm_gem_object_funcs ivpu_gem_funcs;
 
-static struct lock_class_key prime_bo_lock_class_key;
-
-static int __must_check prime_alloc_pages_locked(struct ivpu_bo *bo)
-{
-	/* Pages are managed by the underlying dma-buf */
-	return 0;
-}
-
-static void prime_free_pages_locked(struct ivpu_bo *bo)
-{
-	/* Pages are managed by the underlying dma-buf */
-}
-
-static int prime_map_pages_locked(struct ivpu_bo *bo)
-{
-	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
-	struct sg_table *sgt;
-
-	sgt = dma_buf_map_attachment_unlocked(bo->base.import_attach, DMA_BIDIRECTIONAL);
-	if (IS_ERR(sgt)) {
-		ivpu_err(vdev, "Failed to map attachment: %ld\n", PTR_ERR(sgt));
-		return PTR_ERR(sgt);
-	}
-
-	bo->sgt = sgt;
-	return 0;
-}
-
-static void prime_unmap_pages_locked(struct ivpu_bo *bo)
-{
-	dma_buf_unmap_attachment_unlocked(bo->base.import_attach, bo->sgt, DMA_BIDIRECTIONAL);
-	bo->sgt = NULL;
-}
-
-static const struct ivpu_bo_ops prime_ops = {
-	.type = IVPU_BO_TYPE_PRIME,
-	.name = "prime",
-	.alloc_pages = prime_alloc_pages_locked,
-	.free_pages = prime_free_pages_locked,
-	.map_pages = prime_map_pages_locked,
-	.unmap_pages = prime_unmap_pages_locked,
-};
-
-static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo)
-{
-	int npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
-	struct page **pages;
-
-	pages = drm_gem_get_pages(&bo->base);
-	if (IS_ERR(pages))
-		return PTR_ERR(pages);
-
-	if (bo->flags & DRM_IVPU_BO_WC)
-		set_pages_array_wc(pages, npages);
-	else if (bo->flags & DRM_IVPU_BO_UNCACHED)
-		set_pages_array_uc(pages, npages);
-
-	bo->pages = pages;
-	return 0;
-}
-
-static void shmem_free_pages_locked(struct ivpu_bo *bo)
-{
-	if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
-		set_pages_array_wb(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
-
-	drm_gem_put_pages(&bo->base, bo->pages, true, false);
-	bo->pages = NULL;
-}
-
-static int ivpu_bo_map_pages_locked(struct ivpu_bo *bo)
-{
-	int npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
-	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
-	struct sg_table *sgt;
-	int ret;
-
-	sgt = drm_prime_pages_to_sg(&vdev->drm, bo->pages, npages);
-	if (IS_ERR(sgt)) {
-		ivpu_err(vdev, "Failed to allocate sgtable\n");
-		return PTR_ERR(sgt);
-	}
-
-	ret = dma_map_sgtable(vdev->drm.dev, sgt, DMA_BIDIRECTIONAL, 0);
-	if (ret) {
-		ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
-		goto err_free_sgt;
-	}
-
-	bo->sgt = sgt;
-	return 0;
-
-err_free_sgt:
-	kfree(sgt);
-	return ret;
-}
-
-static void ivpu_bo_unmap_pages_locked(struct ivpu_bo *bo)
-{
-	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
-
-	dma_unmap_sgtable(vdev->drm.dev, bo->sgt, DMA_BIDIRECTIONAL, 0);
-	sg_free_table(bo->sgt);
-	kfree(bo->sgt);
-	bo->sgt = NULL;
-}
-
-static const struct ivpu_bo_ops shmem_ops = {
-	.type = IVPU_BO_TYPE_SHMEM,
-	.name = "shmem",
-	.alloc_pages = shmem_alloc_pages_locked,
-	.free_pages = shmem_free_pages_locked,
-	.map_pages = ivpu_bo_map_pages_locked,
-	.unmap_pages = ivpu_bo_unmap_pages_locked,
-};
-
-static int __must_check internal_alloc_pages_locked(struct ivpu_bo *bo)
-{
-	unsigned int i, npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
-	struct page **pages;
-	int ret;
-
-	pages = kvmalloc_array(npages, sizeof(*bo->pages), GFP_KERNEL);
-	if (!pages)
-		return -ENOMEM;
-
-	for (i = 0; i < npages; i++) {
-		pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
-		if (!pages[i]) {
-			ret = -ENOMEM;
-			goto err_free_pages;
-		}
-		cond_resched();
-	}
-
-	bo->pages = pages;
-	return 0;
-
-err_free_pages:
-	while (i--)
-		put_page(pages[i]);
-	kvfree(pages);
-	return ret;
-}
-
-static void internal_free_pages_locked(struct ivpu_bo *bo)
+static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, const char *action)
 {
-	unsigned int i, npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
-
-	if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
-		set_pages_array_wb(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
-
-	for (i = 0; i < npages; i++)
-		put_page(bo->pages[i]);
-
-	kvfree(bo->pages);
-	bo->pages = NULL;
-}
-
-static const struct ivpu_bo_ops internal_ops = {
-	.type = IVPU_BO_TYPE_INTERNAL,
-	.name = "internal",
-	.alloc_pages = internal_alloc_pages_locked,
-	.free_pages = internal_free_pages_locked,
-	.map_pages = ivpu_bo_map_pages_locked,
-	.unmap_pages = ivpu_bo_unmap_pages_locked,
-};
-
-static int __must_check ivpu_bo_alloc_and_map_pages_locked(struct ivpu_bo *bo)
-{
-	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
-	int ret;
-
-	lockdep_assert_held(&bo->lock);
-	drm_WARN_ON(&vdev->drm, bo->sgt);
-
-	ret = bo->ops->alloc_pages(bo);
-	if (ret) {
-		ivpu_err(vdev, "Failed to allocate pages for BO: %d", ret);
-		return ret;
-	}
-
-	ret = bo->ops->map_pages(bo);
-	if (ret) {
-		ivpu_err(vdev, "Failed to map pages for BO: %d", ret);
-		goto err_free_pages;
-	}
-	return ret;
-
-err_free_pages:
-	bo->ops->free_pages(bo);
-	return ret;
-}
-
-static void ivpu_bo_unmap_and_free_pages(struct ivpu_bo *bo)
-{
-	mutex_lock(&bo->lock);
-
-	WARN_ON(!bo->sgt);
-	bo->ops->unmap_pages(bo);
-	WARN_ON(bo->sgt);
-	bo->ops->free_pages(bo);
-	WARN_ON(bo->pages);
-
-	mutex_unlock(&bo->lock);
+	ivpu_dbg(vdev, BO,
+		 "%6s: bo %8p vpu_addr %9llx size %8zu ctx %d has_pages %d dma_mapped %d mmu_mapped %d wc %d imported %d\n",
+		 action, bo, bo->vpu_addr, ivpu_bo_size(bo), bo->ctx ? bo->ctx->id : 0,
+		 (bool)bo->base.pages, (bool)bo->base.sgt, bo->mmu_mapped, bo->base.map_wc,
+		 (bool)bo->base.base.import_attach);
 }
 
 /*
@@ -245,21 +45,19 @@ int __must_check ivpu_bo_pin(struct ivpu_bo *bo)
 
 	mutex_lock(&bo->lock);
 
-	if (!bo->vpu_addr) {
-		ivpu_err(vdev, "vpu_addr not set for BO ctx_id: %d handle: %d\n",
-			 bo->ctx->id, bo->handle);
-		ret = -EINVAL;
-		goto unlock;
-	}
+	ivpu_dbg_bo(vdev, bo, "pin");
+	drm_WARN_ON(&vdev->drm, !bo->ctx);
+
+	if (!bo->mmu_mapped) {
+		struct sg_table *sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
 
-	if (!bo->sgt) {
-		ret = ivpu_bo_alloc_and_map_pages_locked(bo);
-		if (ret)
+		if (IS_ERR(sgt)) {
+			ret = PTR_ERR(sgt);
+			ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
 			goto unlock;
-	}
+		}
 
-	if (!bo->mmu_mapped) {
-		ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, bo->sgt,
+		ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, sgt,
 					       ivpu_bo_is_snooped(bo));
 		if (ret) {
 			ivpu_err(vdev, "Failed to map BO in MMU: %d\n", ret);
@@ -279,250 +77,187 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
 		       const struct ivpu_addr_range *range)
 {
 	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
-	int ret;
+	int idx, ret;
 
-	if (!range) {
-		if (bo->flags & DRM_IVPU_BO_SHAVE_MEM)
-			range = &vdev->hw->ranges.shave;
-		else if (bo->flags & DRM_IVPU_BO_DMA_MEM)
-			range = &vdev->hw->ranges.dma;
-		else
-			range = &vdev->hw->ranges.user;
-	}
+	if (!drm_dev_enter(&vdev->drm, &idx))
+		return -ENODEV;
+
+	mutex_lock(&bo->lock);
 
-	mutex_lock(&ctx->lock);
-	ret = ivpu_mmu_context_insert_node_locked(ctx, range, ivpu_bo_size(bo), &bo->mm_node);
+	ret = ivpu_mmu_context_insert_node(ctx, range, ivpu_bo_size(bo), &bo->mm_node);
 	if (!ret) {
 		bo->ctx = ctx;
 		bo->vpu_addr = bo->mm_node.start;
-		list_add_tail(&bo->ctx_node, &ctx->bo_list);
+	} else {
+		ivpu_err(vdev, "Failed to add BO to context %u: %d\n", ctx->id, ret);
 	}
-	mutex_unlock(&ctx->lock);
+
+	ivpu_dbg_bo(vdev, bo, "alloc");
+
+	mutex_unlock(&bo->lock);
+
+	drm_dev_exit(idx);
 
 	return ret;
 }
 
-static void ivpu_bo_free_vpu_addr(struct ivpu_bo *bo)
+static void ivpu_bo_unbind_locked(struct ivpu_bo *bo)
 {
 	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
-	struct ivpu_mmu_context *ctx = bo->ctx;
-
-	ivpu_dbg(vdev, BO, "remove from ctx: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n",
-		 ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped);
 
-	mutex_lock(&bo->lock);
+	lockdep_assert(lockdep_is_held(&bo->lock) || !kref_read(&bo->base.base.refcount));
 
 	if (bo->mmu_mapped) {
-		drm_WARN_ON(&vdev->drm, !bo->sgt);
-		ivpu_mmu_context_unmap_sgt(vdev, ctx, bo->vpu_addr, bo->sgt);
+		drm_WARN_ON(&vdev->drm, !bo->ctx);
+		drm_WARN_ON(&vdev->drm, !bo->vpu_addr);
+		drm_WARN_ON(&vdev->drm, !bo->base.sgt);
+		ivpu_mmu_context_unmap_sgt(vdev, bo->ctx, bo->vpu_addr, bo->base.sgt);
 		bo->mmu_mapped = false;
 	}
 
-	mutex_lock(&ctx->lock);
-	list_del(&bo->ctx_node);
-	bo->vpu_addr = 0;
-	bo->ctx = NULL;
-	ivpu_mmu_context_remove_node_locked(ctx, &bo->mm_node);
-	mutex_unlock(&ctx->lock);
+	if (bo->ctx) {
+		ivpu_mmu_context_remove_node(bo->ctx, &bo->mm_node);
+		bo->ctx = NULL;
+	}
 
-	mutex_unlock(&bo->lock);
+	if (bo->base.base.import_attach)
+		return;
+
+	dma_resv_lock(bo->base.base.resv, NULL);
+	if (bo->base.sgt) {
+		dma_unmap_sgtable(vdev->drm.dev, bo->base.sgt, DMA_BIDIRECTIONAL, 0);
+		sg_free_table(bo->base.sgt);
+		kfree(bo->base.sgt);
+		bo->base.sgt = NULL;
+	}
+	dma_resv_unlock(bo->base.base.resv);
 }
 
-void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx)
+void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
 {
-	struct ivpu_bo *bo, *tmp;
+	struct ivpu_bo *bo;
+
+	if (drm_WARN_ON(&vdev->drm, !ctx))
+		return;
 
-	list_for_each_entry_safe(bo, tmp, &ctx->bo_list, ctx_node)
-		ivpu_bo_free_vpu_addr(bo);
+	mutex_lock(&vdev->bo_list_lock);
+	list_for_each_entry(bo, &vdev->bo_list, bo_list_node) {
+		mutex_lock(&bo->lock);
+		if (bo->ctx == ctx) {
+			ivpu_dbg_bo(vdev, bo, "unbind");
+			ivpu_bo_unbind_locked(bo);
+		}
+		mutex_unlock(&bo->lock);
+	}
+	mutex_unlock(&vdev->bo_list_lock);
 }
 
-static struct ivpu_bo *
-ivpu_bo_alloc(struct ivpu_device *vdev, struct ivpu_mmu_context *mmu_context,
-	      u64 size, u32 flags, const struct ivpu_bo_ops *ops,
-	      const struct ivpu_addr_range *range, u64 user_ptr)
+struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size)
 {
 	struct ivpu_bo *bo;
-	int ret = 0;
 
-	if (drm_WARN_ON(&vdev->drm, size == 0 || !PAGE_ALIGNED(size)))
+	if (size == 0 || !PAGE_ALIGNED(size))
 		return ERR_PTR(-EINVAL);
 
-	switch (flags & DRM_IVPU_BO_CACHE_MASK) {
-	case DRM_IVPU_BO_CACHED:
-	case DRM_IVPU_BO_UNCACHED:
-	case DRM_IVPU_BO_WC:
-		break;
-	default:
-		return ERR_PTR(-EINVAL);
-	}
-
 	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
 	if (!bo)
 		return ERR_PTR(-ENOMEM);
 
-	mutex_init(&bo->lock);
-	bo->base.funcs = &ivpu_gem_funcs;
-	bo->flags = flags;
-	bo->ops = ops;
-	bo->user_ptr = user_ptr;
-
-	if (ops->type == IVPU_BO_TYPE_SHMEM)
-		ret = drm_gem_object_init(&vdev->drm, &bo->base, size);
-	else
-		drm_gem_private_object_init(&vdev->drm, &bo->base, size);
+	bo->base.base.funcs = &ivpu_gem_funcs;
+	bo->base.pages_mark_dirty_on_put = true; /* VPU can dirty a BO anytime */
 
-	if (ret) {
-		ivpu_err(vdev, "Failed to initialize drm object\n");
-		goto err_free;
-	}
-
-	if (flags & DRM_IVPU_BO_MAPPABLE) {
-		ret = drm_gem_create_mmap_offset(&bo->base);
-		if (ret) {
-			ivpu_err(vdev, "Failed to allocate mmap offset\n");
-			goto err_release;
-		}
-	}
-
-	if (mmu_context) {
-		ret = ivpu_bo_alloc_vpu_addr(bo, mmu_context, range);
-		if (ret) {
-			ivpu_err(vdev, "Failed to add BO to context: %d\n", ret);
-			goto err_release;
-		}
-	}
-
-	return bo;
+	INIT_LIST_HEAD(&bo->bo_list_node);
+	mutex_init(&bo->lock);
 
-err_release:
-	drm_gem_object_release(&bo->base);
-err_free:
-	kfree(bo);
-	return ERR_PTR(ret);
+	return &bo->base.base;
 }
 
-static void ivpu_bo_free(struct drm_gem_object *obj)
+static struct ivpu_bo *
+ivpu_bo_create(struct ivpu_device *vdev, u64 size, u32 flags)
 {
-	struct ivpu_bo *bo = to_ivpu_bo(obj);
-	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
-
-	if (bo->ctx)
-		ivpu_dbg(vdev, BO, "free: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n",
-			 bo->ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped);
-	else
-		ivpu_dbg(vdev, BO, "free: ctx (released) allocated %d mmu_mapped %d\n",
-			 (bool)bo->sgt, bo->mmu_mapped);
-
-	drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ));
-
-	vunmap(bo->kvaddr);
+	struct drm_gem_shmem_object *shmem;
+	struct ivpu_bo *bo;
 
-	if (bo->ctx)
-		ivpu_bo_free_vpu_addr(bo);
+	switch (flags & DRM_IVPU_BO_CACHE_MASK) {
+	case DRM_IVPU_BO_CACHED:
+	case DRM_IVPU_BO_WC:
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
 
-	if (bo->sgt)
-		ivpu_bo_unmap_and_free_pages(bo);
+	shmem = drm_gem_shmem_create(&vdev->drm, size);
+	if (IS_ERR(shmem))
+		return ERR_CAST(shmem);
 
-	if (bo->base.import_attach)
-		drm_prime_gem_destroy(&bo->base, bo->sgt);
+	bo = to_ivpu_bo(&shmem->base);
+	bo->base.map_wc = flags & DRM_IVPU_BO_WC;
+	bo->flags = flags;
 
-	drm_gem_object_release(&bo->base);
+	mutex_lock(&vdev->bo_list_lock);
+	list_add_tail(&bo->bo_list_node, &vdev->bo_list);
+	mutex_unlock(&vdev->bo_list_lock);
 
-	mutex_destroy(&bo->lock);
-	kfree(bo);
+	return bo;
 }
 
-static int ivpu_bo_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+static int ivpu_bo_open(struct drm_gem_object *obj, struct drm_file *file)
 {
+	struct ivpu_file_priv *file_priv = file->driver_priv;
+	struct ivpu_device *vdev = file_priv->vdev;
 	struct ivpu_bo *bo = to_ivpu_bo(obj);
-	struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
-
-	ivpu_dbg(vdev, BO, "mmap: ctx %u handle %u vpu_addr 0x%llx size %zu type %s",
-		 bo->ctx->id, bo->handle, bo->vpu_addr, ivpu_bo_size(bo), bo->ops->name);
+	struct ivpu_addr_range *range;
 
-	if (obj->import_attach) {
-		/* Drop the reference drm_gem_mmap_obj() acquired.*/
-		drm_gem_object_put(obj);
-		vma->vm_private_data = NULL;
-		return dma_buf_mmap(obj->dma_buf, vma, 0);
+	if (bo->ctx) {
+		ivpu_warn(vdev, "Can't add BO to ctx %u: already in ctx %u\n",
+			  file_priv->ctx.id, bo->ctx->id);
+		return -EALREADY;
 	}
 
-	vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND);
-	vma->vm_page_prot = ivpu_bo_pgprot(bo, vm_get_page_prot(vma->vm_flags));
-
-	return 0;
-}
-
-static struct sg_table *ivpu_bo_get_sg_table(struct drm_gem_object *obj)
-{
-	struct ivpu_bo *bo = to_ivpu_bo(obj);
-	loff_t npages = obj->size >> PAGE_SHIFT;
-	int ret = 0;
-
-	mutex_lock(&bo->lock);
-
-	if (!bo->sgt)
-		ret = ivpu_bo_alloc_and_map_pages_locked(bo);
-
-	mutex_unlock(&bo->lock);
-
-	if (ret)
-		return ERR_PTR(ret);
+	if (bo->flags & DRM_IVPU_BO_SHAVE_MEM)
+		range = &vdev->hw->ranges.shave;
+	else if (bo->flags & DRM_IVPU_BO_DMA_MEM)
+		range = &vdev->hw->ranges.dma;
+	else
+		range = &vdev->hw->ranges.user;
 
-	return drm_prime_pages_to_sg(obj->dev, bo->pages, npages);
+	return ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, range);
 }
 
-static vm_fault_t ivpu_vm_fault(struct vm_fault *vmf)
+static void ivpu_bo_free(struct drm_gem_object *obj)
 {
-	struct vm_area_struct *vma = vmf->vma;
-	struct drm_gem_object *obj = vma->vm_private_data;
+	struct ivpu_device *vdev = to_ivpu_device(obj->dev);
 	struct ivpu_bo *bo = to_ivpu_bo(obj);
-	loff_t npages = obj->size >> PAGE_SHIFT;
-	pgoff_t page_offset;
-	struct page *page;
-	vm_fault_t ret;
-	int err;
 
-	mutex_lock(&bo->lock);
+	ivpu_dbg_bo(vdev, bo, "free");
 
-	if (!bo->sgt) {
-		err = ivpu_bo_alloc_and_map_pages_locked(bo);
-		if (err) {
-			ret = vmf_error(err);
-			goto unlock;
-		}
-	}
+	mutex_lock(&vdev->bo_list_lock);
+	list_del(&bo->bo_list_node);
+	mutex_unlock(&vdev->bo_list_lock);
 
-	/* We don't use vmf->pgoff since that has the fake offset */
-	page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
-	if (page_offset >= npages) {
-		ret = VM_FAULT_SIGBUS;
-	} else {
-		page = bo->pages[page_offset];
-		ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page));
-	}
+	drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ));
 
-unlock:
-	mutex_unlock(&bo->lock);
+	ivpu_bo_unbind_locked(bo);
+	mutex_destroy(&bo->lock);
 
-	return ret;
+	drm_WARN_ON(obj->dev, bo->base.pages_use_count > 1);
+	drm_gem_shmem_free(&bo->base);
 }
 
-static const struct vm_operations_struct ivpu_vm_ops = {
-	.fault = ivpu_vm_fault,
-	.open = drm_gem_vm_open,
-	.close = drm_gem_vm_close,
-};
-
 static const struct drm_gem_object_funcs ivpu_gem_funcs = {
 	.free = ivpu_bo_free,
-	.mmap = ivpu_bo_mmap,
-	.vm_ops = &ivpu_vm_ops,
-	.get_sg_table = ivpu_bo_get_sg_table,
+	.open = ivpu_bo_open,
+	.print_info = drm_gem_shmem_object_print_info,
+	.pin = drm_gem_shmem_object_pin,
+	.unpin = drm_gem_shmem_object_unpin,
+	.get_sg_table = drm_gem_shmem_object_get_sg_table,
+	.vmap = drm_gem_shmem_object_vmap,
+	.vunmap = drm_gem_shmem_object_vunmap,
+	.mmap = drm_gem_shmem_object_mmap,
+	.vm_ops = &drm_gem_shmem_vm_ops,
 };
 
-int
-ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
 	struct ivpu_file_priv *file_priv = file->driver_priv;
 	struct ivpu_device *vdev = file_priv->vdev;
@@ -537,23 +272,18 @@ ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (size == 0)
 		return -EINVAL;
 
-	bo = ivpu_bo_alloc(vdev, &file_priv->ctx, size, args->flags, &shmem_ops, NULL, 0);
+	bo = ivpu_bo_create(vdev, size, args->flags);
 	if (IS_ERR(bo)) {
 		ivpu_err(vdev, "Failed to create BO: %pe (ctx %u size %llu flags 0x%x)",
 			 bo, file_priv->ctx.id, args->size, args->flags);
 		return PTR_ERR(bo);
 	}
 
-	ret = drm_gem_handle_create(file, &bo->base, &bo->handle);
-	if (!ret) {
+	ret = drm_gem_handle_create(file, &bo->base.base, &args->handle);
+	if (!ret)
 		args->vpu_addr = bo->vpu_addr;
-		args->handle = bo->handle;
-	}
-
-	drm_gem_object_put(&bo->base);
 
-	ivpu_dbg(vdev, BO, "alloc shmem: ctx %u vpu_addr 0x%llx size %zu flags 0x%x\n",
-		 file_priv->ctx.id, bo->vpu_addr, ivpu_bo_size(bo), bo->flags);
+	drm_gem_object_put(&bo->base.base);
 
 	return ret;
 }
@@ -563,8 +293,8 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
 {
 	const struct ivpu_addr_range *range;
 	struct ivpu_addr_range fixed_range;
+	struct iosys_map map;
 	struct ivpu_bo *bo;
-	pgprot_t prot;
 	int ret;
 
 	drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(vpu_addr));
@@ -578,81 +308,47 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
 		range = &vdev->hw->ranges.global;
 	}
 
-	bo = ivpu_bo_alloc(vdev, &vdev->gctx, size, flags, &internal_ops, range, 0);
+	bo = ivpu_bo_create(vdev, size, flags);
 	if (IS_ERR(bo)) {
 		ivpu_err(vdev, "Failed to create BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)",
 			 bo, vpu_addr, size, flags);
 		return NULL;
 	}
 
-	ret = ivpu_bo_pin(bo);
+	ret = ivpu_bo_alloc_vpu_addr(bo, &vdev->gctx, range);
 	if (ret)
 		goto err_put;
 
-	if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
-		drm_clflush_pages(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
-
-	if (bo->flags & DRM_IVPU_BO_WC)
-		set_pages_array_wc(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
-	else if (bo->flags & DRM_IVPU_BO_UNCACHED)
-		set_pages_array_uc(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
-
-	prot = ivpu_bo_pgprot(bo, PAGE_KERNEL);
-	bo->kvaddr = vmap(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT, VM_MAP, prot);
-	if (!bo->kvaddr) {
-		ivpu_err(vdev, "Failed to map BO into kernel virtual memory\n");
+	ret = ivpu_bo_pin(bo);
+	if (ret)
 		goto err_put;
-	}
 
-	ivpu_dbg(vdev, BO, "alloc internal: ctx 0 vpu_addr 0x%llx size %zu flags 0x%x\n",
-		 bo->vpu_addr, ivpu_bo_size(bo), flags);
+	dma_resv_lock(bo->base.base.resv, NULL);
+	ret = drm_gem_shmem_vmap(&bo->base, &map);
+	dma_resv_unlock(bo->base.base.resv);
+	if (ret)
+		goto err_put;
 
 	return bo;
 
 err_put:
-	drm_gem_object_put(&bo->base);
+	drm_gem_object_put(&bo->base.base);
 	return NULL;
 }
 
 void ivpu_bo_free_internal(struct ivpu_bo *bo)
 {
-	drm_gem_object_put(&bo->base);
-}
+	struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->base.vaddr);
 
-struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *buf)
-{
-	struct ivpu_device *vdev = to_ivpu_device(dev);
-	struct dma_buf_attachment *attach;
-	struct ivpu_bo *bo;
+	dma_resv_lock(bo->base.base.resv, NULL);
+	drm_gem_shmem_vunmap(&bo->base, &map);
+	dma_resv_unlock(bo->base.base.resv);
 
-	attach = dma_buf_attach(buf, dev->dev);
-	if (IS_ERR(attach))
-		return ERR_CAST(attach);
-
-	get_dma_buf(buf);
-
-	bo = ivpu_bo_alloc(vdev, NULL, buf->size, DRM_IVPU_BO_MAPPABLE, &prime_ops, NULL, 0);
-	if (IS_ERR(bo)) {
-		ivpu_err(vdev, "Failed to import BO: %pe (size %lu)", bo, buf->size);
-		goto err_detach;
-	}
-
-	lockdep_set_class(&bo->lock, &prime_bo_lock_class_key);
-
-	bo->base.import_attach = attach;
-
-	return &bo->base;
-
-err_detach:
-	dma_buf_detach(buf, attach);
-	dma_buf_put(buf);
-	return ERR_CAST(bo);
+	drm_gem_object_put(&bo->base.base);
 }
 
 int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
-	struct ivpu_file_priv *file_priv = file->driver_priv;
-	struct ivpu_device *vdev = to_ivpu_device(dev);
 	struct drm_ivpu_bo_info *args = data;
 	struct drm_gem_object *obj;
 	struct ivpu_bo *bo;
@@ -665,21 +361,12 @@ int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file
 	bo = to_ivpu_bo(obj);
 
 	mutex_lock(&bo->lock);
-
-	if (!bo->ctx) {
-		ret = ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, NULL);
-		if (ret) {
-			ivpu_err(vdev, "Failed to allocate vpu_addr: %d\n", ret);
-			goto unlock;
-		}
-	}
-
 	args->flags = bo->flags;
 	args->mmap_offset = drm_vma_node_offset_addr(&obj->vma_node);
 	args->vpu_addr = bo->vpu_addr;
 	args->size = obj->size;
-unlock:
 	mutex_unlock(&bo->lock);
+
 	drm_gem_object_put(obj);
 	return ret;
 }
@@ -712,43 +399,38 @@ int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file
 
 static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
 {
-	unsigned long dma_refcount = 0;
+	mutex_lock(&bo->lock);
+
+	drm_printf(p, "%-9p %-3u 0x%-12llx %-10lu 0x%-8x %-4u",
+		   bo, bo->ctx->id, bo->vpu_addr, bo->base.base.size,
+		   bo->flags, kref_read(&bo->base.base.refcount));
+
+	if (bo->base.pages)
+		drm_printf(p, " has_pages");
 
-	if (bo->base.dma_buf && bo->base.dma_buf->file)
-		dma_refcount = atomic_long_read(&bo->base.dma_buf->file->f_count);
+	if (bo->mmu_mapped)
+		drm_printf(p, " mmu_mapped");
 
-	drm_printf(p, "%5u %6d %16llx %10lu %10u %12lu %14s\n",
-		   bo->ctx->id, bo->handle, bo->vpu_addr, ivpu_bo_size(bo),
-		   kref_read(&bo->base.refcount), dma_refcount, bo->ops->name);
+	if (bo->base.base.import_attach)
+		drm_printf(p, " imported");
+
+	drm_printf(p, "\n");
+
+	mutex_unlock(&bo->lock);
 }
 
 void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p)
 {
 	struct ivpu_device *vdev = to_ivpu_device(dev);
-	struct ivpu_file_priv *file_priv;
-	unsigned long ctx_id;
 	struct ivpu_bo *bo;
 
-	drm_printf(p, "%5s %6s %16s %10s %10s %12s %14s\n",
-		   "ctx", "handle", "vpu_addr", "size", "refcount", "dma_refcount", "type");
+	drm_printf(p, "%-9s %-3s %-14s %-10s %-10s %-4s %s\n",
+		   "bo", "ctx", "vpu_addr", "size", "flags", "refs", "attribs");
 
-	mutex_lock(&vdev->gctx.lock);
-	list_for_each_entry(bo, &vdev->gctx.bo_list, ctx_node)
+	mutex_lock(&vdev->bo_list_lock);
+	list_for_each_entry(bo, &vdev->bo_list, bo_list_node)
 		ivpu_bo_print_info(bo, p);
-	mutex_unlock(&vdev->gctx.lock);
-
-	xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
-		file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id);
-		if (!file_priv)
-			continue;
-
-		mutex_lock(&file_priv->ctx.lock);
-		list_for_each_entry(bo, &file_priv->ctx.bo_list, ctx_node)
-			ivpu_bo_print_info(bo, p);
-		mutex_unlock(&file_priv->ctx.lock);
-
-		ivpu_file_priv_put(&file_priv);
-	}
+	mutex_unlock(&vdev->bo_list_lock);
 }
 
 void ivpu_bo_list_print(struct drm_device *dev)
diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h
index a0b4d4a32b..a8559211c7 100644
--- a/drivers/accel/ivpu/ivpu_gem.h
+++ b/drivers/accel/ivpu/ivpu_gem.h
@@ -6,84 +6,51 @@
 #define __IVPU_GEM_H__
 
 #include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
 #include <drm/drm_mm.h>
 
-struct dma_buf;
-struct ivpu_bo_ops;
 struct ivpu_file_priv;
 
 struct ivpu_bo {
-	struct drm_gem_object base;
-	const struct ivpu_bo_ops *ops;
-
+	struct drm_gem_shmem_object base;
 	struct ivpu_mmu_context *ctx;
-	struct list_head ctx_node;
+	struct list_head bo_list_node;
 	struct drm_mm_node mm_node;
 
-	struct mutex lock; /* Protects: pages, sgt, mmu_mapped */
-	struct sg_table *sgt;
-	struct page **pages;
-	bool mmu_mapped;
-
-	void *kvaddr;
+	struct mutex lock; /* Protects: ctx, mmu_mapped, vpu_addr */
 	u64 vpu_addr;
-	u32 handle;
 	u32 flags;
-	uintptr_t user_ptr;
-	u32 job_status;
-};
-
-enum ivpu_bo_type {
-	IVPU_BO_TYPE_SHMEM = 1,
-	IVPU_BO_TYPE_INTERNAL,
-	IVPU_BO_TYPE_PRIME,
-};
-
-struct ivpu_bo_ops {
-	enum ivpu_bo_type type;
-	const char *name;
-	int (*alloc_pages)(struct ivpu_bo *bo);
-	void (*free_pages)(struct ivpu_bo *bo);
-	int (*map_pages)(struct ivpu_bo *bo);
-	void (*unmap_pages)(struct ivpu_bo *bo);
+	u32 job_status; /* Valid only for command buffer */
+	bool mmu_mapped;
 };
 
 int ivpu_bo_pin(struct ivpu_bo *bo);
-void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx);
-void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p);
-void ivpu_bo_list_print(struct drm_device *dev);
+void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
 
-struct ivpu_bo *
-ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags);
+struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size);
+struct ivpu_bo *ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags);
 void ivpu_bo_free_internal(struct ivpu_bo *bo);
-struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf);
-void ivpu_bo_unmap_sgt_and_remove_from_context(struct ivpu_bo *bo);
 
 int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
 int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
 int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
 
+void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p);
+void ivpu_bo_list_print(struct drm_device *dev);
+
 static inline struct ivpu_bo *to_ivpu_bo(struct drm_gem_object *obj)
 {
-	return container_of(obj, struct ivpu_bo, base);
+	return container_of(obj, struct ivpu_bo, base.base);
 }
 
 static inline void *ivpu_bo_vaddr(struct ivpu_bo *bo)
 {
-	return bo->kvaddr;
+	return bo->base.vaddr;
 }
 
 static inline size_t ivpu_bo_size(struct ivpu_bo *bo)
 {
-	return bo->base.size;
-}
-
-static inline struct page *ivpu_bo_get_page(struct ivpu_bo *bo, u64 offset)
-{
-	if (offset > ivpu_bo_size(bo) || !bo->pages)
-		return NULL;
-
-	return bo->pages[offset / PAGE_SIZE];
+	return bo->base.base.size;
 }
 
 static inline u32 ivpu_bo_cache_mode(struct ivpu_bo *bo)
@@ -96,20 +63,9 @@ static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo)
 	return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED;
 }
 
-static inline pgprot_t ivpu_bo_pgprot(struct ivpu_bo *bo, pgprot_t prot)
-{
-	if (bo->flags & DRM_IVPU_BO_WC)
-		return pgprot_writecombine(prot);
-
-	if (bo->flags & DRM_IVPU_BO_UNCACHED)
-		return pgprot_noncached(prot);
-
-	return prot;
-}
-
 static inline struct ivpu_device *ivpu_bo_to_vdev(struct ivpu_bo *bo)
 {
-	return to_ivpu_device(bo->base.dev);
+	return to_ivpu_device(bo->base.base.dev);
 }
 
 static inline void *ivpu_to_cpu_addr(struct ivpu_bo *bo, u32 vpu_addr)
diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h
index 1079e06255..094c659d28 100644
--- a/drivers/accel/ivpu/ivpu_hw.h
+++ b/drivers/accel/ivpu/ivpu_hw.h
@@ -15,9 +15,13 @@ struct ivpu_hw_ops {
 	int (*power_down)(struct ivpu_device *vdev);
 	int (*reset)(struct ivpu_device *vdev);
 	bool (*is_idle)(struct ivpu_device *vdev);
+	int (*wait_for_idle)(struct ivpu_device *vdev);
 	void (*wdt_disable)(struct ivpu_device *vdev);
 	void (*diagnose_failure)(struct ivpu_device *vdev);
+	u32 (*profiling_freq_get)(struct ivpu_device *vdev);
+	void (*profiling_freq_drive)(struct ivpu_device *vdev, bool enable);
 	u32 (*reg_pll_freq_get)(struct ivpu_device *vdev);
+	u32 (*ratio_to_freq)(struct ivpu_device *vdev, u32 ratio);
 	u32 (*reg_telemetry_offset_get)(struct ivpu_device *vdev);
 	u32 (*reg_telemetry_size_get)(struct ivpu_device *vdev);
 	u32 (*reg_telemetry_enable_get)(struct ivpu_device *vdev);
@@ -58,6 +62,8 @@ struct ivpu_hw_info {
 	u32 sku;
 	u16 config;
 	int dma_bits;
+	ktime_t d0i3_entry_host_ts;
+	u64 d0i3_entry_vpu_ts;
 };
 
 extern const struct ivpu_hw_ops ivpu_hw_37xx_ops;
@@ -85,6 +91,11 @@ static inline bool ivpu_hw_is_idle(struct ivpu_device *vdev)
 	return vdev->hw->ops->is_idle(vdev);
 };
 
+static inline int ivpu_hw_wait_for_idle(struct ivpu_device *vdev)
+{
+	return vdev->hw->ops->wait_for_idle(vdev);
+};
+
 static inline int ivpu_hw_power_down(struct ivpu_device *vdev)
 {
 	ivpu_dbg(vdev, PM, "HW power down\n");
@@ -104,12 +115,27 @@ static inline void ivpu_hw_wdt_disable(struct ivpu_device *vdev)
 	vdev->hw->ops->wdt_disable(vdev);
 };
 
+static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev)
+{
+	return vdev->hw->ops->profiling_freq_get(vdev);
+};
+
+static inline void ivpu_hw_profiling_freq_drive(struct ivpu_device *vdev, bool enable)
+{
+	return vdev->hw->ops->profiling_freq_drive(vdev, enable);
+};
+
 /* Register indirect accesses */
 static inline u32 ivpu_hw_reg_pll_freq_get(struct ivpu_device *vdev)
 {
 	return vdev->hw->ops->reg_pll_freq_get(vdev);
 };
 
+static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
+{
+	return vdev->hw->ops->ratio_to_freq(vdev, ratio);
+}
+
 static inline u32 ivpu_hw_reg_telemetry_offset_get(struct ivpu_device *vdev)
 {
 	return vdev->hw->ops->reg_telemetry_offset_get(vdev);
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c
index e658fcf849..32bb772e03 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_37xx.c
@@ -29,6 +29,7 @@
 
 #define PLL_REF_CLK_FREQ	     (50 * 1000000)
 #define PLL_SIMULATION_FREQ	     (10 * 1000000)
+#define PLL_PROF_CLK_FREQ	     (38400 * 1000)
 #define PLL_DEFAULT_EPP_VALUE	     0x80
 
 #define TIM_SAFE_ENABLE		     0xf1d0dead
@@ -37,7 +38,7 @@
 #define TIMEOUT_US		     (150 * USEC_PER_MSEC)
 #define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC)
 #define PLL_TIMEOUT_US		     (1500 * USEC_PER_MSEC)
-#define IDLE_TIMEOUT_US		     (500 * USEC_PER_MSEC)
+#define IDLE_TIMEOUT_US		     (5 * USEC_PER_MSEC)
 
 #define ICB_0_IRQ_MASK ((REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \
 			(REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \
@@ -96,6 +97,7 @@ static void ivpu_hw_timeouts_init(struct ivpu_device *vdev)
 	vdev->timeout.tdr = 2000;
 	vdev->timeout.reschedule_suspend = 10;
 	vdev->timeout.autosuspend = 10;
+	vdev->timeout.d0i3_entry_msg = 5;
 }
 
 static int ivpu_pll_wait_for_cmd_send(struct ivpu_device *vdev)
@@ -508,16 +510,6 @@ static int ivpu_boot_pwr_domain_enable(struct ivpu_device *vdev)
 	return ret;
 }
 
-static int ivpu_boot_pwr_domain_disable(struct ivpu_device *vdev)
-{
-	ivpu_boot_dpu_active_drive(vdev, false);
-	ivpu_boot_pwr_island_isolation_drive(vdev, true);
-	ivpu_boot_pwr_island_trickle_drive(vdev, false);
-	ivpu_boot_pwr_island_drive(vdev, false);
-
-	return ivpu_boot_wait_for_pwr_island_status(vdev, 0x0);
-}
-
 static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev)
 {
 	u32 val = REGV_RD32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES);
@@ -614,12 +606,37 @@ static int ivpu_hw_37xx_info_init(struct ivpu_device *vdev)
 	return 0;
 }
 
+static int ivpu_hw_37xx_ip_reset(struct ivpu_device *vdev)
+{
+	int ret;
+	u32 val;
+
+	if (IVPU_WA(punit_disabled))
+		return 0;
+
+	ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US);
+	if (ret) {
+		ivpu_err(vdev, "Timed out waiting for TRIGGER bit\n");
+		return ret;
+	}
+
+	val = REGB_RD32(VPU_37XX_BUTTRESS_VPU_IP_RESET);
+	val = REG_SET_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, val);
+	REGB_WR32(VPU_37XX_BUTTRESS_VPU_IP_RESET, val);
+
+	ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_IP_RESET, TRIGGER, 0, TIMEOUT_US);
+	if (ret)
+		ivpu_err(vdev, "Timed out waiting for RESET completion\n");
+
+	return ret;
+}
+
 static int ivpu_hw_37xx_reset(struct ivpu_device *vdev)
 {
 	int ret = 0;
 
-	if (ivpu_boot_pwr_domain_disable(vdev)) {
-		ivpu_err(vdev, "Failed to disable power domain\n");
+	if (ivpu_hw_37xx_ip_reset(vdev)) {
+		ivpu_err(vdev, "Failed to reset NPU\n");
 		ret = -EIO;
 	}
 
@@ -659,6 +676,11 @@ static int ivpu_hw_37xx_power_up(struct ivpu_device *vdev)
 {
 	int ret;
 
+	/* PLL requests may fail when powering down, so issue WP 0 here */
+	ret = ivpu_pll_disable(vdev);
+	if (ret)
+		ivpu_warn(vdev, "Failed to disable PLL: %d\n", ret);
+
 	ret = ivpu_hw_37xx_d0i3_disable(vdev);
 	if (ret)
 		ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret);
@@ -722,10 +744,23 @@ static bool ivpu_hw_37xx_is_idle(struct ivpu_device *vdev)
 	       REG_TEST_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, IDLE, val);
 }
 
+static int ivpu_hw_37xx_wait_for_idle(struct ivpu_device *vdev)
+{
+	return REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US);
+}
+
+static void ivpu_hw_37xx_save_d0i3_entry_timestamp(struct ivpu_device *vdev)
+{
+	vdev->hw->d0i3_entry_host_ts = ktime_get_boottime();
+	vdev->hw->d0i3_entry_vpu_ts = REGV_RD64(VPU_37XX_CPU_SS_TIM_PERF_FREE_CNT);
+}
+
 static int ivpu_hw_37xx_power_down(struct ivpu_device *vdev)
 {
 	int ret = 0;
 
+	ivpu_hw_37xx_save_d0i3_entry_timestamp(vdev);
+
 	if (!ivpu_hw_37xx_is_idle(vdev))
 		ivpu_warn(vdev, "VPU not idle during power down\n");
 
@@ -760,12 +795,22 @@ static void ivpu_hw_37xx_wdt_disable(struct ivpu_device *vdev)
 	REGV_WR32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, val);
 }
 
-static u32 ivpu_hw_37xx_pll_to_freq(u32 ratio, u32 config)
+static u32 ivpu_hw_37xx_profiling_freq_get(struct ivpu_device *vdev)
+{
+	return PLL_PROF_CLK_FREQ;
+}
+
+static void ivpu_hw_37xx_profiling_freq_drive(struct ivpu_device *vdev, bool enable)
+{
+	/* Profiling freq - is a debug feature. Unavailable on VPU 37XX. */
+}
+
+static u32 ivpu_hw_37xx_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
 {
 	u32 pll_clock = PLL_REF_CLK_FREQ * ratio;
 	u32 cpu_clock;
 
-	if ((config & 0xff) == PLL_RATIO_4_3)
+	if ((vdev->hw->config & 0xff) == PLL_RATIO_4_3)
 		cpu_clock = pll_clock * 2 / 4;
 	else
 		cpu_clock = pll_clock * 2 / 5;
@@ -784,7 +829,7 @@ static u32 ivpu_hw_37xx_reg_pll_freq_get(struct ivpu_device *vdev)
 	if (!ivpu_is_silicon(vdev))
 		return PLL_SIMULATION_FREQ;
 
-	return ivpu_hw_37xx_pll_to_freq(pll_curr_ratio, vdev->hw->config);
+	return ivpu_hw_37xx_ratio_to_freq(vdev, pll_curr_ratio);
 }
 
 static u32 ivpu_hw_37xx_reg_telemetry_offset_get(struct ivpu_device *vdev)
@@ -850,38 +895,35 @@ static void ivpu_hw_37xx_irq_disable(struct ivpu_device *vdev)
 
 static void ivpu_hw_37xx_irq_wdt_nce_handler(struct ivpu_device *vdev)
 {
-	ivpu_err_ratelimited(vdev, "WDT NCE irq\n");
-
-	ivpu_pm_schedule_recovery(vdev);
+	ivpu_pm_trigger_recovery(vdev, "WDT NCE IRQ");
 }
 
 static void ivpu_hw_37xx_irq_wdt_mss_handler(struct ivpu_device *vdev)
 {
-	ivpu_err_ratelimited(vdev, "WDT MSS irq\n");
-
 	ivpu_hw_wdt_disable(vdev);
-	ivpu_pm_schedule_recovery(vdev);
+	ivpu_pm_trigger_recovery(vdev, "WDT MSS IRQ");
 }
 
 static void ivpu_hw_37xx_irq_noc_firewall_handler(struct ivpu_device *vdev)
 {
-	ivpu_err_ratelimited(vdev, "NOC Firewall irq\n");
-
-	ivpu_pm_schedule_recovery(vdev);
+	ivpu_pm_trigger_recovery(vdev, "NOC Firewall IRQ");
 }
 
 /* Handler for IRQs from VPU core (irqV) */
-static u32 ivpu_hw_37xx_irqv_handler(struct ivpu_device *vdev, int irq)
+static bool ivpu_hw_37xx_irqv_handler(struct ivpu_device *vdev, int irq, bool *wake_thread)
 {
 	u32 status = REGV_RD32(VPU_37XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK;
 
+	if (!status)
+		return false;
+
 	REGV_WR32(VPU_37XX_HOST_SS_ICB_CLEAR_0, status);
 
 	if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status))
 		ivpu_mmu_irq_evtq_handler(vdev);
 
 	if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status))
-		ivpu_ipc_irq_handler(vdev);
+		ivpu_ipc_irq_handler(vdev, wake_thread);
 
 	if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status))
 		ivpu_dbg(vdev, IRQ, "MMU sync complete\n");
@@ -898,17 +940,17 @@ static u32 ivpu_hw_37xx_irqv_handler(struct ivpu_device *vdev, int irq)
 	if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status))
 		ivpu_hw_37xx_irq_noc_firewall_handler(vdev);
 
-	return status;
+	return true;
 }
 
 /* Handler for IRQs from Buttress core (irqB) */
-static u32 ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq)
+static bool ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq)
 {
 	u32 status = REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK;
 	bool schedule_recovery = false;
 
-	if (status == 0)
-		return 0;
+	if (!status)
+		return false;
 
 	if (REG_TEST_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status))
 		ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x",
@@ -942,25 +984,29 @@ static u32 ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq)
 		REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, status);
 
 	if (schedule_recovery)
-		ivpu_pm_schedule_recovery(vdev);
+		ivpu_pm_trigger_recovery(vdev, "Buttress IRQ");
 
-	return status;
+	return true;
 }
 
 static irqreturn_t ivpu_hw_37xx_irq_handler(int irq, void *ptr)
 {
 	struct ivpu_device *vdev = ptr;
-	u32 ret_irqv, ret_irqb;
+	bool irqv_handled, irqb_handled, wake_thread = false;
 
 	REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);
 
-	ret_irqv = ivpu_hw_37xx_irqv_handler(vdev, irq);
-	ret_irqb = ivpu_hw_37xx_irqb_handler(vdev, irq);
+	irqv_handled = ivpu_hw_37xx_irqv_handler(vdev, irq, &wake_thread);
+	irqb_handled = ivpu_hw_37xx_irqb_handler(vdev, irq);
 
 	/* Re-enable global interrupts to re-trigger MSI for pending interrupts */
 	REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);
 
-	return IRQ_RETVAL(ret_irqb | ret_irqv);
+	if (wake_thread)
+		return IRQ_WAKE_THREAD;
+	if (irqv_handled || irqb_handled)
+		return IRQ_HANDLED;
+	return IRQ_NONE;
 }
 
 static void ivpu_hw_37xx_diagnose_failure(struct ivpu_device *vdev)
@@ -997,12 +1043,16 @@ const struct ivpu_hw_ops ivpu_hw_37xx_ops = {
 	.info_init = ivpu_hw_37xx_info_init,
 	.power_up = ivpu_hw_37xx_power_up,
 	.is_idle = ivpu_hw_37xx_is_idle,
+	.wait_for_idle = ivpu_hw_37xx_wait_for_idle,
 	.power_down = ivpu_hw_37xx_power_down,
 	.reset = ivpu_hw_37xx_reset,
 	.boot_fw = ivpu_hw_37xx_boot_fw,
 	.wdt_disable = ivpu_hw_37xx_wdt_disable,
 	.diagnose_failure = ivpu_hw_37xx_diagnose_failure,
+	.profiling_freq_get = ivpu_hw_37xx_profiling_freq_get,
+	.profiling_freq_drive = ivpu_hw_37xx_profiling_freq_drive,
 	.reg_pll_freq_get = ivpu_hw_37xx_reg_pll_freq_get,
+	.ratio_to_freq = ivpu_hw_37xx_ratio_to_freq,
 	.reg_telemetry_offset_get = ivpu_hw_37xx_reg_telemetry_offset_get,
 	.reg_telemetry_size_get = ivpu_hw_37xx_reg_telemetry_size_get,
 	.reg_telemetry_enable_get = ivpu_hw_37xx_reg_telemetry_enable_get,
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h
index 4083beb5e9..f6fec19192 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h
+++ b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h
@@ -240,6 +240,8 @@
 #define VPU_37XX_CPU_SS_TIM_GEN_CONFIG					0x06021008u
 #define VPU_37XX_CPU_SS_TIM_GEN_CONFIG_WDOG_TO_INT_CLR_MASK		BIT_MASK(9)
 
+#define VPU_37XX_CPU_SS_TIM_PERF_FREE_CNT				0x06029000u
+
 #define VPU_37XX_CPU_SS_DOORBELL_0					0x06300000u
 #define VPU_37XX_CPU_SS_DOORBELL_0_SET_MASK				BIT_MASK(0)
 
diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c
index 40f9ee99ec..0be353ad87 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_40xx.c
@@ -39,6 +39,7 @@
 #define TIMEOUT_US		     (150 * USEC_PER_MSEC)
 #define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC)
 #define PLL_TIMEOUT_US		     (1500 * USEC_PER_MSEC)
+#define IDLE_TIMEOUT_US		     (5 * USEC_PER_MSEC)
 
 #define WEIGHTS_DEFAULT              0xf711f711u
 #define WEIGHTS_ATS_DEFAULT          0x0000f711u
@@ -139,18 +140,21 @@ static void ivpu_hw_timeouts_init(struct ivpu_device *vdev)
 		vdev->timeout.tdr = 2000000;
 		vdev->timeout.reschedule_suspend = 1000;
 		vdev->timeout.autosuspend = -1;
+		vdev->timeout.d0i3_entry_msg = 500;
 	} else if (ivpu_is_simics(vdev)) {
 		vdev->timeout.boot = 50;
 		vdev->timeout.jsm = 500;
 		vdev->timeout.tdr = 10000;
 		vdev->timeout.reschedule_suspend = 10;
 		vdev->timeout.autosuspend = -1;
+		vdev->timeout.d0i3_entry_msg = 100;
 	} else {
 		vdev->timeout.boot = 1000;
 		vdev->timeout.jsm = 500;
 		vdev->timeout.tdr = 2000;
 		vdev->timeout.reschedule_suspend = 10;
 		vdev->timeout.autosuspend = 10;
+		vdev->timeout.d0i3_entry_msg = 5;
 	}
 }
 
@@ -737,7 +741,7 @@ static int ivpu_hw_40xx_info_init(struct ivpu_device *vdev)
 	return 0;
 }
 
-static int ivpu_hw_40xx_reset(struct ivpu_device *vdev)
+static int ivpu_hw_40xx_ip_reset(struct ivpu_device *vdev)
 {
 	int ret;
 	u32 val;
@@ -759,6 +763,23 @@ static int ivpu_hw_40xx_reset(struct ivpu_device *vdev)
 	return ret;
 }
 
+static int ivpu_hw_40xx_reset(struct ivpu_device *vdev)
+{
+	int ret = 0;
+
+	if (ivpu_hw_40xx_ip_reset(vdev)) {
+		ivpu_err(vdev, "Failed to reset VPU IP\n");
+		ret = -EIO;
+	}
+
+	if (ivpu_pll_disable(vdev)) {
+		ivpu_err(vdev, "Failed to disable PLL\n");
+		ret = -EIO;
+	}
+
+	return ret;
+}
+
 static int ivpu_hw_40xx_d0i3_enable(struct ivpu_device *vdev)
 {
 	int ret;
@@ -819,12 +840,6 @@ static int ivpu_hw_40xx_power_up(struct ivpu_device *vdev)
 {
 	int ret;
 
-	ret = ivpu_hw_40xx_reset(vdev);
-	if (ret) {
-		ivpu_err(vdev, "Failed to reset HW: %d\n", ret);
-		return ret;
-	}
-
 	ret = ivpu_hw_40xx_d0i3_disable(vdev);
 	if (ret)
 		ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret);
@@ -893,11 +908,24 @@ static bool ivpu_hw_40xx_is_idle(struct ivpu_device *vdev)
 	       REG_TEST_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, IDLE, val);
 }
 
+static int ivpu_hw_40xx_wait_for_idle(struct ivpu_device *vdev)
+{
+	return REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US);
+}
+
+static void ivpu_hw_40xx_save_d0i3_entry_timestamp(struct ivpu_device *vdev)
+{
+	vdev->hw->d0i3_entry_host_ts = ktime_get_boottime();
+	vdev->hw->d0i3_entry_vpu_ts = REGV_RD64(VPU_40XX_CPU_SS_TIM_PERF_EXT_FREE_CNT);
+}
+
 static int ivpu_hw_40xx_power_down(struct ivpu_device *vdev)
 {
 	int ret = 0;
 
-	if (!ivpu_hw_40xx_is_idle(vdev) && ivpu_hw_40xx_reset(vdev))
+	ivpu_hw_40xx_save_d0i3_entry_timestamp(vdev);
+
+	if (!ivpu_hw_40xx_is_idle(vdev) && ivpu_hw_40xx_ip_reset(vdev))
 		ivpu_warn(vdev, "Failed to reset the VPU\n");
 
 	if (ivpu_pll_disable(vdev)) {
@@ -928,6 +956,19 @@ static void ivpu_hw_40xx_wdt_disable(struct ivpu_device *vdev)
 	REGV_WR32(VPU_40XX_CPU_SS_TIM_GEN_CONFIG, val);
 }
 
+static u32 ivpu_hw_40xx_profiling_freq_get(struct ivpu_device *vdev)
+{
+	return vdev->hw->pll.profiling_freq;
+}
+
+static void ivpu_hw_40xx_profiling_freq_drive(struct ivpu_device *vdev, bool enable)
+{
+	if (enable)
+		vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_HIGH;
+	else
+		vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT;
+}
+
 /* Register indirect accesses */
 static u32 ivpu_hw_40xx_reg_pll_freq_get(struct ivpu_device *vdev)
 {
@@ -939,6 +980,11 @@ static u32 ivpu_hw_40xx_reg_pll_freq_get(struct ivpu_device *vdev)
 	return PLL_RATIO_TO_FREQ(pll_curr_ratio);
 }
 
+static u32 ivpu_hw_40xx_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
+{
+	return PLL_RATIO_TO_FREQ(ratio);
+}
+
 static u32 ivpu_hw_40xx_reg_telemetry_offset_get(struct ivpu_device *vdev)
 {
 	return REGB_RD32(VPU_40XX_BUTTRESS_VPU_TELEMETRY_OFFSET);
@@ -1003,28 +1049,27 @@ static void ivpu_hw_40xx_irq_disable(struct ivpu_device *vdev)
 static void ivpu_hw_40xx_irq_wdt_nce_handler(struct ivpu_device *vdev)
 {
 	/* TODO: For LNN hang consider engine reset instead of full recovery */
-	ivpu_pm_schedule_recovery(vdev);
+	ivpu_pm_trigger_recovery(vdev, "WDT NCE IRQ");
 }
 
 static void ivpu_hw_40xx_irq_wdt_mss_handler(struct ivpu_device *vdev)
 {
 	ivpu_hw_wdt_disable(vdev);
-	ivpu_pm_schedule_recovery(vdev);
+	ivpu_pm_trigger_recovery(vdev, "WDT MSS IRQ");
 }
 
 static void ivpu_hw_40xx_irq_noc_firewall_handler(struct ivpu_device *vdev)
 {
-	ivpu_pm_schedule_recovery(vdev);
+	ivpu_pm_trigger_recovery(vdev, "NOC Firewall IRQ");
 }
 
 /* Handler for IRQs from VPU core (irqV) */
-static irqreturn_t ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq)
+static bool ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq, bool *wake_thread)
 {
 	u32 status = REGV_RD32(VPU_40XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK;
-	irqreturn_t ret = IRQ_NONE;
 
 	if (!status)
-		return IRQ_NONE;
+		return false;
 
 	REGV_WR32(VPU_40XX_HOST_SS_ICB_CLEAR_0, status);
 
@@ -1032,7 +1077,7 @@ static irqreturn_t ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq)
 		ivpu_mmu_irq_evtq_handler(vdev);
 
 	if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status))
-		ret |= ivpu_ipc_irq_handler(vdev);
+		ivpu_ipc_irq_handler(vdev, wake_thread);
 
 	if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status))
 		ivpu_dbg(vdev, IRQ, "MMU sync complete\n");
@@ -1049,17 +1094,17 @@ static irqreturn_t ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq)
 	if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status))
 		ivpu_hw_40xx_irq_noc_firewall_handler(vdev);
 
-	return ret;
+	return true;
 }
 
 /* Handler for IRQs from Buttress core (irqB) */
-static irqreturn_t ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
+static bool ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
 {
 	bool schedule_recovery = false;
 	u32 status = REGB_RD32(VPU_40XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK;
 
-	if (status == 0)
-		return IRQ_NONE;
+	if (!status)
+		return false;
 
 	if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status))
 		ivpu_dbg(vdev, IRQ, "FREQ_CHANGE");
@@ -1109,28 +1154,29 @@ static irqreturn_t ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
 	REGB_WR32(VPU_40XX_BUTTRESS_INTERRUPT_STAT, status);
 
 	if (schedule_recovery)
-		ivpu_pm_schedule_recovery(vdev);
+		ivpu_pm_trigger_recovery(vdev, "Buttress IRQ");
 
-	return IRQ_HANDLED;
+	return true;
 }
 
 static irqreturn_t ivpu_hw_40xx_irq_handler(int irq, void *ptr)
 {
+	bool irqv_handled, irqb_handled, wake_thread = false;
 	struct ivpu_device *vdev = ptr;
-	irqreturn_t ret = IRQ_NONE;
 
 	REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);
 
-	ret |= ivpu_hw_40xx_irqv_handler(vdev, irq);
-	ret |= ivpu_hw_40xx_irqb_handler(vdev, irq);
+	irqv_handled = ivpu_hw_40xx_irqv_handler(vdev, irq, &wake_thread);
+	irqb_handled = ivpu_hw_40xx_irqb_handler(vdev, irq);
 
 	/* Re-enable global interrupts to re-trigger MSI for pending interrupts */
 	REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);
 
-	if (ret & IRQ_WAKE_THREAD)
+	if (wake_thread)
 		return IRQ_WAKE_THREAD;
-
-	return ret;
+	if (irqv_handled || irqb_handled)
+		return IRQ_HANDLED;
+	return IRQ_NONE;
 }
 
 static void ivpu_hw_40xx_diagnose_failure(struct ivpu_device *vdev)
@@ -1180,12 +1226,16 @@ const struct ivpu_hw_ops ivpu_hw_40xx_ops = {
 	.info_init = ivpu_hw_40xx_info_init,
 	.power_up = ivpu_hw_40xx_power_up,
 	.is_idle = ivpu_hw_40xx_is_idle,
+	.wait_for_idle = ivpu_hw_40xx_wait_for_idle,
 	.power_down = ivpu_hw_40xx_power_down,
 	.reset = ivpu_hw_40xx_reset,
 	.boot_fw = ivpu_hw_40xx_boot_fw,
 	.wdt_disable = ivpu_hw_40xx_wdt_disable,
 	.diagnose_failure = ivpu_hw_40xx_diagnose_failure,
+	.profiling_freq_get = ivpu_hw_40xx_profiling_freq_get,
+	.profiling_freq_drive = ivpu_hw_40xx_profiling_freq_drive,
 	.reg_pll_freq_get = ivpu_hw_40xx_reg_pll_freq_get,
+	.ratio_to_freq = ivpu_hw_40xx_ratio_to_freq,
 	.reg_telemetry_offset_get = ivpu_hw_40xx_reg_telemetry_offset_get,
 	.reg_telemetry_size_get = ivpu_hw_40xx_reg_telemetry_size_get,
 	.reg_telemetry_enable_get = ivpu_hw_40xx_reg_telemetry_enable_get,
diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index a4ca40b184..f4c5392483 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -1,11 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
  */
 
 #include <linux/genalloc.h>
 #include <linux/highmem.h>
-#include <linux/kthread.h>
+#include <linux/pm_runtime.h>
 #include <linux/wait.h>
 
 #include "ivpu_drv.h"
@@ -17,19 +17,12 @@
 #include "ivpu_pm.h"
 
 #define IPC_MAX_RX_MSG	128
-#define IS_KTHREAD()	(get_current()->flags & PF_KTHREAD)
 
 struct ivpu_ipc_tx_buf {
 	struct ivpu_ipc_hdr ipc;
 	struct vpu_jsm_msg jsm;
 };
 
-struct ivpu_ipc_rx_msg {
-	struct list_head link;
-	struct ivpu_ipc_hdr *ipc_hdr;
-	struct vpu_jsm_msg *jsm_msg;
-};
-
 static void ivpu_ipc_msg_dump(struct ivpu_device *vdev, char *c,
 			      struct ivpu_ipc_hdr *ipc_hdr, u32 vpu_addr)
 {
@@ -139,8 +132,49 @@ static void ivpu_ipc_tx(struct ivpu_device *vdev, u32 vpu_addr)
 	ivpu_hw_reg_ipc_tx_set(vdev, vpu_addr);
 }
 
-void
-ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, u32 channel)
+static void
+ivpu_ipc_rx_msg_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
+		    struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg)
+{
+	struct ivpu_ipc_info *ipc = vdev->ipc;
+	struct ivpu_ipc_rx_msg *rx_msg;
+
+	lockdep_assert_held(&ipc->cons_lock);
+	lockdep_assert_irqs_disabled();
+
+	rx_msg = kzalloc(sizeof(*rx_msg), GFP_ATOMIC);
+	if (!rx_msg) {
+		ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg);
+		return;
+	}
+
+	atomic_inc(&ipc->rx_msg_count);
+
+	rx_msg->ipc_hdr = ipc_hdr;
+	rx_msg->jsm_msg = jsm_msg;
+	rx_msg->callback = cons->rx_callback;
+
+	if (rx_msg->callback) {
+		list_add_tail(&rx_msg->link, &ipc->cb_msg_list);
+	} else {
+		spin_lock(&cons->rx_lock);
+		list_add_tail(&rx_msg->link, &cons->rx_msg_list);
+		spin_unlock(&cons->rx_lock);
+		wake_up(&cons->rx_msg_wq);
+	}
+}
+
+static void
+ivpu_ipc_rx_msg_del(struct ivpu_device *vdev, struct ivpu_ipc_rx_msg *rx_msg)
+{
+	list_del(&rx_msg->link);
+	ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
+	atomic_dec(&vdev->ipc->rx_msg_count);
+	kfree(rx_msg);
+}
+
+void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
+			   u32 channel, ivpu_ipc_rx_callback_t rx_callback)
 {
 	struct ivpu_ipc_info *ipc = vdev->ipc;
 
@@ -148,13 +182,15 @@ ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
 	cons->channel = channel;
 	cons->tx_vpu_addr = 0;
 	cons->request_id = 0;
-	spin_lock_init(&cons->rx_msg_lock);
+	cons->aborted = false;
+	cons->rx_callback = rx_callback;
+	spin_lock_init(&cons->rx_lock);
 	INIT_LIST_HEAD(&cons->rx_msg_list);
 	init_waitqueue_head(&cons->rx_msg_wq);
 
-	spin_lock_irq(&ipc->cons_list_lock);
+	spin_lock_irq(&ipc->cons_lock);
 	list_add_tail(&cons->link, &ipc->cons_list);
-	spin_unlock_irq(&ipc->cons_list_lock);
+	spin_unlock_irq(&ipc->cons_lock);
 }
 
 void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons)
@@ -162,18 +198,14 @@ void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *c
 	struct ivpu_ipc_info *ipc = vdev->ipc;
 	struct ivpu_ipc_rx_msg *rx_msg, *r;
 
-	spin_lock_irq(&ipc->cons_list_lock);
+	spin_lock_irq(&ipc->cons_lock);
 	list_del(&cons->link);
-	spin_unlock_irq(&ipc->cons_list_lock);
-
-	spin_lock_irq(&cons->rx_msg_lock);
-	list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link) {
-		list_del(&rx_msg->link);
-		ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
-		atomic_dec(&ipc->rx_msg_count);
-		kfree(rx_msg);
-	}
-	spin_unlock_irq(&cons->rx_msg_lock);
+	spin_unlock_irq(&ipc->cons_lock);
+
+	spin_lock_irq(&cons->rx_lock);
+	list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link)
+		ivpu_ipc_rx_msg_del(vdev, rx_msg);
+	spin_unlock_irq(&cons->rx_lock);
 
 	ivpu_ipc_tx_release(vdev, cons->tx_vpu_addr);
 }
@@ -202,52 +234,61 @@ unlock:
 	return ret;
 }
 
+static bool ivpu_ipc_rx_need_wakeup(struct ivpu_ipc_consumer *cons)
+{
+	bool ret;
+
+	spin_lock_irq(&cons->rx_lock);
+	ret = !list_empty(&cons->rx_msg_list) || cons->aborted;
+	spin_unlock_irq(&cons->rx_lock);
+
+	return ret;
+}
+
 int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
 		     struct ivpu_ipc_hdr *ipc_buf,
-		     struct vpu_jsm_msg *ipc_payload, unsigned long timeout_ms)
+		     struct vpu_jsm_msg *jsm_msg, unsigned long timeout_ms)
 {
-	struct ivpu_ipc_info *ipc = vdev->ipc;
 	struct ivpu_ipc_rx_msg *rx_msg;
 	int wait_ret, ret = 0;
 
+	if (drm_WARN_ONCE(&vdev->drm, cons->rx_callback, "Consumer works only in async mode\n"))
+		return -EINVAL;
+
 	wait_ret = wait_event_timeout(cons->rx_msg_wq,
-				      (IS_KTHREAD() && kthread_should_stop()) ||
-				      !list_empty(&cons->rx_msg_list),
+				      ivpu_ipc_rx_need_wakeup(cons),
 				      msecs_to_jiffies(timeout_ms));
 
-	if (IS_KTHREAD() && kthread_should_stop())
-		return -EINTR;
-
 	if (wait_ret == 0)
 		return -ETIMEDOUT;
 
-	spin_lock_irq(&cons->rx_msg_lock);
+	spin_lock_irq(&cons->rx_lock);
+	if (cons->aborted) {
+		spin_unlock_irq(&cons->rx_lock);
+		return -ECANCELED;
+	}
 	rx_msg = list_first_entry_or_null(&cons->rx_msg_list, struct ivpu_ipc_rx_msg, link);
 	if (!rx_msg) {
-		spin_unlock_irq(&cons->rx_msg_lock);
+		spin_unlock_irq(&cons->rx_lock);
 		return -EAGAIN;
 	}
-	list_del(&rx_msg->link);
-	spin_unlock_irq(&cons->rx_msg_lock);
 
 	if (ipc_buf)
 		memcpy(ipc_buf, rx_msg->ipc_hdr, sizeof(*ipc_buf));
 	if (rx_msg->jsm_msg) {
-		u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*ipc_payload));
+		u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*jsm_msg));
 
 		if (rx_msg->jsm_msg->result != VPU_JSM_STATUS_SUCCESS) {
 			ivpu_dbg(vdev, IPC, "IPC resp result error: %d\n", rx_msg->jsm_msg->result);
 			ret = -EBADMSG;
 		}
 
-		if (ipc_payload)
-			memcpy(ipc_payload, rx_msg->jsm_msg, size);
+		if (jsm_msg)
+			memcpy(jsm_msg, rx_msg->jsm_msg, size);
 	}
 
-	ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
-	atomic_dec(&ipc->rx_msg_count);
-	kfree(rx_msg);
-
+	ivpu_ipc_rx_msg_del(vdev, rx_msg);
+	spin_unlock_irq(&cons->rx_lock);
 	return ret;
 }
 
@@ -260,7 +301,7 @@ ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req
 	struct ivpu_ipc_consumer cons;
 	int ret;
 
-	ivpu_ipc_consumer_add(vdev, &cons, channel);
+	ivpu_ipc_consumer_add(vdev, &cons, channel, NULL);
 
 	ret = ivpu_ipc_send(vdev, &cons, req);
 	if (ret) {
@@ -285,33 +326,41 @@ consumer_del:
 	return ret;
 }
 
-int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
-			  enum vpu_ipc_msg_type expected_resp_type,
-			  struct vpu_jsm_msg *resp, u32 channel,
-			  unsigned long timeout_ms)
+int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+				 enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
+				 u32 channel, unsigned long timeout_ms)
 {
 	struct vpu_jsm_msg hb_req = { .type = VPU_JSM_MSG_QUERY_ENGINE_HB };
 	struct vpu_jsm_msg hb_resp;
 	int ret, hb_ret;
 
-	ret = ivpu_rpm_get(vdev);
-	if (ret < 0)
-		return ret;
+	drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev));
 
-	ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp_type, resp,
-					     channel, timeout_ms);
+	ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp, resp, channel, timeout_ms);
 	if (ret != -ETIMEDOUT)
-		goto rpm_put;
+		return ret;
 
 	hb_ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE,
 						&hb_resp, VPU_IPC_CHAN_ASYNC_CMD,
 						vdev->timeout.jsm);
-	if (hb_ret == -ETIMEDOUT) {
-		ivpu_hw_diagnose_failure(vdev);
-		ivpu_pm_schedule_recovery(vdev);
-	}
+	if (hb_ret == -ETIMEDOUT)
+		ivpu_pm_trigger_recovery(vdev, "IPC timeout");
+
+	return ret;
+}
+
+int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+			  enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
+			  u32 channel, unsigned long timeout_ms)
+{
+	int ret;
+
+	ret = ivpu_rpm_get(vdev);
+	if (ret < 0)
+		return ret;
+
+	ret = ivpu_ipc_send_receive_active(vdev, req, expected_resp, resp, channel, timeout_ms);
 
-rpm_put:
 	ivpu_rpm_put(vdev);
 	return ret;
 }
@@ -329,35 +378,7 @@ ivpu_ipc_match_consumer(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons
 	return false;
 }
 
-static void
-ivpu_ipc_dispatch(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
-		  struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg)
-{
-	struct ivpu_ipc_info *ipc = vdev->ipc;
-	struct ivpu_ipc_rx_msg *rx_msg;
-	unsigned long flags;
-
-	lockdep_assert_held(&ipc->cons_list_lock);
-
-	rx_msg = kzalloc(sizeof(*rx_msg), GFP_ATOMIC);
-	if (!rx_msg) {
-		ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg);
-		return;
-	}
-
-	atomic_inc(&ipc->rx_msg_count);
-
-	rx_msg->ipc_hdr = ipc_hdr;
-	rx_msg->jsm_msg = jsm_msg;
-
-	spin_lock_irqsave(&cons->rx_msg_lock, flags);
-	list_add_tail(&rx_msg->link, &cons->rx_msg_list);
-	spin_unlock_irqrestore(&cons->rx_msg_lock, flags);
-
-	wake_up(&cons->rx_msg_wq);
-}
-
-int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
+void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread)
 {
 	struct ivpu_ipc_info *ipc = vdev->ipc;
 	struct ivpu_ipc_consumer *cons;
@@ -375,7 +396,7 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
 		vpu_addr = ivpu_hw_reg_ipc_rx_addr_get(vdev);
 		if (vpu_addr == REG_IO_ERROR) {
 			ivpu_err_ratelimited(vdev, "Failed to read IPC rx addr register\n");
-			return -EIO;
+			return;
 		}
 
 		ipc_hdr = ivpu_to_cpu_addr(ipc->mem_rx, vpu_addr);
@@ -405,15 +426,15 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
 		}
 
 		dispatched = false;
-		spin_lock_irqsave(&ipc->cons_list_lock, flags);
+		spin_lock_irqsave(&ipc->cons_lock, flags);
 		list_for_each_entry(cons, &ipc->cons_list, link) {
 			if (ivpu_ipc_match_consumer(vdev, cons, ipc_hdr, jsm_msg)) {
-				ivpu_ipc_dispatch(vdev, cons, ipc_hdr, jsm_msg);
+				ivpu_ipc_rx_msg_add(vdev, cons, ipc_hdr, jsm_msg);
 				dispatched = true;
 				break;
 			}
 		}
-		spin_unlock_irqrestore(&ipc->cons_list_lock, flags);
+		spin_unlock_irqrestore(&ipc->cons_lock, flags);
 
 		if (!dispatched) {
 			ivpu_dbg(vdev, IPC, "IPC RX msg 0x%x dropped (no consumer)\n", vpu_addr);
@@ -421,7 +442,28 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
 		}
 	}
 
-	return 0;
+	if (wake_thread)
+		*wake_thread = !list_empty(&ipc->cb_msg_list);
+}
+
+irqreturn_t ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev)
+{
+	struct ivpu_ipc_info *ipc = vdev->ipc;
+	struct ivpu_ipc_rx_msg *rx_msg, *r;
+	struct list_head cb_msg_list;
+
+	INIT_LIST_HEAD(&cb_msg_list);
+
+	spin_lock_irq(&ipc->cons_lock);
+	list_splice_tail_init(&ipc->cb_msg_list, &cb_msg_list);
+	spin_unlock_irq(&ipc->cons_lock);
+
+	list_for_each_entry_safe(rx_msg, r, &cb_msg_list, link) {
+		rx_msg->callback(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
+		ivpu_ipc_rx_msg_del(vdev, rx_msg);
+	}
+
+	return IRQ_HANDLED;
 }
 
 int ivpu_ipc_init(struct ivpu_device *vdev)
@@ -456,10 +498,14 @@ int ivpu_ipc_init(struct ivpu_device *vdev)
 		goto err_free_rx;
 	}
 
+	spin_lock_init(&ipc->cons_lock);
 	INIT_LIST_HEAD(&ipc->cons_list);
-	spin_lock_init(&ipc->cons_list_lock);
-	drmm_mutex_init(&vdev->drm, &ipc->lock);
-
+	INIT_LIST_HEAD(&ipc->cb_msg_list);
+	ret = drmm_mutex_init(&vdev->drm, &ipc->lock);
+	if (ret) {
+		ivpu_err(vdev, "Failed to initialize ipc->lock, ret %d\n", ret);
+		goto err_free_rx;
+	}
 	ivpu_ipc_reset(vdev);
 	return 0;
 
@@ -472,6 +518,13 @@ err_free_tx:
 
 void ivpu_ipc_fini(struct ivpu_device *vdev)
 {
+	struct ivpu_ipc_info *ipc = vdev->ipc;
+
+	drm_WARN_ON(&vdev->drm, ipc->on);
+	drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cons_list));
+	drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cb_msg_list));
+	drm_WARN_ON(&vdev->drm, atomic_read(&ipc->rx_msg_count) > 0);
+
 	ivpu_ipc_mem_fini(vdev);
 }
 
@@ -488,16 +541,27 @@ void ivpu_ipc_disable(struct ivpu_device *vdev)
 {
 	struct ivpu_ipc_info *ipc = vdev->ipc;
 	struct ivpu_ipc_consumer *cons, *c;
-	unsigned long flags;
+	struct ivpu_ipc_rx_msg *rx_msg, *r;
+
+	drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cb_msg_list));
 
 	mutex_lock(&ipc->lock);
 	ipc->on = false;
 	mutex_unlock(&ipc->lock);
 
-	spin_lock_irqsave(&ipc->cons_list_lock, flags);
-	list_for_each_entry_safe(cons, c, &ipc->cons_list, link)
+	spin_lock_irq(&ipc->cons_lock);
+	list_for_each_entry_safe(cons, c, &ipc->cons_list, link) {
+		spin_lock(&cons->rx_lock);
+		if (!cons->rx_callback)
+			cons->aborted = true;
+		list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link)
+			ivpu_ipc_rx_msg_del(vdev, rx_msg);
+		spin_unlock(&cons->rx_lock);
 		wake_up(&cons->rx_msg_wq);
-	spin_unlock_irqrestore(&ipc->cons_list_lock, flags);
+	}
+	spin_unlock_irq(&ipc->cons_lock);
+
+	drm_WARN_ON(&vdev->drm, atomic_read(&ipc->rx_msg_count) > 0);
 }
 
 void ivpu_ipc_reset(struct ivpu_device *vdev)
@@ -505,6 +569,7 @@ void ivpu_ipc_reset(struct ivpu_device *vdev)
 	struct ivpu_ipc_info *ipc = vdev->ipc;
 
 	mutex_lock(&ipc->lock);
+	drm_WARN_ON(&vdev->drm, ipc->on);
 
 	memset(ivpu_bo_vaddr(ipc->mem_tx), 0, ivpu_bo_size(ipc->mem_tx));
 	memset(ivpu_bo_vaddr(ipc->mem_rx), 0, ivpu_bo_size(ipc->mem_rx));
diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h
index 68f5b6668e..40ca3cc4e6 100644
--- a/drivers/accel/ivpu/ivpu_ipc.h
+++ b/drivers/accel/ivpu/ivpu_ipc.h
@@ -42,13 +42,26 @@ struct ivpu_ipc_hdr {
 	u8 status;
 } __packed __aligned(IVPU_IPC_ALIGNMENT);
 
+typedef void (*ivpu_ipc_rx_callback_t)(struct ivpu_device *vdev,
+				       struct ivpu_ipc_hdr *ipc_hdr,
+				       struct vpu_jsm_msg *jsm_msg);
+
+struct ivpu_ipc_rx_msg {
+	struct list_head link;
+	struct ivpu_ipc_hdr *ipc_hdr;
+	struct vpu_jsm_msg *jsm_msg;
+	ivpu_ipc_rx_callback_t callback;
+};
+
 struct ivpu_ipc_consumer {
 	struct list_head link;
 	u32 channel;
 	u32 tx_vpu_addr;
 	u32 request_id;
+	bool aborted;
+	ivpu_ipc_rx_callback_t rx_callback;
 
-	spinlock_t rx_msg_lock; /* Protects rx_msg_list */
+	spinlock_t rx_lock; /* Protects rx_msg_list and aborted */
 	struct list_head rx_msg_list;
 	wait_queue_head_t rx_msg_wq;
 };
@@ -60,8 +73,9 @@ struct ivpu_ipc_info {
 
 	atomic_t rx_msg_count;
 
-	spinlock_t cons_list_lock; /* Protects cons_list */
+	spinlock_t cons_lock; /* Protects cons_list and cb_msg_list */
 	struct list_head cons_list;
+	struct list_head cb_msg_list;
 
 	atomic_t request_id;
 	struct mutex lock; /* Lock on status */
@@ -75,19 +89,22 @@ void ivpu_ipc_enable(struct ivpu_device *vdev);
 void ivpu_ipc_disable(struct ivpu_device *vdev);
 void ivpu_ipc_reset(struct ivpu_device *vdev);
 
-int ivpu_ipc_irq_handler(struct ivpu_device *vdev);
+void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread);
+irqreturn_t ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev);
 
 void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
-			   u32 channel);
+			   u32 channel, ivpu_ipc_rx_callback_t callback);
 void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons);
 
 int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
-		     struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *ipc_payload,
+		     struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *jsm_msg,
 		     unsigned long timeout_ms);
 
+int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+				 enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
+				 u32 channel, unsigned long timeout_ms);
 int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
-			  enum vpu_ipc_msg_type expected_resp_type,
-			  struct vpu_jsm_msg *resp, u32 channel,
-			  unsigned long timeout_ms);
+			  enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
+			  u32 channel, unsigned long timeout_ms);
 
 #endif /* __IVPU_IPC_H__ */
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index 8983e3a4fd..e70cfb8593 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -7,7 +7,6 @@
 
 #include <linux/bitfield.h>
 #include <linux/highmem.h>
-#include <linux/kthread.h>
 #include <linux/pci.h>
 #include <linux/module.h>
 #include <uapi/drm/ivpu_accel.h>
@@ -24,10 +23,6 @@
 #define JOB_ID_CONTEXT_MASK  GENMASK(31, 8)
 #define JOB_MAX_BUFFER_COUNT 65535
 
-static unsigned int ivpu_tdr_timeout_ms;
-module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, uint, 0644);
-MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");
-
 static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq)
 {
 	ivpu_hw_reg_db_set(vdev, cmdq->db_id);
@@ -117,22 +112,20 @@ static void ivpu_cmdq_release_locked(struct ivpu_file_priv *file_priv, u16 engin
 	}
 }
 
-void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv)
+void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv)
 {
 	int i;
 
-	mutex_lock(&file_priv->lock);
+	lockdep_assert_held(&file_priv->lock);
 
 	for (i = 0; i < IVPU_NUM_ENGINES; i++)
 		ivpu_cmdq_release_locked(file_priv, i);
-
-	mutex_unlock(&file_priv->lock);
 }
 
 /*
  * Mark the doorbell as unregistered and reset job queue pointers.
  * This function needs to be called when the VPU hardware is restarted
- * and FW looses job queue state. The next time job queue is used it
+ * and FW loses job queue state. The next time job queue is used it
  * will be registered again.
  */
 static void ivpu_cmdq_reset_locked(struct ivpu_file_priv *file_priv, u16 engine)
@@ -166,15 +159,13 @@ void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev)
 	struct ivpu_file_priv *file_priv;
 	unsigned long ctx_id;
 
-	xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
-		file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id);
-		if (!file_priv)
-			continue;
+	mutex_lock(&vdev->context_list_lock);
 
+	xa_for_each(&vdev->context_xa, ctx_id, file_priv)
 		ivpu_cmdq_reset_all(file_priv);
 
-		ivpu_file_priv_put(&file_priv);
-	}
+	mutex_unlock(&vdev->context_list_lock);
+
 }
 
 static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
@@ -196,6 +187,8 @@ static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
 	entry->batch_buf_addr = job->cmd_buf_vpu_addr;
 	entry->job_id = job->job_id;
 	entry->flags = 0;
+	if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_SUBMISSION))
+		entry->flags = VPU_JOB_FLAGS_NULL_SUBMISSION_MASK;
 	wmb(); /* Ensure that tail is updated after filling entry */
 	header->tail = next_entry;
 	wmb(); /* Flush WC buffer for jobq header */
@@ -246,60 +239,32 @@ static struct dma_fence *ivpu_fence_create(struct ivpu_device *vdev)
 	return &fence->base;
 }
 
-static void job_get(struct ivpu_job *job, struct ivpu_job **link)
-{
-	struct ivpu_device *vdev = job->vdev;
-
-	kref_get(&job->ref);
-	*link = job;
-
-	ivpu_dbg(vdev, KREF, "Job get: id %u refcount %u\n", job->job_id, kref_read(&job->ref));
-}
-
-static void job_release(struct kref *ref)
+static void ivpu_job_destroy(struct ivpu_job *job)
 {
-	struct ivpu_job *job = container_of(ref, struct ivpu_job, ref);
 	struct ivpu_device *vdev = job->vdev;
 	u32 i;
 
+	ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d engine %d",
+		 job->job_id, job->file_priv->ctx.id, job->engine_idx);
+
 	for (i = 0; i < job->bo_count; i++)
 		if (job->bos[i])
-			drm_gem_object_put(&job->bos[i]->base);
+			drm_gem_object_put(&job->bos[i]->base.base);
 
 	dma_fence_put(job->done_fence);
 	ivpu_file_priv_put(&job->file_priv);
-
-	ivpu_dbg(vdev, KREF, "Job released: id %u\n", job->job_id);
 	kfree(job);
-
-	/* Allow the VPU to get suspended, must be called after ivpu_file_priv_put() */
-	ivpu_rpm_put(vdev);
-}
-
-static void job_put(struct ivpu_job *job)
-{
-	struct ivpu_device *vdev = job->vdev;
-
-	ivpu_dbg(vdev, KREF, "Job put: id %u refcount %u\n", job->job_id, kref_read(&job->ref));
-	kref_put(&job->ref, job_release);
 }
 
 static struct ivpu_job *
-ivpu_create_job(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count)
+ivpu_job_create(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count)
 {
 	struct ivpu_device *vdev = file_priv->vdev;
 	struct ivpu_job *job;
-	int ret;
-
-	ret = ivpu_rpm_get(vdev);
-	if (ret < 0)
-		return NULL;
 
 	job = kzalloc(struct_size(job, bos, bo_count), GFP_KERNEL);
 	if (!job)
-		goto err_rpm_put;
-
-	kref_init(&job->ref);
+		return NULL;
 
 	job->vdev = vdev;
 	job->engine_idx = engine_idx;
@@ -313,17 +278,14 @@ ivpu_create_job(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count)
 	job->file_priv = ivpu_file_priv_get(file_priv);
 
 	ivpu_dbg(vdev, JOB, "Job created: ctx %2d engine %d", file_priv->ctx.id, job->engine_idx);
-
 	return job;
 
 err_free_job:
 	kfree(job);
-err_rpm_put:
-	ivpu_rpm_put(vdev);
 	return NULL;
 }
 
-static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status)
+static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status)
 {
 	struct ivpu_job *job;
 
@@ -332,7 +294,7 @@ static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status)
 		return -ENOENT;
 
 	if (job->file_priv->has_mmu_faults)
-		job_status = VPU_JSM_STATUS_ABORTED;
+		job_status = DRM_IVPU_JOB_STATUS_ABORTED;
 
 	job->bos[CMD_BUF_IDX]->job_status = job_status;
 	dma_fence_signal(job->done_fence);
@@ -340,21 +302,11 @@ static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status)
 	ivpu_dbg(vdev, JOB, "Job complete:  id %3u ctx %2d engine %d status 0x%x\n",
 		 job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status);
 
-	job_put(job);
-	return 0;
-}
-
-static void ivpu_job_done_message(struct ivpu_device *vdev, void *msg)
-{
-	struct vpu_ipc_msg_payload_job_done *payload;
-	struct vpu_jsm_msg *job_ret_msg = msg;
-	int ret;
-
-	payload = (struct vpu_ipc_msg_payload_job_done *)&job_ret_msg->payload;
+	ivpu_job_destroy(job);
+	ivpu_stop_job_timeout_detection(vdev);
 
-	ret = ivpu_job_done(vdev, payload->job_id, payload->job_status);
-	if (ret)
-		ivpu_err(vdev, "Failed to finish job %d: %d\n", payload->job_id, ret);
+	ivpu_rpm_put(vdev);
+	return 0;
 }
 
 void ivpu_jobs_abort_all(struct ivpu_device *vdev)
@@ -363,10 +315,10 @@ void ivpu_jobs_abort_all(struct ivpu_device *vdev)
 	unsigned long id;
 
 	xa_for_each(&vdev->submitted_jobs_xa, id, job)
-		ivpu_job_done(vdev, id, VPU_JSM_STATUS_ABORTED);
+		ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED);
 }
 
-static int ivpu_direct_job_submission(struct ivpu_job *job)
+static int ivpu_job_submit(struct ivpu_job *job)
 {
 	struct ivpu_file_priv *file_priv = job->file_priv;
 	struct ivpu_device *vdev = job->vdev;
@@ -374,51 +326,65 @@ static int ivpu_direct_job_submission(struct ivpu_job *job)
 	struct ivpu_cmdq *cmdq;
 	int ret;
 
+	ret = ivpu_rpm_get(vdev);
+	if (ret < 0)
+		return ret;
+
 	mutex_lock(&file_priv->lock);
 
 	cmdq = ivpu_cmdq_acquire(job->file_priv, job->engine_idx);
 	if (!cmdq) {
-		ivpu_warn(vdev, "Failed get job queue, ctx %d engine %d\n",
-			  file_priv->ctx.id, job->engine_idx);
+		ivpu_warn_ratelimited(vdev, "Failed get job queue, ctx %d engine %d\n",
+				      file_priv->ctx.id, job->engine_idx);
 		ret = -EINVAL;
-		goto err_unlock;
+		goto err_unlock_file_priv;
 	}
 
 	job_id_range.min = FIELD_PREP(JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1));
 	job_id_range.max = job_id_range.min | JOB_ID_JOB_MASK;
 
-	job_get(job, &job);
-	ret = xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL);
+	xa_lock(&vdev->submitted_jobs_xa);
+	ret = __xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL);
 	if (ret) {
-		ivpu_warn_ratelimited(vdev, "Failed to allocate job id: %d\n", ret);
-		goto err_job_put;
+		ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n",
+			 file_priv->ctx.id);
+		ret = -EBUSY;
+		goto err_unlock_submitted_jobs_xa;
 	}
 
 	ret = ivpu_cmdq_push_job(cmdq, job);
 	if (ret)
-		goto err_xa_erase;
+		goto err_erase_xa;
 
-	ivpu_dbg(vdev, JOB, "Job submitted: id %3u addr 0x%llx ctx %2d engine %d next %d\n",
-		 job->job_id, job->cmd_buf_vpu_addr, file_priv->ctx.id,
-		 job->engine_idx, cmdq->jobq->header.tail);
+	ivpu_start_job_timeout_detection(vdev);
 
-	if (ivpu_test_mode == IVPU_TEST_MODE_NULL_HW) {
-		ivpu_job_done(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
+	if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) {
 		cmdq->jobq->header.head = cmdq->jobq->header.tail;
 		wmb(); /* Flush WC buffer for jobq header */
 	} else {
 		ivpu_cmdq_ring_db(vdev, cmdq);
 	}
 
+	ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d addr 0x%llx next %d\n",
+		 job->job_id, file_priv->ctx.id, job->engine_idx,
+		 job->cmd_buf_vpu_addr, cmdq->jobq->header.tail);
+
+	xa_unlock(&vdev->submitted_jobs_xa);
+
 	mutex_unlock(&file_priv->lock);
+
+	if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW))
+		ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
+
 	return 0;
 
-err_xa_erase:
-	xa_erase(&vdev->submitted_jobs_xa, job->job_id);
-err_job_put:
-	job_put(job);
-err_unlock:
+err_erase_xa:
+	__xa_erase(&vdev->submitted_jobs_xa, job->job_id);
+err_unlock_submitted_jobs_xa:
+	xa_unlock(&vdev->submitted_jobs_xa);
+err_unlock_file_priv:
 	mutex_unlock(&file_priv->lock);
+	ivpu_rpm_put(vdev);
 	return ret;
 }
 
@@ -448,7 +414,7 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
 	}
 
 	bo = job->bos[CMD_BUF_IDX];
-	if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ)) {
+	if (!dma_resv_test_signaled(bo->base.base.resv, DMA_RESV_USAGE_READ)) {
 		ivpu_warn(vdev, "Buffer is already in use\n");
 		return -EBUSY;
 	}
@@ -468,7 +434,7 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
 	}
 
 	for (i = 0; i < buf_count; i++) {
-		ret = dma_resv_reserve_fences(job->bos[i]->base.resv, 1);
+		ret = dma_resv_reserve_fences(job->bos[i]->base.base.resv, 1);
 		if (ret) {
 			ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret);
 			goto unlock_reservations;
@@ -477,7 +443,7 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
 
 	for (i = 0; i < buf_count; i++) {
 		usage = (i == CMD_BUF_IDX) ? DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_BOOKKEEP;
-		dma_resv_add_fence(job->bos[i]->base.resv, job->done_fence, usage);
+		dma_resv_add_fence(job->bos[i]->base.base.resv, job->done_fence, usage);
 	}
 
 unlock_reservations:
@@ -500,6 +466,9 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (params->engine > DRM_IVPU_ENGINE_COPY)
 		return -EINVAL;
 
+	if (params->priority > DRM_IVPU_JOB_PRIORITY_REALTIME)
+		return -EINVAL;
+
 	if (params->buffer_count == 0 || params->buffer_count > JOB_MAX_BUFFER_COUNT)
 		return -EINVAL;
 
@@ -521,102 +490,82 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 			     params->buffer_count * sizeof(u32));
 	if (ret) {
 		ret = -EFAULT;
-		goto free_handles;
+		goto err_free_handles;
 	}
 
 	if (!drm_dev_enter(&vdev->drm, &idx)) {
 		ret = -ENODEV;
-		goto free_handles;
+		goto err_free_handles;
 	}
 
 	ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n",
 		 file_priv->ctx.id, params->buffer_count);
 
-	job = ivpu_create_job(file_priv, params->engine, params->buffer_count);
+	job = ivpu_job_create(file_priv, params->engine, params->buffer_count);
 	if (!job) {
 		ivpu_err(vdev, "Failed to create job\n");
 		ret = -ENOMEM;
-		goto dev_exit;
+		goto err_exit_dev;
 	}
 
 	ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, params->buffer_count,
 					      params->commands_offset);
 	if (ret) {
-		ivpu_err(vdev, "Failed to prepare job, ret %d\n", ret);
-		goto job_put;
+		ivpu_err(vdev, "Failed to prepare job: %d\n", ret);
+		goto err_destroy_job;
 	}
 
-	ret = ivpu_direct_job_submission(job);
-	if (ret) {
-		dma_fence_signal(job->done_fence);
-		ivpu_err(vdev, "Failed to submit job to the HW, ret %d\n", ret);
-	}
+	down_read(&vdev->pm->reset_lock);
+	ret = ivpu_job_submit(job);
+	up_read(&vdev->pm->reset_lock);
+	if (ret)
+		goto err_signal_fence;
 
-job_put:
-	job_put(job);
-dev_exit:
 	drm_dev_exit(idx);
-free_handles:
 	kfree(buf_handles);
+	return ret;
 
+err_signal_fence:
+	dma_fence_signal(job->done_fence);
+err_destroy_job:
+	ivpu_job_destroy(job);
+err_exit_dev:
+	drm_dev_exit(idx);
+err_free_handles:
+	kfree(buf_handles);
 	return ret;
 }
 
-static int ivpu_job_done_thread(void *arg)
+static void
+ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
+		       struct vpu_jsm_msg *jsm_msg)
 {
-	struct ivpu_device *vdev = (struct ivpu_device *)arg;
-	struct ivpu_ipc_consumer cons;
-	struct vpu_jsm_msg jsm_msg;
-	bool jobs_submitted;
-	unsigned int timeout;
+	struct vpu_ipc_msg_payload_job_done *payload;
 	int ret;
 
-	ivpu_dbg(vdev, JOB, "Started %s\n", __func__);
-
-	ivpu_ipc_consumer_add(vdev, &cons, VPU_IPC_CHAN_JOB_RET);
-
-	while (!kthread_should_stop()) {
-		timeout = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
-		jobs_submitted = !xa_empty(&vdev->submitted_jobs_xa);
-		ret = ivpu_ipc_receive(vdev, &cons, NULL, &jsm_msg, timeout);
-		if (!ret) {
-			ivpu_job_done_message(vdev, &jsm_msg);
-		} else if (ret == -ETIMEDOUT) {
-			if (jobs_submitted && !xa_empty(&vdev->submitted_jobs_xa)) {
-				ivpu_err(vdev, "TDR detected, timeout %d ms", timeout);
-				ivpu_hw_diagnose_failure(vdev);
-				ivpu_pm_schedule_recovery(vdev);
-			}
-		}
+	if (!jsm_msg) {
+		ivpu_err(vdev, "IPC message has no JSM payload\n");
+		return;
 	}
 
-	ivpu_ipc_consumer_del(vdev, &cons);
-
-	ivpu_jobs_abort_all(vdev);
+	if (jsm_msg->result != VPU_JSM_STATUS_SUCCESS) {
+		ivpu_err(vdev, "Invalid JSM message result: %d\n", jsm_msg->result);
+		return;
+	}
 
-	ivpu_dbg(vdev, JOB, "Stopped %s\n", __func__);
-	return 0;
+	payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload;
+	ret = ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status);
+	if (!ret && !xa_empty(&vdev->submitted_jobs_xa))
+		ivpu_start_job_timeout_detection(vdev);
 }
 
-int ivpu_job_done_thread_init(struct ivpu_device *vdev)
+void ivpu_job_done_consumer_init(struct ivpu_device *vdev)
 {
-	struct task_struct *thread;
-
-	thread = kthread_run(&ivpu_job_done_thread, (void *)vdev, "ivpu_job_done_thread");
-	if (IS_ERR(thread)) {
-		ivpu_err(vdev, "Failed to start job completion thread\n");
-		return -EIO;
-	}
-
-	get_task_struct(thread);
-	wake_up_process(thread);
-
-	vdev->job_done_thread = thread;
-
-	return 0;
+	ivpu_ipc_consumer_add(vdev, &vdev->job_done_consumer,
+			      VPU_IPC_CHAN_JOB_RET, ivpu_job_done_callback);
 }
 
-void ivpu_job_done_thread_fini(struct ivpu_device *vdev)
+void ivpu_job_done_consumer_fini(struct ivpu_device *vdev)
 {
-	kthread_stop_put(vdev->job_done_thread);
+	ivpu_ipc_consumer_del(vdev, &vdev->job_done_consumer);
 }
diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h
index 5514c2d8a6..ca4984071c 100644
--- a/drivers/accel/ivpu/ivpu_job.h
+++ b/drivers/accel/ivpu/ivpu_job.h
@@ -43,7 +43,6 @@ struct ivpu_cmdq {
 			  will update the job status
  */
 struct ivpu_job {
-	struct kref ref;
 	struct ivpu_device *vdev;
 	struct ivpu_file_priv *file_priv;
 	struct dma_fence *done_fence;
@@ -56,11 +55,11 @@ struct ivpu_job {
 
 int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
 
-void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv);
+void ivpu_cmdq_release_all_locked(struct ivpu_file_priv *file_priv);
 void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev);
 
-int ivpu_job_done_thread_init(struct ivpu_device *vdev);
-void ivpu_job_done_thread_fini(struct ivpu_device *vdev);
+void ivpu_job_done_consumer_init(struct ivpu_device *vdev);
+void ivpu_job_done_consumer_fini(struct ivpu_device *vdev);
 
 void ivpu_jobs_abort_all(struct ivpu_device *vdev);
 
diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c
index 0c2fe71420..8cea0dd731 100644
--- a/drivers/accel/ivpu/ivpu_jsm_msg.c
+++ b/drivers/accel/ivpu/ivpu_jsm_msg.c
@@ -4,6 +4,7 @@
  */
 
 #include "ivpu_drv.h"
+#include "ivpu_hw.h"
 #include "ivpu_ipc.h"
 #include "ivpu_jsm_msg.h"
 
@@ -36,6 +37,17 @@ const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type)
 	IVPU_CASE_TO_STR(VPU_JSM_MSG_DESTROY_CMD_QUEUE);
 	IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES);
 	IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_REGISTER_DB);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_CMDQ);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SUSPEND_CMDQ);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_ENGINE_RESUME);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP_RSP);
 	IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT);
 	IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL);
 	IVPU_CASE_TO_STR(VPU_JSM_MSG_JOB_DONE);
@@ -65,6 +77,12 @@ const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type)
 	IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP);
 	IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT_DONE);
 	IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL_RSP);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_PWR_D0I3_ENTER);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_PWR_D0I3_ENTER_DONE);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_ENABLE);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_ENABLE_DONE);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_DISABLE);
+	IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_DISABLE_DONE);
 	}
 	#undef IVPU_CASE_TO_STR
 
@@ -243,3 +261,23 @@ int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid)
 	return ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_SSID_RELEASE_DONE, &resp,
 				     VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
 }
+
+int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev)
+{
+	struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_PWR_D0I3_ENTER };
+	struct vpu_jsm_msg resp;
+	int ret;
+
+	if (IVPU_WA(disable_d0i3_msg))
+		return 0;
+
+	req.payload.pwr_d0i3_enter.send_response = 1;
+
+	ret = ivpu_ipc_send_receive_active(vdev, &req, VPU_JSM_MSG_PWR_D0I3_ENTER_DONE,
+					   &resp, VPU_IPC_CHAN_GEN_CMD,
+					   vdev->timeout.d0i3_entry_msg);
+	if (ret)
+		return ret;
+
+	return ivpu_hw_wait_for_idle(vdev);
+}
diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.h b/drivers/accel/ivpu/ivpu_jsm_msg.h
index 66979a948c..ae75e5dbcc 100644
--- a/drivers/accel/ivpu/ivpu_jsm_msg.h
+++ b/drivers/accel/ivpu/ivpu_jsm_msg.h
@@ -22,4 +22,5 @@ int ivpu_jsm_trace_get_capability(struct ivpu_device *vdev, u32 *trace_destinati
 int ivpu_jsm_trace_set_config(struct ivpu_device *vdev, u32 trace_level, u32 trace_destination_mask,
 			      u64 trace_hw_component_mask);
 int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid);
+int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev);
 #endif
diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
index 9898946174..91bd640655 100644
--- a/drivers/accel/ivpu/ivpu_mmu.c
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@@ -7,6 +7,7 @@
 #include <linux/highmem.h>
 
 #include "ivpu_drv.h"
+#include "ivpu_hw.h"
 #include "ivpu_hw_reg_io.h"
 #include "ivpu_mmu.h"
 #include "ivpu_mmu_context.h"
@@ -71,10 +72,10 @@
 
 #define IVPU_MMU_Q_COUNT_LOG2		4 /* 16 entries */
 #define IVPU_MMU_Q_COUNT		((u32)1 << IVPU_MMU_Q_COUNT_LOG2)
-#define IVPU_MMU_Q_WRAP_BIT		(IVPU_MMU_Q_COUNT << 1)
-#define IVPU_MMU_Q_WRAP_MASK		(IVPU_MMU_Q_WRAP_BIT - 1)
-#define IVPU_MMU_Q_IDX_MASK		(IVPU_MMU_Q_COUNT - 1)
+#define IVPU_MMU_Q_WRAP_MASK            GENMASK(IVPU_MMU_Q_COUNT_LOG2, 0)
+#define IVPU_MMU_Q_IDX_MASK             (IVPU_MMU_Q_COUNT - 1)
 #define IVPU_MMU_Q_IDX(val)		((val) & IVPU_MMU_Q_IDX_MASK)
+#define IVPU_MMU_Q_WRP(val)             ((val) & IVPU_MMU_Q_COUNT)
 
 #define IVPU_MMU_CMDQ_CMD_SIZE		16
 #define IVPU_MMU_CMDQ_SIZE		(IVPU_MMU_Q_COUNT * IVPU_MMU_CMDQ_CMD_SIZE)
@@ -230,7 +231,12 @@
 				  (REG_FLD(IVPU_MMU_REG_GERROR, MSI_PRIQ_ABT)) | \
 				  (REG_FLD(IVPU_MMU_REG_GERROR, MSI_ABT)))
 
-static char *ivpu_mmu_event_to_str(u32 cmd)
+#define IVPU_MMU_CERROR_NONE         0x0
+#define IVPU_MMU_CERROR_ILL          0x1
+#define IVPU_MMU_CERROR_ABT          0x2
+#define IVPU_MMU_CERROR_ATC_INV_SYNC 0x3
+
+static const char *ivpu_mmu_event_to_str(u32 cmd)
 {
 	switch (cmd) {
 	case IVPU_MMU_EVT_F_UUT:
@@ -276,6 +282,22 @@ static char *ivpu_mmu_event_to_str(u32 cmd)
 	}
 }
 
+static const char *ivpu_mmu_cmdq_err_to_str(u32 err)
+{
+	switch (err) {
+	case IVPU_MMU_CERROR_NONE:
+		return "No CMDQ Error";
+	case IVPU_MMU_CERROR_ILL:
+		return "Illegal command";
+	case IVPU_MMU_CERROR_ABT:
+		return "External abort on CMDQ read";
+	case IVPU_MMU_CERROR_ATC_INV_SYNC:
+		return "Sync failed to complete ATS invalidation";
+	default:
+		return "Unknown CMDQ Error";
+	}
+}
+
 static void ivpu_mmu_config_check(struct ivpu_device *vdev)
 {
 	u32 val_ref;
@@ -453,20 +475,32 @@ static int ivpu_mmu_cmdq_wait_for_cons(struct ivpu_device *vdev)
 	return 0;
 }
 
+static bool ivpu_mmu_queue_is_full(struct ivpu_mmu_queue *q)
+{
+	return ((IVPU_MMU_Q_IDX(q->prod) == IVPU_MMU_Q_IDX(q->cons)) &&
+		(IVPU_MMU_Q_WRP(q->prod) != IVPU_MMU_Q_WRP(q->cons)));
+}
+
+static bool ivpu_mmu_queue_is_empty(struct ivpu_mmu_queue *q)
+{
+	return ((IVPU_MMU_Q_IDX(q->prod) == IVPU_MMU_Q_IDX(q->cons)) &&
+		(IVPU_MMU_Q_WRP(q->prod) == IVPU_MMU_Q_WRP(q->cons)));
+}
+
 static int ivpu_mmu_cmdq_cmd_write(struct ivpu_device *vdev, const char *name, u64 data0, u64 data1)
 {
-	struct ivpu_mmu_queue *q = &vdev->mmu->cmdq;
-	u64 *queue_buffer = q->base;
-	int idx = IVPU_MMU_Q_IDX(q->prod) * (IVPU_MMU_CMDQ_CMD_SIZE / sizeof(*queue_buffer));
+	struct ivpu_mmu_queue *cmdq = &vdev->mmu->cmdq;
+	u64 *queue_buffer = cmdq->base;
+	int idx = IVPU_MMU_Q_IDX(cmdq->prod) * (IVPU_MMU_CMDQ_CMD_SIZE / sizeof(*queue_buffer));
 
-	if (!CIRC_SPACE(IVPU_MMU_Q_IDX(q->prod), IVPU_MMU_Q_IDX(q->cons), IVPU_MMU_Q_COUNT)) {
+	if (ivpu_mmu_queue_is_full(cmdq)) {
 		ivpu_err(vdev, "Failed to write MMU CMD %s\n", name);
 		return -EBUSY;
 	}
 
 	queue_buffer[idx] = data0;
 	queue_buffer[idx + 1] = data1;
-	q->prod = (q->prod + 1) & IVPU_MMU_Q_WRAP_MASK;
+	cmdq->prod = (cmdq->prod + 1) & IVPU_MMU_Q_WRAP_MASK;
 
 	ivpu_dbg(vdev, MMU, "CMD write: %s data: 0x%llx 0x%llx\n", name, data0, data1);
 
@@ -479,10 +513,7 @@ static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev)
 	u64 val;
 	int ret;
 
-	val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_SYNC) |
-	      FIELD_PREP(IVPU_MMU_CMD_SYNC_0_CS, 0x2) |
-	      FIELD_PREP(IVPU_MMU_CMD_SYNC_0_MSH, 0x3) |
-	      FIELD_PREP(IVPU_MMU_CMD_SYNC_0_MSI_ATTR, 0xf);
+	val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_SYNC);
 
 	ret = ivpu_mmu_cmdq_cmd_write(vdev, "SYNC", val, 0);
 	if (ret)
@@ -492,8 +523,16 @@ static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev)
 	REGV_WR32(IVPU_MMU_REG_CMDQ_PROD, q->prod);
 
 	ret = ivpu_mmu_cmdq_wait_for_cons(vdev);
-	if (ret)
-		ivpu_err(vdev, "Timed out waiting for consumer: %d\n", ret);
+	if (ret) {
+		u32 err;
+
+		val = REGV_RD32(IVPU_MMU_REG_CMDQ_CONS);
+		err = REG_GET_FLD(IVPU_MMU_REG_CMDQ_CONS, ERR, val);
+
+		ivpu_err(vdev, "Timed out waiting for MMU consumer: %d, error: %s\n", ret,
+			 ivpu_mmu_cmdq_err_to_str(err));
+		ivpu_hw_diagnose_failure(vdev);
+	}
 
 	return ret;
 }
@@ -749,9 +788,12 @@ int ivpu_mmu_init(struct ivpu_device *vdev)
 
 	ivpu_dbg(vdev, MMU, "Init..\n");
 
-	drmm_mutex_init(&vdev->drm, &mmu->lock);
 	ivpu_mmu_config_check(vdev);
 
+	ret = drmm_mutex_init(&vdev->drm, &mmu->lock);
+	if (ret)
+		return ret;
+
 	ret = ivpu_mmu_structs_alloc(vdev);
 	if (ret)
 		return ret;
@@ -843,18 +885,15 @@ static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev)
 	u32 *evt = evtq->base + (idx * IVPU_MMU_EVTQ_CMD_SIZE);
 
 	evtq->prod = REGV_RD32(IVPU_MMU_REG_EVTQ_PROD_SEC);
-	if (!CIRC_CNT(IVPU_MMU_Q_IDX(evtq->prod), IVPU_MMU_Q_IDX(evtq->cons), IVPU_MMU_Q_COUNT))
+	if (ivpu_mmu_queue_is_empty(evtq))
 		return NULL;
 
 	evtq->cons = (evtq->cons + 1) & IVPU_MMU_Q_WRAP_MASK;
-	REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, evtq->cons);
-
 	return evt;
 }
 
 void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev)
 {
-	bool schedule_recovery = false;
 	u32 *event;
 	u32 ssid;
 
@@ -864,14 +903,22 @@ void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev)
 		ivpu_mmu_dump_event(vdev, event);
 
 		ssid = FIELD_GET(IVPU_MMU_EVT_SSID_MASK, event[0]);
-		if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID)
-			schedule_recovery = true;
-		else
-			ivpu_mmu_user_context_mark_invalid(vdev, ssid);
+		if (ssid == IVPU_GLOBAL_CONTEXT_MMU_SSID) {
+			ivpu_pm_trigger_recovery(vdev, "MMU event");
+			return;
+		}
+
+		ivpu_mmu_user_context_mark_invalid(vdev, ssid);
+		REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, vdev->mmu->evtq.cons);
 	}
+}
 
-	if (schedule_recovery)
-		ivpu_pm_schedule_recovery(vdev);
+void ivpu_mmu_evtq_dump(struct ivpu_device *vdev)
+{
+	u32 *event;
+
+	while ((event = ivpu_mmu_get_event(vdev)) != NULL)
+		ivpu_mmu_dump_event(vdev, event);
 }
 
 void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev)
diff --git a/drivers/accel/ivpu/ivpu_mmu.h b/drivers/accel/ivpu/ivpu_mmu.h
index cb55112680..6fa35c2407 100644
--- a/drivers/accel/ivpu/ivpu_mmu.h
+++ b/drivers/accel/ivpu/ivpu_mmu.h
@@ -46,5 +46,6 @@ int ivpu_mmu_invalidate_tlb(struct ivpu_device *vdev, u16 ssid);
 
 void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev);
 void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev);
+void ivpu_mmu_evtq_dump(struct ivpu_device *vdev);
 
 #endif /* __IVPU_MMU_H__ */
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c
index c1050a2df9..fe61612992 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.c
+++ b/drivers/accel/ivpu/ivpu_mmu_context.c
@@ -5,6 +5,9 @@
 
 #include <linux/bitfield.h>
 #include <linux/highmem.h>
+#include <linux/set_memory.h>
+
+#include <drm/drm_cache.h>
 
 #include "ivpu_drv.h"
 #include "ivpu_hw.h"
@@ -39,12 +42,57 @@
 #define IVPU_MMU_ENTRY_MAPPED  (IVPU_MMU_ENTRY_FLAG_AF | IVPU_MMU_ENTRY_FLAG_USER | \
 				IVPU_MMU_ENTRY_FLAG_NG | IVPU_MMU_ENTRY_VALID)
 
+static void *ivpu_pgtable_alloc_page(struct ivpu_device *vdev, dma_addr_t *dma)
+{
+	dma_addr_t dma_addr;
+	struct page *page;
+	void *cpu;
+
+	page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
+	if (!page)
+		return NULL;
+
+	set_pages_array_wc(&page, 1);
+
+	dma_addr = dma_map_page(vdev->drm.dev, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(vdev->drm.dev, dma_addr))
+		goto err_free_page;
+
+	cpu = vmap(&page, 1, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+	if (!cpu)
+		goto err_dma_unmap_page;
+
+
+	*dma = dma_addr;
+	return cpu;
+
+err_dma_unmap_page:
+	dma_unmap_page(vdev->drm.dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+err_free_page:
+	put_page(page);
+	return NULL;
+}
+
+static void ivpu_pgtable_free_page(struct ivpu_device *vdev, u64 *cpu_addr, dma_addr_t dma_addr)
+{
+	struct page *page;
+
+	if (cpu_addr) {
+		page = vmalloc_to_page(cpu_addr);
+		vunmap(cpu_addr);
+		dma_unmap_page(vdev->drm.dev, dma_addr & ~IVPU_MMU_ENTRY_FLAGS_MASK, PAGE_SIZE,
+			       DMA_BIDIRECTIONAL);
+		set_pages_array_wb(&page, 1);
+		put_page(page);
+	}
+}
+
 static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
 {
 	dma_addr_t pgd_dma;
 
-	pgtable->pgd_dma_ptr = dma_alloc_coherent(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pgd_dma,
-						  GFP_KERNEL);
+	pgtable->pgd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pgd_dma);
 	if (!pgtable->pgd_dma_ptr)
 		return -ENOMEM;
 
@@ -53,13 +101,6 @@ static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtab
 	return 0;
 }
 
-static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, u64 *cpu_addr, dma_addr_t dma_addr)
-{
-	if (cpu_addr)
-		dma_free_coherent(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, cpu_addr,
-				  dma_addr & ~IVPU_MMU_ENTRY_FLAGS_MASK);
-}
-
 static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
 {
 	int pgd_idx, pud_idx, pmd_idx;
@@ -84,19 +125,19 @@ static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgt
 				pte_dma_ptr = pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx];
 				pte_dma = pgtable->pmd_ptrs[pgd_idx][pud_idx][pmd_idx];
 
-				ivpu_mmu_pgtable_free(vdev, pte_dma_ptr, pte_dma);
+				ivpu_pgtable_free_page(vdev, pte_dma_ptr, pte_dma);
 			}
 
 			kfree(pgtable->pte_ptrs[pgd_idx][pud_idx]);
-			ivpu_mmu_pgtable_free(vdev, pmd_dma_ptr, pmd_dma);
+			ivpu_pgtable_free_page(vdev, pmd_dma_ptr, pmd_dma);
 		}
 
 		kfree(pgtable->pmd_ptrs[pgd_idx]);
 		kfree(pgtable->pte_ptrs[pgd_idx]);
-		ivpu_mmu_pgtable_free(vdev, pud_dma_ptr, pud_dma);
+		ivpu_pgtable_free_page(vdev, pud_dma_ptr, pud_dma);
 	}
 
-	ivpu_mmu_pgtable_free(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma);
+	ivpu_pgtable_free_page(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma);
 }
 
 static u64*
@@ -108,7 +149,7 @@ ivpu_mmu_ensure_pud(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
 	if (pud_dma_ptr)
 		return pud_dma_ptr;
 
-	pud_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pud_dma, GFP_KERNEL);
+	pud_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pud_dma);
 	if (!pud_dma_ptr)
 		return NULL;
 
@@ -131,7 +172,7 @@ err_free_pmd_ptrs:
 	kfree(pgtable->pmd_ptrs[pgd_idx]);
 
 err_free_pud_dma_ptr:
-	ivpu_mmu_pgtable_free(vdev, pud_dma_ptr, pud_dma);
+	ivpu_pgtable_free_page(vdev, pud_dma_ptr, pud_dma);
 	return NULL;
 }
 
@@ -145,7 +186,7 @@ ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
 	if (pmd_dma_ptr)
 		return pmd_dma_ptr;
 
-	pmd_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pmd_dma, GFP_KERNEL);
+	pmd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pmd_dma);
 	if (!pmd_dma_ptr)
 		return NULL;
 
@@ -160,7 +201,7 @@ ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
 	return pmd_dma_ptr;
 
 err_free_pmd_dma_ptr:
-	ivpu_mmu_pgtable_free(vdev, pmd_dma_ptr, pmd_dma);
+	ivpu_pgtable_free_page(vdev, pmd_dma_ptr, pmd_dma);
 	return NULL;
 }
 
@@ -174,7 +215,7 @@ ivpu_mmu_ensure_pte(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
 	if (pte_dma_ptr)
 		return pte_dma_ptr;
 
-	pte_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pte_dma, GFP_KERNEL);
+	pte_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pte_dma);
 	if (!pte_dma_ptr)
 		return NULL;
 
@@ -249,38 +290,6 @@ static void ivpu_mmu_context_unmap_page(struct ivpu_mmu_context *ctx, u64 vpu_ad
 	ctx->pgtable.pte_ptrs[pgd_idx][pud_idx][pmd_idx][pte_idx] = IVPU_MMU_ENTRY_INVALID;
 }
 
-static void
-ivpu_mmu_context_flush_page_tables(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size)
-{
-	struct ivpu_mmu_pgtable *pgtable = &ctx->pgtable;
-	u64 end_addr = vpu_addr + size;
-
-	/* Align to PMD entry (2 MB) */
-	vpu_addr &= ~(IVPU_MMU_PTE_MAP_SIZE - 1);
-
-	while (vpu_addr < end_addr) {
-		int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
-		u64 pud_end = (pgd_idx + 1) * (u64)IVPU_MMU_PUD_MAP_SIZE;
-
-		while (vpu_addr < end_addr && vpu_addr < pud_end) {
-			int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr);
-			u64 pmd_end = (pud_idx + 1) * (u64)IVPU_MMU_PMD_MAP_SIZE;
-
-			while (vpu_addr < end_addr && vpu_addr < pmd_end) {
-				int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
-
-				clflush_cache_range(pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx],
-						    IVPU_MMU_PGTABLE_SIZE);
-				vpu_addr += IVPU_MMU_PTE_MAP_SIZE;
-			}
-			clflush_cache_range(pgtable->pmd_ptrs[pgd_idx][pud_idx],
-					    IVPU_MMU_PGTABLE_SIZE);
-		}
-		clflush_cache_range(pgtable->pud_ptrs[pgd_idx], IVPU_MMU_PGTABLE_SIZE);
-	}
-	clflush_cache_range(pgtable->pgd_dma_ptr, IVPU_MMU_PGTABLE_SIZE);
-}
-
 static int
 ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
 			   u64 vpu_addr, dma_addr_t dma_addr, size_t size, u64 prot)
@@ -327,6 +336,9 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
 	u64 prot;
 	u64 i;
 
+	if (drm_WARN_ON(&vdev->drm, !ctx))
+		return -EINVAL;
+
 	if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
 		return -EINVAL;
 
@@ -343,16 +355,21 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
 		dma_addr_t dma_addr = sg_dma_address(sg) - sg->offset;
 		size_t size = sg_dma_len(sg) + sg->offset;
 
+		ivpu_dbg(vdev, MMU_MAP, "Map ctx: %u dma_addr: 0x%llx vpu_addr: 0x%llx size: %lu\n",
+			 ctx->id, dma_addr, vpu_addr, size);
+
 		ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot);
 		if (ret) {
 			ivpu_err(vdev, "Failed to map context pages\n");
 			mutex_unlock(&ctx->lock);
 			return ret;
 		}
-		ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
 		vpu_addr += size;
 	}
 
+	/* Ensure page table modifications are flushed from wc buffers to memory */
+	wmb();
+
 	mutex_unlock(&ctx->lock);
 
 	ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
@@ -369,19 +386,25 @@ ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ct
 	int ret;
 	u64 i;
 
-	if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
-		ivpu_warn(vdev, "Unaligned vpu_addr: 0x%llx\n", vpu_addr);
+	if (drm_WARN_ON(&vdev->drm, !ctx))
+		return;
 
 	mutex_lock(&ctx->lock);
 
 	for_each_sgtable_dma_sg(sgt, sg, i) {
+		dma_addr_t dma_addr = sg_dma_address(sg) - sg->offset;
 		size_t size = sg_dma_len(sg) + sg->offset;
 
+		ivpu_dbg(vdev, MMU_MAP, "Unmap ctx: %u dma_addr: 0x%llx vpu_addr: 0x%llx size: %lu\n",
+			 ctx->id, dma_addr, vpu_addr, size);
+
 		ivpu_mmu_context_unmap_pages(ctx, vpu_addr, size);
-		ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
 		vpu_addr += size;
 	}
 
+	/* Ensure page table modifications are flushed from wc buffers to memory */
+	wmb();
+
 	mutex_unlock(&ctx->lock);
 
 	ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
@@ -390,28 +413,34 @@ ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ct
 }
 
 int
-ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx,
-				    const struct ivpu_addr_range *range,
-				    u64 size, struct drm_mm_node *node)
+ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range,
+			     u64 size, struct drm_mm_node *node)
 {
-	lockdep_assert_held(&ctx->lock);
+	int ret;
+
+	WARN_ON(!range);
 
+	mutex_lock(&ctx->lock);
 	if (!ivpu_disable_mmu_cont_pages && size >= IVPU_MMU_CONT_PAGES_SIZE) {
-		if (!drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_CONT_PAGES_SIZE, 0,
-						 range->start, range->end, DRM_MM_INSERT_BEST))
-			return 0;
+		ret = drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_CONT_PAGES_SIZE, 0,
+						  range->start, range->end, DRM_MM_INSERT_BEST);
+		if (!ret)
+			goto unlock;
 	}
 
-	return drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE, 0,
-					   range->start, range->end, DRM_MM_INSERT_BEST);
+	ret = drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE, 0,
+					  range->start, range->end, DRM_MM_INSERT_BEST);
+unlock:
+	mutex_unlock(&ctx->lock);
+	return ret;
 }
 
 void
-ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx, struct drm_mm_node *node)
+ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *node)
 {
-	lockdep_assert_held(&ctx->lock);
-
+	mutex_lock(&ctx->lock);
 	drm_mm_remove_node(node);
+	mutex_unlock(&ctx->lock);
 }
 
 static int
@@ -421,7 +450,6 @@ ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u3
 	int ret;
 
 	mutex_init(&ctx->lock);
-	INIT_LIST_HEAD(&ctx->bo_list);
 
 	ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable);
 	if (ret) {
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h
index f15d8c630d..535db3a1fc 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.h
+++ b/drivers/accel/ivpu/ivpu_mmu_context.h
@@ -23,10 +23,9 @@ struct ivpu_mmu_pgtable {
 };
 
 struct ivpu_mmu_context {
-	struct mutex lock; /* protects: mm, pgtable, bo_list */
+	struct mutex lock; /* Protects: mm, pgtable */
 	struct drm_mm mm;
 	struct ivpu_mmu_pgtable pgtable;
-	struct list_head bo_list;
 	u32 id;
 };
 
@@ -39,11 +38,9 @@ int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context
 void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
 void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid);
 
-int ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx,
-					const struct ivpu_addr_range *range,
-					u64 size, struct drm_mm_node *node);
-void ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx,
-					 struct drm_mm_node *node);
+int ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range,
+				 u64 size, struct drm_mm_node *node);
+void ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *node);
 
 int ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
 			     u64 vpu_addr, struct sg_table *sgt, bool llc_coherent);
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index e9b16cbc26..a15d30d094 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -13,8 +13,10 @@
 #include "ivpu_drv.h"
 #include "ivpu_hw.h"
 #include "ivpu_fw.h"
+#include "ivpu_fw_log.h"
 #include "ivpu_ipc.h"
 #include "ivpu_job.h"
+#include "ivpu_jsm_msg.h"
 #include "ivpu_mmu.h"
 #include "ivpu_pm.h"
 
@@ -22,6 +24,10 @@ static bool ivpu_disable_recovery;
 module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644);
 MODULE_PARM_DESC(disable_recovery, "Disables recovery when VPU hang is detected");
 
+static unsigned long ivpu_tdr_timeout_ms;
+module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644);
+MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");
+
 #define PM_RESCHEDULE_LIMIT     5
 
 static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
@@ -52,11 +58,14 @@ static int ivpu_suspend(struct ivpu_device *vdev)
 {
 	int ret;
 
+	/* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */
+	pci_save_state(to_pci_dev(vdev->drm.dev));
+
 	ret = ivpu_shutdown(vdev);
-	if (ret) {
+	if (ret)
 		ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret);
-		return ret;
-	}
+
+	pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
 
 	return ret;
 }
@@ -66,30 +75,38 @@ static int ivpu_resume(struct ivpu_device *vdev)
 	int ret;
 
 retry:
+	pci_restore_state(to_pci_dev(vdev->drm.dev));
+	pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0);
+
 	ret = ivpu_hw_power_up(vdev);
 	if (ret) {
 		ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
-		return ret;
+		goto err_power_down;
 	}
 
 	ret = ivpu_mmu_enable(vdev);
 	if (ret) {
 		ivpu_err(vdev, "Failed to resume MMU: %d\n", ret);
-		ivpu_hw_power_down(vdev);
-		return ret;
+		goto err_power_down;
 	}
 
 	ret = ivpu_boot(vdev);
-	if (ret) {
-		ivpu_mmu_disable(vdev);
-		ivpu_hw_power_down(vdev);
-		if (!ivpu_fw_is_cold_boot(vdev)) {
-			ivpu_warn(vdev, "Failed to resume the FW: %d. Retrying cold boot..\n", ret);
-			ivpu_pm_prepare_cold_boot(vdev);
-			goto retry;
-		} else {
-			ivpu_err(vdev, "Failed to resume the FW: %d\n", ret);
-		}
+	if (ret)
+		goto err_mmu_disable;
+
+	return 0;
+
+err_mmu_disable:
+	ivpu_mmu_disable(vdev);
+err_power_down:
+	ivpu_hw_power_down(vdev);
+	pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
+
+	if (!ivpu_fw_is_cold_boot(vdev)) {
+		ivpu_pm_prepare_cold_boot(vdev);
+		goto retry;
+	} else {
+		ivpu_err(vdev, "Failed to resume the FW: %d\n", ret);
 	}
 
 	return ret;
@@ -102,22 +119,37 @@ static void ivpu_pm_recovery_work(struct work_struct *work)
 	char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL};
 	int ret;
 
-retry:
-	ret = pci_try_reset_function(to_pci_dev(vdev->drm.dev));
-	if (ret == -EAGAIN && !drm_dev_is_unplugged(&vdev->drm)) {
-		cond_resched();
-		goto retry;
-	}
+	ivpu_err(vdev, "Recovering the VPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter));
+
+	ret = pm_runtime_resume_and_get(vdev->drm.dev);
+	if (ret)
+		ivpu_err(vdev, "Failed to resume VPU: %d\n", ret);
+
+	ivpu_fw_log_dump(vdev);
+
+	atomic_inc(&vdev->pm->reset_counter);
+	atomic_set(&vdev->pm->reset_pending, 1);
+	down_write(&vdev->pm->reset_lock);
+
+	ivpu_suspend(vdev);
+	ivpu_pm_prepare_cold_boot(vdev);
+	ivpu_jobs_abort_all(vdev);
+
+	ret = ivpu_resume(vdev);
+	if (ret)
+		ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);
 
-	if (ret && ret != -EAGAIN)
-		ivpu_err(vdev, "Failed to reset VPU: %d\n", ret);
+	up_write(&vdev->pm->reset_lock);
+	atomic_set(&vdev->pm->reset_pending, 0);
 
 	kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt);
+	pm_runtime_mark_last_busy(vdev->drm.dev);
+	pm_runtime_put_autosuspend(vdev->drm.dev);
 }
 
-void ivpu_pm_schedule_recovery(struct ivpu_device *vdev)
+void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason)
 {
-	struct ivpu_pm_info *pm = vdev->pm;
+	ivpu_err(vdev, "Recovery triggered by %s\n", reason);
 
 	if (ivpu_disable_recovery) {
 		ivpu_err(vdev, "Recovery not available when disable_recovery param is set\n");
@@ -129,13 +161,35 @@ void ivpu_pm_schedule_recovery(struct ivpu_device *vdev)
 		return;
 	}
 
-	/* Schedule recovery if it's not in progress */
-	if (atomic_cmpxchg(&pm->in_reset, 0, 1) == 0) {
-		ivpu_hw_irq_disable(vdev);
-		queue_work(system_long_wq, &pm->recovery_work);
+	/* Trigger recovery if it's not in progress */
+	if (atomic_cmpxchg(&vdev->pm->reset_pending, 0, 1) == 0) {
+		ivpu_hw_diagnose_failure(vdev);
+		ivpu_hw_irq_disable(vdev); /* Disable IRQ early to protect from IRQ storm */
+		queue_work(system_long_wq, &vdev->pm->recovery_work);
 	}
 }
 
+static void ivpu_job_timeout_work(struct work_struct *work)
+{
+	struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work);
+	struct ivpu_device *vdev = pm->vdev;
+
+	ivpu_pm_trigger_recovery(vdev, "TDR");
+}
+
+void ivpu_start_job_timeout_detection(struct ivpu_device *vdev)
+{
+	unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
+
+	/* No-op if already queued */
+	queue_delayed_work(system_wq, &vdev->pm->job_timeout_work, msecs_to_jiffies(timeout_ms));
+}
+
+void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev)
+{
+	cancel_delayed_work_sync(&vdev->pm->job_timeout_work);
+}
+
 int ivpu_pm_suspend_cb(struct device *dev)
 {
 	struct drm_device *drm = dev_get_drvdata(dev);
@@ -153,12 +207,11 @@ int ivpu_pm_suspend_cb(struct device *dev)
 		}
 	}
 
+	ivpu_jsm_pwr_d0i3_enter(vdev);
+
 	ivpu_suspend(vdev);
 	ivpu_pm_prepare_warm_boot(vdev);
 
-	pci_save_state(to_pci_dev(dev));
-	pci_set_power_state(to_pci_dev(dev), PCI_D3hot);
-
 	ivpu_dbg(vdev, PM, "Suspend done.\n");
 
 	return 0;
@@ -172,9 +225,6 @@ int ivpu_pm_resume_cb(struct device *dev)
 
 	ivpu_dbg(vdev, PM, "Resume..\n");
 
-	pci_set_power_state(to_pci_dev(dev), PCI_D0);
-	pci_restore_state(to_pci_dev(dev));
-
 	ret = ivpu_resume(vdev);
 	if (ret)
 		ivpu_err(vdev, "Failed to resume: %d\n", ret);
@@ -188,8 +238,12 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev)
 {
 	struct drm_device *drm = dev_get_drvdata(dev);
 	struct ivpu_device *vdev = to_ivpu_device(drm);
+	bool hw_is_idle = true;
 	int ret;
 
+	drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
+	drm_WARN_ON(&vdev->drm, work_pending(&vdev->pm->recovery_work));
+
 	ivpu_dbg(vdev, PM, "Runtime suspend..\n");
 
 	if (!ivpu_hw_is_idle(vdev) && vdev->pm->suspend_reschedule_counter) {
@@ -200,12 +254,18 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev)
 		return -EAGAIN;
 	}
 
+	if (!vdev->pm->suspend_reschedule_counter)
+		hw_is_idle = false;
+	else if (ivpu_jsm_pwr_d0i3_enter(vdev))
+		hw_is_idle = false;
+
 	ret = ivpu_suspend(vdev);
 	if (ret)
 		ivpu_err(vdev, "Failed to set suspend VPU: %d\n", ret);
 
-	if (!vdev->pm->suspend_reschedule_counter) {
-		ivpu_warn(vdev, "VPU failed to enter idle, force suspended.\n");
+	if (!hw_is_idle) {
+		ivpu_err(vdev, "VPU failed to enter idle, force suspended.\n");
+		ivpu_fw_log_dump(vdev);
 		ivpu_pm_prepare_cold_boot(vdev);
 	} else {
 		ivpu_pm_prepare_warm_boot(vdev);
@@ -266,11 +326,12 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev)
 {
 	struct ivpu_device *vdev = pci_get_drvdata(pdev);
 
-	pm_runtime_get_sync(vdev->drm.dev);
-
 	ivpu_dbg(vdev, PM, "Pre-reset..\n");
 	atomic_inc(&vdev->pm->reset_counter);
-	atomic_set(&vdev->pm->in_reset, 1);
+	atomic_set(&vdev->pm->reset_pending, 1);
+
+	pm_runtime_get_sync(vdev->drm.dev);
+	down_write(&vdev->pm->reset_lock);
 	ivpu_prepare_for_reset(vdev);
 	ivpu_hw_reset(vdev);
 	ivpu_pm_prepare_cold_boot(vdev);
@@ -287,9 +348,11 @@ void ivpu_pm_reset_done_cb(struct pci_dev *pdev)
 	ret = ivpu_resume(vdev);
 	if (ret)
 		ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret);
-	atomic_set(&vdev->pm->in_reset, 0);
+	up_write(&vdev->pm->reset_lock);
+	atomic_set(&vdev->pm->reset_pending, 0);
 	ivpu_dbg(vdev, PM, "Post-reset done.\n");
 
+	pm_runtime_mark_last_busy(vdev->drm.dev);
 	pm_runtime_put_autosuspend(vdev->drm.dev);
 }
 
@@ -302,8 +365,12 @@ void ivpu_pm_init(struct ivpu_device *vdev)
 	pm->vdev = vdev;
 	pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT;
 
-	atomic_set(&pm->in_reset, 0);
+	init_rwsem(&pm->reset_lock);
+	atomic_set(&pm->reset_pending, 0);
+	atomic_set(&pm->reset_counter, 0);
+
 	INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work);
+	INIT_DELAYED_WORK(&pm->job_timeout_work, ivpu_job_timeout_work);
 
 	if (ivpu_disable_recovery)
 		delay = -1;
@@ -318,6 +385,7 @@ void ivpu_pm_init(struct ivpu_device *vdev)
 
 void ivpu_pm_cancel_recovery(struct ivpu_device *vdev)
 {
+	drm_WARN_ON(&vdev->drm, delayed_work_pending(&vdev->pm->job_timeout_work));
 	cancel_work_sync(&vdev->pm->recovery_work);
 }
 
diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h
index 044db150be..ec60fbeefe 100644
--- a/drivers/accel/ivpu/ivpu_pm.h
+++ b/drivers/accel/ivpu/ivpu_pm.h
@@ -6,15 +6,18 @@
 #ifndef __IVPU_PM_H__
 #define __IVPU_PM_H__
 
+#include <linux/rwsem.h>
 #include <linux/types.h>
 
 struct ivpu_device;
 
 struct ivpu_pm_info {
 	struct ivpu_device *vdev;
+	struct delayed_work job_timeout_work;
 	struct work_struct recovery_work;
-	atomic_t in_reset;
+	struct rw_semaphore reset_lock;
 	atomic_t reset_counter;
+	atomic_t reset_pending;
 	bool is_warmboot;
 	u32 suspend_reschedule_counter;
 };
@@ -36,6 +39,8 @@ int __must_check ivpu_rpm_get(struct ivpu_device *vdev);
 int __must_check ivpu_rpm_get_if_active(struct ivpu_device *vdev);
 void ivpu_rpm_put(struct ivpu_device *vdev);
 
-void ivpu_pm_schedule_recovery(struct ivpu_device *vdev);
+void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason);
+void ivpu_start_job_timeout_detection(struct ivpu_device *vdev);
+void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev);
 
 #endif /* __IVPU_PM_H__ */
diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h
index 6b71be92ba..04c9542585 100644
--- a/drivers/accel/ivpu/vpu_boot_api.h
+++ b/drivers/accel/ivpu/vpu_boot_api.h
@@ -11,7 +11,10 @@
  *  The bellow values will be used to construct the version info this way:
  *  fw_bin_header->api_version[VPU_BOOT_API_VER_ID] = (VPU_BOOT_API_VER_MAJOR << 16) |
  *  VPU_BOOT_API_VER_MINOR;
- *  VPU_BOOT_API_VER_PATCH will be ignored. KMD and compatibility is not affected if this changes.
+ *  VPU_BOOT_API_VER_PATCH will be ignored. KMD and compatibility is not affected if this changes
+ *  This information is collected by using vpuip_2/application/vpuFirmware/make_std_fw_image.py
+ *  If a header is missing this info we ignore the header, if a header is missing or contains
+ *  partial info a build error will be generated.
  */
 
 /*
@@ -24,12 +27,12 @@
  * Minor version changes when API backward compatibility is preserved.
  * Resets to 0 if Major version is incremented.
  */
-#define VPU_BOOT_API_VER_MINOR 12
+#define VPU_BOOT_API_VER_MINOR 20
 
 /*
  * API header changed (field names, documentation, formatting) but API itself has not been changed
  */
-#define VPU_BOOT_API_VER_PATCH 2
+#define VPU_BOOT_API_VER_PATCH 4
 
 /*
  * Index in the API version table
@@ -63,6 +66,12 @@ struct vpu_firmware_header {
 	/* Size of memory require for firmware execution */
 	u32 runtime_size;
 	u32 shave_nn_fw_size;
+	/* Size of primary preemption buffer. */
+	u32 preemption_buffer_1_size;
+	/* Size of secondary preemption buffer. */
+	u32 preemption_buffer_2_size;
+	/* Space reserved for future preemption-related fields. */
+	u32 preemption_reserved[6];
 };
 
 /*
@@ -89,6 +98,14 @@ enum VPU_BOOT_L2_CACHE_CFG_TYPE {
 	VPU_BOOT_L2_CACHE_CFG_NUM = 2
 };
 
+/** VPU MCA ECC signalling mode. By default, no signalling is used */
+enum VPU_BOOT_MCA_ECC_SIGNAL_TYPE {
+	VPU_BOOT_MCA_ECC_NONE = 0,
+	VPU_BOOT_MCA_ECC_CORR = 1,
+	VPU_BOOT_MCA_ECC_FATAL = 2,
+	VPU_BOOT_MCA_ECC_BOTH = 3
+};
+
 /**
  * Logging destinations.
  *
@@ -131,9 +148,11 @@ enum vpu_trace_destination {
 #define VPU_TRACE_PROC_BIT_ACT_SHV_3 22
 #define VPU_TRACE_PROC_NO_OF_HW_DEVS 23
 
-/* KMB HW component IDs are sequential, so define first and last IDs. */
-#define VPU_TRACE_PROC_BIT_KMB_FIRST VPU_TRACE_PROC_BIT_LRT
-#define VPU_TRACE_PROC_BIT_KMB_LAST  VPU_TRACE_PROC_BIT_SHV_15
+/* VPU 30xx HW component IDs are sequential, so define first and last IDs. */
+#define VPU_TRACE_PROC_BIT_30XX_FIRST VPU_TRACE_PROC_BIT_LRT
+#define VPU_TRACE_PROC_BIT_30XX_LAST  VPU_TRACE_PROC_BIT_SHV_15
+#define VPU_TRACE_PROC_BIT_KMB_FIRST  VPU_TRACE_PROC_BIT_30XX_FIRST
+#define VPU_TRACE_PROC_BIT_KMB_LAST   VPU_TRACE_PROC_BIT_30XX_LAST
 
 struct vpu_boot_l2_cache_config {
 	u8 use;
@@ -148,6 +167,25 @@ struct vpu_warm_boot_section {
 	u32 is_clear_op;
 };
 
+/*
+ * When HW scheduling mode is enabled, a present period is defined.
+ * It will be used by VPU to swap between normal and focus priorities
+ * to prevent starving of normal priority band (when implemented).
+ * Host must provide a valid value at boot time in
+ * `vpu_focus_present_timer_ms`. If the value provided by the host is not within the
+ * defined range a default value will be used. Here we define the min. and max.
+ * allowed values and the and default value of the present period. Units are milliseconds.
+ */
+#define VPU_PRESENT_CALL_PERIOD_MS_DEFAULT 50
+#define VPU_PRESENT_CALL_PERIOD_MS_MIN	   16
+#define VPU_PRESENT_CALL_PERIOD_MS_MAX	   10000
+
+/**
+ * Macros to enable various operation modes within the VPU.
+ * To be defined as part of 32 bit mask.
+ */
+#define VPU_OP_MODE_SURVIVABILITY 0x1
+
 struct vpu_boot_params {
 	u32 magic;
 	u32 vpu_id;
@@ -218,6 +256,7 @@ struct vpu_boot_params {
 	 * the threshold will not be logged); applies to every enabled logging
 	 * destination and loggable HW component. See 'mvLog_t' enum for acceptable
 	 * values.
+	 * TODO: EISW-33556: Move log level definition (mvLog_t) to this file.
 	 */
 	u32 default_trace_level;
 	u32 boot_type;
@@ -249,7 +288,36 @@ struct vpu_boot_params {
 	u32 temp_sensor_period_ms;
 	/** PLL ratio for efficient clock frequency */
 	u32 pn_freq_pll_ratio;
-	u32 pad4[28];
+	/** DVFS Mode: Default: 0, Max Performance: 1, On Demand: 2, Power Save: 3 */
+	u32 dvfs_mode;
+	/**
+	 * Depending on DVFS Mode:
+	 * On-demand: Default if 0.
+	 *    Bit 0-7   - uint8_t: Highest residency percent
+	 *    Bit 8-15  - uint8_t: High residency percent
+	 *    Bit 16-23 - uint8_t: Low residency percent
+	 *    Bit 24-31 - uint8_t: Lowest residency percent
+	 *    Bit 32-35 - unsigned 4b: PLL Ratio increase amount on highest residency
+	 *    Bit 36-39 - unsigned 4b: PLL Ratio increase amount on high residency
+	 *    Bit 40-43 - unsigned 4b: PLL Ratio decrease amount on low residency
+	 *    Bit 44-47 - unsigned 4b: PLL Ratio decrease amount on lowest frequency
+	 *    Bit 48-55 - uint8_t: Period (ms) for residency decisions
+	 *    Bit 56-63 - uint8_t: Averaging windows (as multiples of period. Max: 30 decimal)
+	 * Power Save/Max Performance: Unused
+	 */
+	u64 dvfs_param;
+	/**
+	 * D0i3 delayed entry
+	 * Bit0: Disable CPU state save on D0i2 entry flow.
+	 *       0: Every D0i2 entry saves state. Save state IPC message ignored.
+	 *       1: IPC message required to save state on D0i3 entry flow.
+	 */
+	u32 d0i3_delayed_entry;
+	/* Time spent by VPU in D0i3 state */
+	u64 d0i3_residency_time_us;
+	/* Value of VPU perf counter at the time of entering D0i3 state . */
+	u64 d0i3_entry_vpu_ts;
+	u32 pad4[20];
 	/* Warm boot information: 0x400 - 0x43F */
 	u32 warm_boot_sections_count;
 	u32 warm_boot_start_address_reference;
@@ -274,8 +342,12 @@ struct vpu_boot_params {
 	u32 vpu_scheduling_mode;
 	/* Present call period in milliseconds. */
 	u32 vpu_focus_present_timer_ms;
-	/* Unused/reserved: 0x478 - 0xFFF */
-	u32 pad6[738];
+	/* VPU ECC Signaling */
+	u32 vpu_uses_ecc_mca_signal;
+	/* Values defined by VPU_OP_MODE* macros */
+	u32 vpu_operation_mode;
+	/* Unused/reserved: 0x480 - 0xFFF */
+	u32 pad6[736];
 };
 
 /*
diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h
index 2949ec8365..7da7622742 100644
--- a/drivers/accel/ivpu/vpu_jsm_api.h
+++ b/drivers/accel/ivpu/vpu_jsm_api.h
@@ -22,12 +22,12 @@
 /*
  * Minor version changes when API backward compatibility is preserved.
  */
-#define VPU_JSM_API_VER_MINOR 0
+#define VPU_JSM_API_VER_MINOR 15
 
 /*
  * API header changed (field names, documentation, formatting) but API itself has not been changed
  */
-#define VPU_JSM_API_VER_PATCH 1
+#define VPU_JSM_API_VER_PATCH 0
 
 /*
  * Index in the API version table
@@ -84,11 +84,13 @@
  * Job flags bit masks.
  */
 #define VPU_JOB_FLAGS_NULL_SUBMISSION_MASK 0x00000001
+#define VPU_JOB_FLAGS_PRIVATE_DATA_MASK	   0xFF000000
 
 /*
  * Sizes of the reserved areas in jobs, in bytes.
  */
-#define VPU_JOB_RESERVED_BYTES	     16
+#define VPU_JOB_RESERVED_BYTES 8
+
 /*
  * Sizes of the reserved areas in job queues, in bytes.
  */
@@ -108,6 +110,20 @@
  */
 #define VPU_DYNDBG_CMD_MAX_LEN 96
 
+/*
+ * For HWS command queue scheduling, we can prioritise command queues inside the
+ * same process with a relative in-process priority. Valid values for relative
+ * priority are given below - max and min.
+ */
+#define VPU_HWS_COMMAND_QUEUE_MAX_IN_PROCESS_PRIORITY 7
+#define VPU_HWS_COMMAND_QUEUE_MIN_IN_PROCESS_PRIORITY -7
+
+/*
+ * For HWS priority scheduling, we can have multiple realtime priority bands.
+ * They are numbered 0 to a MAX.
+ */
+#define VPU_HWS_MAX_REALTIME_PRIORITY_LEVEL 31U
+
 /*
  * Job format.
  */
@@ -117,8 +133,14 @@ struct vpu_job_queue_entry {
 	u32 flags; /**< Flags bit field, see VPU_JOB_FLAGS_* above */
 	u64 root_page_table_addr; /**< Address of root page table to use for this job */
 	u64 root_page_table_update_counter; /**< Page tables update events counter */
-	u64 preemption_buffer_address; /**< Address of the preemption buffer to use for this job */
-	u64 preemption_buffer_size; /**< Size of the preemption buffer to use for this job */
+	u64 primary_preempt_buf_addr;
+	/**< Address of the primary preemption buffer to use for this job */
+	u32 primary_preempt_buf_size;
+	/**< Size of the primary preemption buffer to use for this job */
+	u32 secondary_preempt_buf_size;
+	/**< Size of secondary preemption buffer to use for this job */
+	u64 secondary_preempt_buf_addr;
+	/**< Address of secondary preemption buffer to use for this job */
 	u8 reserved_0[VPU_JOB_RESERVED_BYTES];
 };
 
@@ -152,6 +174,46 @@ enum vpu_trace_entity_type {
 	VPU_TRACE_ENTITY_TYPE_HW_COMPONENT = 2,
 };
 
+/*
+ * HWS specific log buffer header details.
+ * Total size is 32 bytes.
+ */
+struct vpu_hws_log_buffer_header {
+	/* Written by VPU after adding a log entry. Initialised by host to 0. */
+	u32 first_free_entry_index;
+	/* Incremented by VPU every time the VPU overwrites the 0th entry;
+	 * initialised by host to 0.
+	 */
+	u32 wraparound_count;
+	/*
+	 * This is the number of buffers that can be stored in the log buffer provided by the host.
+	 * It is written by host before passing buffer to VPU. VPU should consider it read-only.
+	 */
+	u64 num_of_entries;
+	u64 reserved[2];
+};
+
+/*
+ * HWS specific log buffer entry details.
+ * Total size is 32 bytes.
+ */
+struct vpu_hws_log_buffer_entry {
+	/* VPU timestamp must be an invariant timer tick (not impacted by DVFS) */
+	u64 vpu_timestamp;
+	/*
+	 * Operation type:
+	 *     0 - context state change
+	 *     1 - queue new work
+	 *     2 - queue unwait sync object
+	 *     3 - queue no more work
+	 *     4 - queue wait sync object
+	 */
+	u32 operation_type;
+	u32 reserved;
+	/* Operation data depends on operation type */
+	u64 operation_data[2];
+};
+
 /*
  * Host <-> VPU IPC messages types.
  */
@@ -228,6 +290,23 @@ enum vpu_ipc_msg_type {
 	 * deallocated or reassigned to another context.
 	 */
 	VPU_JSM_MSG_HWS_REGISTER_DB = 0x1117,
+	/** Control command: Log buffer setting */
+	VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG = 0x1118,
+	/* Control command: Suspend command queue. */
+	VPU_JSM_MSG_HWS_SUSPEND_CMDQ = 0x1119,
+	/* Control command: Resume command queue */
+	VPU_JSM_MSG_HWS_RESUME_CMDQ = 0x111a,
+	/* Control command: Resume engine after reset */
+	VPU_JSM_MSG_HWS_ENGINE_RESUME = 0x111b,
+	/* Control command: Enable survivability/DCT mode */
+	VPU_JSM_MSG_DCT_ENABLE = 0x111c,
+	/* Control command: Disable survivability/DCT mode */
+	VPU_JSM_MSG_DCT_DISABLE = 0x111d,
+	/**
+	 * Dump VPU state. To be used for debug purposes only.
+	 * NOTE: Please introduce new ASYNC commands before this one. *
+	 */
+	VPU_JSM_MSG_STATE_DUMP = 0x11FF,
 	/* IPC Host -> Device, General commands */
 	VPU_JSM_MSG_GENERAL_CMD = 0x1200,
 	VPU_JSM_MSG_BLOB_DEINIT = VPU_JSM_MSG_GENERAL_CMD,
@@ -236,6 +315,10 @@ enum vpu_ipc_msg_type {
 	 * Linux command: `echo '<dyndbg_cmd>' > <debugfs>/dynamic_debug/control`.
 	 */
 	VPU_JSM_MSG_DYNDBG_CONTROL = 0x1201,
+	/**
+	 * Perform the save procedure for the D0i3 entry
+	 */
+	VPU_JSM_MSG_PWR_D0I3_ENTER = 0x1202,
 	/* IPC Device -> Host, Job completion */
 	VPU_JSM_MSG_JOB_DONE = 0x2100,
 	/* IPC Device -> Host, Async command completion */
@@ -304,11 +387,35 @@ enum vpu_ipc_msg_type {
 	VPU_JSM_MSG_DESTROY_CMD_QUEUE_RSP = 0x2216,
 	/** Response to control command: Set context scheduling properties */
 	VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP = 0x2217,
+	/** Response to control command: Log buffer setting */
+	VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP = 0x2218,
+	/* IPC Device -> Host, HWS notify index entry of log buffer written */
+	VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION = 0x2219,
+	/* IPC Device -> Host, HWS completion of a context suspend request */
+	VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE = 0x221a,
+	/* Response to control command: Resume command queue */
+	VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP = 0x221b,
+	/* Response to control command: Resume engine command response */
+	VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE = 0x221c,
+	/* Response to control command: Enable survivability/DCT mode */
+	VPU_JSM_MSG_DCT_ENABLE_DONE = 0x221d,
+	/* Response to control command: Disable survivability/DCT mode */
+	VPU_JSM_MSG_DCT_DISABLE_DONE = 0x221e,
+	/**
+	 * Response to state dump control command.
+	 * NOTE: Please introduce new ASYNC responses before this one. *
+	 */
+	VPU_JSM_MSG_STATE_DUMP_RSP = 0x22FF,
 	/* IPC Device -> Host, General command completion */
 	VPU_JSM_MSG_GENERAL_CMD_DONE = 0x2300,
 	VPU_JSM_MSG_BLOB_DEINIT_DONE = VPU_JSM_MSG_GENERAL_CMD_DONE,
 	/** Response to VPU_JSM_MSG_DYNDBG_CONTROL. */
 	VPU_JSM_MSG_DYNDBG_CONTROL_RSP = 0x2301,
+	/**
+	 * Acknowledgment of completion of the save procedure initiated by
+	 * VPU_JSM_MSG_PWR_D0I3_ENTER
+	 */
+	VPU_JSM_MSG_PWR_D0I3_ENTER_DONE = 0x2302,
 };
 
 enum vpu_ipc_msg_status { VPU_JSM_MSG_FREE, VPU_JSM_MSG_ALLOCATED };
@@ -593,12 +700,12 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup {
 	 * Default quantum in 100ns units for scheduling across processes
 	 * within a priority band
 	 */
-	u64 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS];
+	u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS];
 	/*
 	 * Default grace period in 100ns units for processes that preempt each
 	 * other within a priority band
 	 */
-	u64 process_grace_period[VPU_HWS_NUM_PRIORITY_BANDS];
+	u32 process_grace_period[VPU_HWS_NUM_PRIORITY_BANDS];
 	/*
 	 * For normal priority band, specifies the target VPU percentage
 	 * in situations when it's starved by the focus band.
@@ -608,32 +715,51 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup {
 	u32 reserved_0;
 };
 
-/* HWS create command queue request */
+/*
+ * @brief HWS create command queue request.
+ * Host will create a command queue via this command.
+ * Note: Cmdq group is a handle of an object which
+ * may contain one or more command queues.
+ * @see VPU_JSM_MSG_CREATE_CMD_QUEUE
+ * @see VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP
+ */
 struct vpu_ipc_msg_payload_hws_create_cmdq {
 	/* Process id */
 	u64 process_id;
 	/* Host SSID */
 	u32 host_ssid;
-	/* Zero Padding */
-	u32 reserved;
+	/* Engine for which queue is being created */
+	u32 engine_idx;
+	/*
+	 * Cmdq group may be set to 0 or equal to
+	 * cmdq_id while each priority band contains
+	 * only single engine instances.
+	 */
+	u64 cmdq_group;
 	/* Command queue id */
 	u64 cmdq_id;
 	/* Command queue base */
 	u64 cmdq_base;
 	/* Command queue size */
 	u32 cmdq_size;
-	/* Reserved */
+	/* Zero padding */
 	u32 reserved_0;
 };
 
-/* HWS create command queue response */
+/*
+ * @brief HWS create command queue response.
+ * @see VPU_JSM_MSG_CREATE_CMD_QUEUE
+ * @see VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP
+ */
 struct vpu_ipc_msg_payload_hws_create_cmdq_rsp {
 	/* Process id */
 	u64 process_id;
 	/* Host SSID */
 	u32 host_ssid;
-	/* Zero Padding */
-	u32 reserved;
+	/* Engine for which queue is being created */
+	u32 engine_idx;
+	/* Command queue group */
+	u64 cmdq_group;
 	/* Command queue id */
 	u64 cmdq_id;
 };
@@ -661,7 +787,7 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties {
 	/* Inside realtime band assigns a further priority */
 	u32 realtime_priority_level;
 	/* Priority relative to other contexts in the same process */
-	u32 in_process_priority;
+	s32 in_process_priority;
 	/* Zero padding / Reserved */
 	u32 reserved_1;
 	/* Context quantum relative to other contexts of same priority in the same process */
@@ -694,6 +820,123 @@ struct vpu_jsm_hws_register_db {
 	u64 cmdq_size;
 };
 
+/*
+ * @brief Structure to set another buffer to be used for scheduling-related logging.
+ * The size of the logging buffer and the number of entries is defined as part of the
+ * buffer itself as described next.
+ * The log buffer received from the host is made up of;
+ *   - header:     32 bytes in size, as shown in 'struct vpu_hws_log_buffer_header'.
+ *                 The header contains the number of log entries in the buffer.
+ *   - log entry:  0 to n-1, each log entry is 32 bytes in size, as shown in
+ *                 'struct vpu_hws_log_buffer_entry'.
+ *                 The entry contains the VPU timestamp, operation type and data.
+ * The host should provide the notify index value of log buffer to VPU. This is a
+ * value defined within the log buffer and when written to will generate the
+ * scheduling log notification.
+ * The host should set engine_idx and vpu_log_buffer_va to 0 to disable logging
+ * for a particular engine.
+ * VPU will handle one log buffer for each of supported engines.
+ * VPU should allow the logging to consume one host_ssid.
+ * @see VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG
+ * @see VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP
+ * @see VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION
+ */
+struct vpu_ipc_msg_payload_hws_set_scheduling_log {
+	/* Engine ordinal */
+	u32 engine_idx;
+	/* Host SSID */
+	u32 host_ssid;
+	/*
+	 * VPU log buffer virtual address.
+	 * Set to 0 to disable logging for this engine.
+	 */
+	u64 vpu_log_buffer_va;
+	/*
+	 * Notify index of log buffer. VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION
+	 * is generated when an event log is written to this index.
+	 */
+	u64 notify_index;
+};
+
+/*
+ * @brief The scheduling log notification is generated by VPU when it writes
+ * an event into the log buffer at the notify_index. VPU notifies host with
+ * VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION. This is an asynchronous
+ * message from VPU to host.
+ * @see VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION
+ * @see VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG
+ */
+struct vpu_ipc_msg_payload_hws_scheduling_log_notification {
+	/* Engine ordinal */
+	u32 engine_idx;
+	/* Zero Padding */
+	u32 reserved_0;
+};
+
+/*
+ * @brief HWS suspend command queue request and done structure.
+ * Host will request the suspend of contexts and VPU will;
+ *   - Suspend all work on this context
+ *   - Preempt any running work
+ *   - Asynchronously perform the above and return success immediately once
+ *     all items above are started successfully
+ *   - Notify the host of completion of these operations via
+ *     VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE
+ *   - Reject any other context operations on a context with an in-flight
+ *     suspend request running
+ * Same structure used when VPU notifies host of completion of a context suspend
+ * request. The ids and suspend fence value reported in this command will match
+ * the one in the request from the host to suspend the context. Once suspend is
+ * complete, VPU will not access any data relating to this command queue until
+ * it is resumed.
+ * @see VPU_JSM_MSG_HWS_SUSPEND_CMDQ
+ * @see VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE
+ */
+struct vpu_ipc_msg_payload_hws_suspend_cmdq {
+	/* Host SSID */
+	u32 host_ssid;
+	/* Zero Padding */
+	u32 reserved_0;
+	/* Command queue id */
+	u64 cmdq_id;
+	/*
+	 * Suspend fence value - reported by the VPU suspend context
+	 * completed once suspend is complete.
+	 */
+	u64 suspend_fence_value;
+};
+
+/*
+ * @brief HWS Resume command queue request / response structure.
+ * Host will request the resume of a context;
+ *  - VPU will resume all work on this context
+ *  - Scheduler will allow this context to be scheduled
+ * @see VPU_JSM_MSG_HWS_RESUME_CMDQ
+ * @see VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP
+ */
+struct vpu_ipc_msg_payload_hws_resume_cmdq {
+	/* Host SSID */
+	u32 host_ssid;
+	/* Zero Padding */
+	u32 reserved_0;
+	/* Command queue id */
+	u64 cmdq_id;
+};
+
+/*
+ * @brief HWS Resume engine request / response structure.
+ * After a HWS engine reset, all scheduling is stopped on VPU until a engine resume.
+ * Host shall send this command to resume scheduling of any valid queue.
+ * @see VPU_JSM_MSG_HWS_RESUME_ENGINE
+ * @see VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE
+ */
+struct vpu_ipc_msg_payload_hws_resume_engine {
+	/* Engine to be resumed */
+	u32 engine_idx;
+	/* Reserved */
+	u32 reserved_0;
+};
+
 /**
  * Payload for VPU_JSM_MSG_TRACE_SET_CONFIG[_RSP] and
  * VPU_JSM_MSG_TRACE_GET_CONFIG_RSP messages.
@@ -938,6 +1181,35 @@ struct vpu_ipc_msg_payload_dyndbg_control {
 	char dyndbg_cmd[VPU_DYNDBG_CMD_MAX_LEN];
 };
 
+/**
+ * Payload for VPU_JSM_MSG_PWR_D0I3_ENTER
+ *
+ * This is a bi-directional payload.
+ */
+struct vpu_ipc_msg_payload_pwr_d0i3_enter {
+	/**
+	 * 0: VPU_JSM_MSG_PWR_D0I3_ENTER_DONE is not sent to the host driver
+	 *    The driver will poll for D0i2 Idle state transitions.
+	 * 1: VPU_JSM_MSG_PWR_D0I3_ENTER_DONE is sent after VPU state save is complete
+	 */
+	u32 send_response;
+	u32 reserved_0;
+};
+
+/**
+ * Payload for VPU_JSM_MSG_DCT_ENABLE message.
+ *
+ * Default values for DCT active/inactive times are 5.3ms and 30ms respectively,
+ * corresponding to a 85% duty cycle. This payload allows the host to tune these
+ * values according to application requirements.
+ */
+struct vpu_ipc_msg_payload_pwr_dct_control {
+	/** Duty cycle active time in microseconds */
+	u32 dct_active_us;
+	/** Duty cycle inactive time in microseconds */
+	u32 dct_inactive_us;
+};
+
 /*
  * Payloads union, used to define complete message format.
  */
@@ -974,6 +1246,13 @@ union vpu_ipc_msg_payload {
 	struct vpu_ipc_msg_payload_hws_destroy_cmdq hws_destroy_cmdq;
 	struct vpu_ipc_msg_payload_hws_set_context_sched_properties
 		hws_set_context_sched_properties;
+	struct vpu_ipc_msg_payload_hws_set_scheduling_log hws_set_scheduling_log;
+	struct vpu_ipc_msg_payload_hws_scheduling_log_notification hws_scheduling_log_notification;
+	struct vpu_ipc_msg_payload_hws_suspend_cmdq hws_suspend_cmdq;
+	struct vpu_ipc_msg_payload_hws_resume_cmdq hws_resume_cmdq;
+	struct vpu_ipc_msg_payload_hws_resume_engine hws_resume_engine;
+	struct vpu_ipc_msg_payload_pwr_d0i3_enter pwr_d0i3_enter;
+	struct vpu_ipc_msg_payload_pwr_dct_control pwr_dct_control;
 };
 
 /*
diff --git a/drivers/accel/qaic/Makefile b/drivers/accel/qaic/Makefile
index 2418418f7a..3f7f6dfde7 100644
--- a/drivers/accel/qaic/Makefile
+++ b/drivers/accel/qaic/Makefile
@@ -9,4 +9,5 @@ qaic-y := \
 	mhi_controller.o \
 	qaic_control.o \
 	qaic_data.o \
-	qaic_drv.o
+	qaic_drv.o \
+	qaic_timesync.o
diff --git a/drivers/accel/qaic/mhi_controller.c b/drivers/accel/qaic/mhi_controller.c
index 1405623b03..cb77d048ed 100644
--- a/drivers/accel/qaic/mhi_controller.c
+++ b/drivers/accel/qaic/mhi_controller.c
@@ -348,7 +348,7 @@ static struct mhi_channel_config aic100_channels[] = {
 		.local_elements = 0,
 		.event_ring = 0,
 		.dir = DMA_TO_DEVICE,
-		.ee_mask = MHI_CH_EE_SBL | MHI_CH_EE_AMSS,
+		.ee_mask = MHI_CH_EE_SBL,
 		.pollcfg = 0,
 		.doorbell = MHI_DB_BRST_DISABLE,
 		.lpm_notify = false,
@@ -364,7 +364,39 @@ static struct mhi_channel_config aic100_channels[] = {
 		.local_elements = 0,
 		.event_ring = 0,
 		.dir = DMA_FROM_DEVICE,
-		.ee_mask = MHI_CH_EE_SBL | MHI_CH_EE_AMSS,
+		.ee_mask = MHI_CH_EE_SBL,
+		.pollcfg = 0,
+		.doorbell = MHI_DB_BRST_DISABLE,
+		.lpm_notify = false,
+		.offload_channel = false,
+		.doorbell_mode_switch = false,
+		.auto_queue = false,
+		.wake_capable = false,
+	},
+	{
+		.name = "QAIC_TIMESYNC_PERIODIC",
+		.num = 22,
+		.num_elements = 32,
+		.local_elements = 0,
+		.event_ring = 0,
+		.dir = DMA_TO_DEVICE,
+		.ee_mask = MHI_CH_EE_AMSS,
+		.pollcfg = 0,
+		.doorbell = MHI_DB_BRST_DISABLE,
+		.lpm_notify = false,
+		.offload_channel = false,
+		.doorbell_mode_switch = false,
+		.auto_queue = false,
+		.wake_capable = false,
+	},
+	{
+		.num = 23,
+		.name = "QAIC_TIMESYNC_PERIODIC",
+		.num_elements = 32,
+		.local_elements = 0,
+		.event_ring = 0,
+		.dir = DMA_FROM_DEVICE,
+		.ee_mask = MHI_CH_EE_AMSS,
 		.pollcfg = 0,
 		.doorbell = MHI_DB_BRST_DISABLE,
 		.lpm_notify = false,
@@ -450,7 +482,7 @@ static void mhi_status_cb(struct mhi_controller *mhi_cntrl, enum mhi_callback re
 		pci_err(qdev->pdev, "Fatal error received from device. Attempting to recover\n");
 	/* this event occurs in non-atomic context */
 	if (reason == MHI_CB_SYS_ERROR)
-		qaic_dev_reset_clean_local_state(qdev, true);
+		qaic_dev_reset_clean_local_state(qdev);
 }
 
 static int mhi_reset_and_async_power_up(struct mhi_controller *mhi_cntrl)
@@ -481,7 +513,7 @@ static int mhi_reset_and_async_power_up(struct mhi_controller *mhi_cntrl)
 }
 
 struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, void __iomem *mhi_bar,
-						    int mhi_irq)
+						    int mhi_irq, bool shared_msi)
 {
 	struct mhi_controller *mhi_cntrl;
 	int ret;
@@ -513,6 +545,10 @@ struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, voi
 		return ERR_PTR(-ENOMEM);
 
 	mhi_cntrl->irq[0] = mhi_irq;
+
+	if (shared_msi) /* MSI shared with data path, no IRQF_NO_SUSPEND */
+		mhi_cntrl->irq_flags = IRQF_SHARED;
+
 	mhi_cntrl->fw_image = "qcom/aic100/sbl.bin";
 
 	/* use latest configured timeout */
diff --git a/drivers/accel/qaic/mhi_controller.h b/drivers/accel/qaic/mhi_controller.h
index 2ae45d768e..500e7f4af2 100644
--- a/drivers/accel/qaic/mhi_controller.h
+++ b/drivers/accel/qaic/mhi_controller.h
@@ -8,7 +8,7 @@
 #define MHICONTROLLERQAIC_H_
 
 struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, void __iomem *mhi_bar,
-						    int mhi_irq);
+						    int mhi_irq, bool shared_msi);
 void qaic_mhi_free_controller(struct mhi_controller *mhi_cntrl, bool link_up);
 void qaic_mhi_start_reset(struct mhi_controller *mhi_cntrl);
 void qaic_mhi_reset_done(struct mhi_controller *mhi_cntrl);
diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h
index e3f4c30f3f..582836f953 100644
--- a/drivers/accel/qaic/qaic.h
+++ b/drivers/accel/qaic/qaic.h
@@ -31,6 +31,15 @@
 #define to_drm(qddev) (&(qddev)->drm)
 #define to_accel_kdev(qddev) (to_drm(qddev)->accel->kdev) /* Return Linux device of accel node */
 
+enum __packed dev_states {
+	/* Device is offline or will be very soon */
+	QAIC_OFFLINE,
+	/* Device is booting, not clear if it's in a usable state */
+	QAIC_BOOT,
+	/* Device is fully operational */
+	QAIC_ONLINE,
+};
+
 extern bool datapath_polling;
 
 struct qaic_user {
@@ -121,8 +130,10 @@ struct qaic_device {
 	struct workqueue_struct	*cntl_wq;
 	/* Synchronizes all the users of device during cleanup */
 	struct srcu_struct	dev_lock;
-	/* true: Device under reset; false: Device not under reset */
-	bool			in_reset;
+	/* Track the state of the device during resets */
+	enum dev_states		dev_state;
+	/* true: single MSI is used to operate device */
+	bool			single_msi;
 	/*
 	 * true: A tx MHI transaction has failed and a rx buffer is still queued
 	 * in control device. Such a buffer is considered lost rx buffer
@@ -137,6 +148,10 @@ struct qaic_device {
 	u32 (*gen_crc)(void *msg);
 	/* Validate the CRC of a control message */
 	bool (*valid_crc)(void *msg);
+	/* MHI "QAIC_TIMESYNC" channel device */
+	struct mhi_device	*qts_ch;
+	/* Work queue for tasks related to MHI "QAIC_TIMESYNC" channel */
+	struct workqueue_struct	*qts_wq;
 };
 
 struct qaic_drm_device {
@@ -268,7 +283,7 @@ void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id);
 void release_dbc(struct qaic_device *qdev, u32 dbc_id);
 
 void wake_all_cntl(struct qaic_device *qdev);
-void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset);
+void qaic_dev_reset_clean_local_state(struct qaic_device *qdev);
 
 struct drm_gem_object *qaic_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf);
 
diff --git a/drivers/accel/qaic/qaic_control.c b/drivers/accel/qaic/qaic_control.c
index 388abd4002..9e8a8cbadf 100644
--- a/drivers/accel/qaic/qaic_control.c
+++ b/drivers/accel/qaic/qaic_control.c
@@ -1022,7 +1022,8 @@ static void *msg_xfer(struct qaic_device *qdev, struct wrapper_list *wrappers, u
 	int xfer_count = 0;
 	int retry_count;
 
-	if (qdev->in_reset) {
+	/* Allow QAIC_BOOT state since we need to check control protocol version */
+	if (qdev->dev_state == QAIC_OFFLINE) {
 		mutex_unlock(&qdev->cntl_mutex);
 		return ERR_PTR(-ENODEV);
 	}
@@ -1138,7 +1139,7 @@ static int abort_dma_cont(struct qaic_device *qdev, struct wrapper_list *wrapper
 		if (!list_is_first(&wrapper->list, &wrappers->list))
 			kref_put(&wrapper->ref_count, free_wrapper);
 
-	wrapper = add_wrapper(wrappers, offsetof(struct wrapper_msg, trans) + sizeof(*out_trans));
+	wrapper = add_wrapper(wrappers, sizeof(*wrapper));
 
 	if (!wrapper)
 		return -ENOMEM;
@@ -1306,7 +1307,7 @@ int qaic_manage_ioctl(struct drm_device *dev, void *data, struct drm_file *file_
 	qdev = usr->qddev->qdev;
 
 	qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
-	if (qdev->in_reset) {
+	if (qdev->dev_state != QAIC_ONLINE) {
 		srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
 		srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
 		return -ENODEV;
diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
index d42f002bc0..03c9a793da 100644
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@@ -51,6 +51,7 @@
 		})
 #define NUM_EVENTS	128
 #define NUM_DELAYS	10
+#define fifo_at(base, offset) ((base) + (offset) * get_dbc_req_elem_size())
 
 static unsigned int wait_exec_default_timeout_ms = 5000; /* 5 sec default */
 module_param(wait_exec_default_timeout_ms, uint, 0600);
@@ -451,7 +452,7 @@ static int create_sgt(struct qaic_device *qdev, struct sg_table **sgt_out, u64 s
 		 * later
 		 */
 		buf_extra = (PAGE_SIZE - size % PAGE_SIZE) % PAGE_SIZE;
-		max_order = min(MAX_ORDER - 1, get_order(size));
+		max_order = min(MAX_PAGE_ORDER, get_order(size));
 	} else {
 		/* allocate a single page for book keeping */
 		nr_pages = 1;
@@ -689,7 +690,7 @@ int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
 
 	qdev = usr->qddev->qdev;
 	qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
-	if (qdev->in_reset) {
+	if (qdev->dev_state != QAIC_ONLINE) {
 		ret = -ENODEV;
 		goto unlock_dev_srcu;
 	}
@@ -748,7 +749,7 @@ int qaic_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file
 
 	qdev = usr->qddev->qdev;
 	qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
-	if (qdev->in_reset) {
+	if (qdev->dev_state != QAIC_ONLINE) {
 		ret = -ENODEV;
 		goto unlock_dev_srcu;
 	}
@@ -967,7 +968,7 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
 
 	qdev = usr->qddev->qdev;
 	qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
-	if (qdev->in_reset) {
+	if (qdev->dev_state != QAIC_ONLINE) {
 		ret = -ENODEV;
 		goto unlock_dev_srcu;
 	}
@@ -1056,6 +1057,16 @@ unlock_usr_srcu:
 	return ret;
 }
 
+static inline u32 fifo_space_avail(u32 head, u32 tail, u32 q_size)
+{
+	u32 avail = head - tail - 1;
+
+	if (head <= tail)
+		avail += q_size;
+
+	return avail;
+}
+
 static inline int copy_exec_reqs(struct qaic_device *qdev, struct bo_slice *slice, u32 dbc_id,
 				 u32 head, u32 *ptail)
 {
@@ -1064,27 +1075,20 @@ static inline int copy_exec_reqs(struct qaic_device *qdev, struct bo_slice *slic
 	u32 tail = *ptail;
 	u32 avail;
 
-	avail = head - tail;
-	if (head <= tail)
-		avail += dbc->nelem;
-
-	--avail;
-
+	avail = fifo_space_avail(head, tail, dbc->nelem);
 	if (avail < slice->nents)
 		return -EAGAIN;
 
 	if (tail + slice->nents > dbc->nelem) {
 		avail = dbc->nelem - tail;
 		avail = min_t(u32, avail, slice->nents);
-		memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs,
-		       sizeof(*reqs) * avail);
+		memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * avail);
 		reqs += avail;
 		avail = slice->nents - avail;
 		if (avail)
 			memcpy(dbc->req_q_base, reqs, sizeof(*reqs) * avail);
 	} else {
-		memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs,
-		       sizeof(*reqs) * slice->nents);
+		memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * slice->nents);
 	}
 
 	*ptail = (tail + slice->nents) % dbc->nelem;
@@ -1092,46 +1096,31 @@ static inline int copy_exec_reqs(struct qaic_device *qdev, struct bo_slice *slic
 	return 0;
 }
 
-/*
- * Based on the value of resize we may only need to transmit first_n
- * entries and the last entry, with last_bytes to send from the last entry.
- * Note that first_n could be 0.
- */
 static inline int copy_partial_exec_reqs(struct qaic_device *qdev, struct bo_slice *slice,
-					 u64 resize, u32 dbc_id, u32 head, u32 *ptail)
+					 u64 resize, struct dma_bridge_chan *dbc, u32 head,
+					 u32 *ptail)
 {
-	struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id];
 	struct dbc_req *reqs = slice->reqs;
 	struct dbc_req *last_req;
 	u32 tail = *ptail;
-	u64 total_bytes;
 	u64 last_bytes;
 	u32 first_n;
 	u32 avail;
-	int ret;
-	int i;
-
-	avail = head - tail;
-	if (head <= tail)
-		avail += dbc->nelem;
 
-	--avail;
+	avail = fifo_space_avail(head, tail, dbc->nelem);
 
-	total_bytes = 0;
-	for (i = 0; i < slice->nents; i++) {
-		total_bytes += le32_to_cpu(reqs[i].len);
-		if (total_bytes >= resize)
+	/*
+	 * After this for loop is complete, first_n represents the index
+	 * of the last DMA request of this slice that needs to be
+	 * transferred after resizing and last_bytes represents DMA size
+	 * of that request.
+	 */
+	last_bytes = resize;
+	for (first_n = 0; first_n < slice->nents; first_n++)
+		if (last_bytes > le32_to_cpu(reqs[first_n].len))
+			last_bytes -= le32_to_cpu(reqs[first_n].len);
+		else
 			break;
-	}
-
-	if (total_bytes < resize) {
-		/* User space should have used the full buffer path. */
-		ret = -EINVAL;
-		return ret;
-	}
-
-	first_n = i;
-	last_bytes = i ? resize + le32_to_cpu(reqs[i].len) - total_bytes : resize;
 
 	if (avail < (first_n + 1))
 		return -EAGAIN;
@@ -1140,22 +1129,21 @@ static inline int copy_partial_exec_reqs(struct qaic_device *qdev, struct bo_sli
 		if (tail + first_n > dbc->nelem) {
 			avail = dbc->nelem - tail;
 			avail = min_t(u32, avail, first_n);
-			memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs,
-			       sizeof(*reqs) * avail);
+			memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * avail);
 			last_req = reqs + avail;
 			avail = first_n - avail;
 			if (avail)
 				memcpy(dbc->req_q_base, last_req, sizeof(*reqs) * avail);
 		} else {
-			memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs,
-			       sizeof(*reqs) * first_n);
+			memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * first_n);
 		}
 	}
 
-	/* Copy over the last entry. Here we need to adjust len to the left over
+	/*
+	 * Copy over the last entry. Here we need to adjust len to the left over
 	 * size, and set src and dst to the entry it is copied to.
 	 */
-	last_req = dbc->req_q_base + (tail + first_n) % dbc->nelem * get_dbc_req_elem_size();
+	last_req = fifo_at(dbc->req_q_base, (tail + first_n) % dbc->nelem);
 	memcpy(last_req, reqs + slice->nents - 1, sizeof(*reqs));
 
 	/*
@@ -1166,6 +1154,9 @@ static inline int copy_partial_exec_reqs(struct qaic_device *qdev, struct bo_sli
 	last_req->len = cpu_to_le32((u32)last_bytes);
 	last_req->src_addr = reqs[first_n].src_addr;
 	last_req->dest_addr = reqs[first_n].dest_addr;
+	if (!last_bytes)
+		/* Disable DMA transfer */
+		last_req->cmd = GENMASK(7, 2) & reqs[first_n].cmd;
 
 	*ptail = (tail + first_n + 1) % dbc->nelem;
 
@@ -1225,26 +1216,17 @@ static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *fil
 		bo->req_id = dbc->next_req_id++;
 
 		list_for_each_entry(slice, &bo->slices, slice) {
-			/*
-			 * If this slice does not fall under the given
-			 * resize then skip this slice and continue the loop
-			 */
-			if (is_partial && pexec[i].resize && pexec[i].resize <= slice->offset)
-				continue;
-
 			for (j = 0; j < slice->nents; j++)
 				slice->reqs[j].req_id = cpu_to_le16(bo->req_id);
 
-			/*
-			 * If it is a partial execute ioctl call then check if
-			 * resize has cut this slice short then do a partial copy
-			 * else do complete copy
-			 */
-			if (is_partial && pexec[i].resize &&
-			    pexec[i].resize < slice->offset + slice->size)
+			if (is_partial && (!pexec[i].resize || pexec[i].resize <= slice->offset))
+				/* Configure the slice for no DMA transfer */
+				ret = copy_partial_exec_reqs(qdev, slice, 0, dbc, head, tail);
+			else if (is_partial && pexec[i].resize < slice->offset + slice->size)
+				/* Configure the slice to be partially DMA transferred */
 				ret = copy_partial_exec_reqs(qdev, slice,
-							     pexec[i].resize - slice->offset,
-							     dbc->id, head, tail);
+							     pexec[i].resize - slice->offset, dbc,
+							     head, tail);
 			else
 				ret = copy_exec_reqs(qdev, slice, dbc->id, head, tail);
 			if (ret) {
@@ -1357,7 +1339,7 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr
 
 	qdev = usr->qddev->qdev;
 	qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
-	if (qdev->in_reset) {
+	if (qdev->dev_state != QAIC_ONLINE) {
 		ret = -ENODEV;
 		goto unlock_dev_srcu;
 	}
@@ -1464,6 +1446,16 @@ irqreturn_t dbc_irq_handler(int irq, void *data)
 
 	rcu_id = srcu_read_lock(&dbc->ch_lock);
 
+	if (datapath_polling) {
+		srcu_read_unlock(&dbc->ch_lock, rcu_id);
+		/*
+		 * Normally datapath_polling will not have irqs enabled, but
+		 * when running with only one MSI the interrupt is shared with
+		 * MHI so it cannot be disabled. Return ASAP instead.
+		 */
+		return IRQ_HANDLED;
+	}
+
 	if (!dbc->usr) {
 		srcu_read_unlock(&dbc->ch_lock, rcu_id);
 		return IRQ_HANDLED;
@@ -1486,7 +1478,8 @@ irqreturn_t dbc_irq_handler(int irq, void *data)
 		return IRQ_NONE;
 	}
 
-	disable_irq_nosync(irq);
+	if (!dbc->qdev->single_msi)
+		disable_irq_nosync(irq);
 	srcu_read_unlock(&dbc->ch_lock, rcu_id);
 	return IRQ_WAKE_THREAD;
 }
@@ -1502,7 +1495,7 @@ void irq_polling_work(struct work_struct *work)
 	rcu_id = srcu_read_lock(&dbc->ch_lock);
 
 	while (1) {
-		if (dbc->qdev->in_reset) {
+		if (dbc->qdev->dev_state != QAIC_ONLINE) {
 			srcu_read_unlock(&dbc->ch_lock, rcu_id);
 			return;
 		}
@@ -1557,12 +1550,12 @@ irqreturn_t dbc_irq_threaded_fn(int irq, void *data)
 	u32 tail;
 
 	rcu_id = srcu_read_lock(&dbc->ch_lock);
+	qdev = dbc->qdev;
 
 	head = readl(dbc->dbc_base + RSPHP_OFF);
 	if (head == U32_MAX) /* PCI link error */
 		goto error_out;
 
-	qdev = dbc->qdev;
 read_fifo:
 
 	if (!event_count) {
@@ -1643,14 +1636,14 @@ read_fifo:
 	goto read_fifo;
 
 normal_out:
-	if (likely(!datapath_polling))
+	if (!qdev->single_msi && likely(!datapath_polling))
 		enable_irq(irq);
-	else
+	else if (unlikely(datapath_polling))
 		schedule_work(&dbc->poll_work);
 	/* checking the fifo and enabling irqs is a race, missed event check */
 	tail = readl(dbc->dbc_base + RSPTP_OFF);
 	if (tail != U32_MAX && head != tail) {
-		if (likely(!datapath_polling))
+		if (!qdev->single_msi && likely(!datapath_polling))
 			disable_irq_nosync(irq);
 		goto read_fifo;
 	}
@@ -1659,9 +1652,9 @@ normal_out:
 
 error_out:
 	srcu_read_unlock(&dbc->ch_lock, rcu_id);
-	if (likely(!datapath_polling))
+	if (!qdev->single_msi && likely(!datapath_polling))
 		enable_irq(irq);
-	else
+	else if (unlikely(datapath_polling))
 		schedule_work(&dbc->poll_work);
 
 	return IRQ_HANDLED;
@@ -1692,7 +1685,7 @@ int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file
 
 	qdev = usr->qddev->qdev;
 	qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
-	if (qdev->in_reset) {
+	if (qdev->dev_state != QAIC_ONLINE) {
 		ret = -ENODEV;
 		goto unlock_dev_srcu;
 	}
@@ -1761,7 +1754,7 @@ int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file
 
 	qdev = usr->qddev->qdev;
 	qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
-	if (qdev->in_reset) {
+	if (qdev->dev_state != QAIC_ONLINE) {
 		ret = -ENODEV;
 		goto unlock_dev_srcu;
 	}
@@ -1852,7 +1845,7 @@ int qaic_detach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
 
 	qdev = usr->qddev->qdev;
 	qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
-	if (qdev->in_reset) {
+	if (qdev->dev_state != QAIC_ONLINE) {
 		ret = -ENODEV;
 		goto unlock_dev_srcu;
 	}
diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
index 6f58095767..2a313eb69b 100644
--- a/drivers/accel/qaic/qaic_drv.c
+++ b/drivers/accel/qaic/qaic_drv.c
@@ -8,6 +8,7 @@
 #include <linux/idr.h>
 #include <linux/interrupt.h>
 #include <linux/list.h>
+#include <linux/kobject.h>
 #include <linux/kref.h>
 #include <linux/mhi.h>
 #include <linux/module.h>
@@ -27,6 +28,7 @@
 
 #include "mhi_controller.h"
 #include "qaic.h"
+#include "qaic_timesync.h"
 
 MODULE_IMPORT_NS(DMA_BUF);
 
@@ -42,9 +44,6 @@ MODULE_PARM_DESC(datapath_polling, "Operate the datapath in polling mode");
 static bool link_up;
 static DEFINE_IDA(qaic_usrs);
 
-static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id);
-static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id);
-
 static void free_usr(struct kref *kref)
 {
 	struct qaic_user *usr = container_of(kref, struct qaic_user, ref_count);
@@ -63,7 +62,7 @@ static int qaic_open(struct drm_device *dev, struct drm_file *file)
 	int ret;
 
 	rcu_id = srcu_read_lock(&qdev->dev_lock);
-	if (qdev->in_reset) {
+	if (qdev->dev_state != QAIC_ONLINE) {
 		ret = -ENODEV;
 		goto dev_unlock;
 	}
@@ -120,7 +119,7 @@ static void qaic_postclose(struct drm_device *dev, struct drm_file *file)
 	if (qddev) {
 		qdev = qddev->qdev;
 		qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
-		if (!qdev->in_reset) {
+		if (qdev->dev_state == QAIC_ONLINE) {
 			qaic_release_usr(qdev, usr);
 			for (i = 0; i < qdev->num_dbc; ++i)
 				if (qdev->dbc[i].usr && qdev->dbc[i].usr->handle == usr->handle)
@@ -182,13 +181,6 @@ static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id)
 
 	qddev->partition_id = partition_id;
 
-	/*
-	 * drm_dev_unregister() sets the driver data to NULL and
-	 * drm_dev_register() does not update the driver data. During a SOC
-	 * reset drm dev is unregistered and registered again leaving the
-	 * driver data to NULL.
-	 */
-	dev_set_drvdata(to_accel_kdev(qddev), drm->accel);
 	ret = drm_dev_register(drm, 0);
 	if (ret)
 		pci_dbg(qdev->pdev, "drm_dev_register failed %d\n", ret);
@@ -202,7 +194,6 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id)
 	struct drm_device *drm = to_drm(qddev);
 	struct qaic_user *usr;
 
-	drm_dev_get(drm);
 	drm_dev_unregister(drm);
 	qddev->partition_id = 0;
 	/*
@@ -231,7 +222,6 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id)
 		mutex_lock(&qddev->users_mutex);
 	}
 	mutex_unlock(&qddev->users_mutex);
-	drm_dev_put(drm);
 }
 
 static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
@@ -253,8 +243,6 @@ static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id
 
 	qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
 
-	qdev->in_reset = false;
-
 	dev_set_drvdata(&mhi_dev->dev, qdev);
 	qdev->cntl_ch = mhi_dev;
 
@@ -264,6 +252,7 @@ static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id
 		return ret;
 	}
 
+	qdev->dev_state = QAIC_BOOT;
 	ret = get_cntl_version(qdev, NULL, &major, &minor);
 	if (ret || major != CNTL_MAJOR || minor > CNTL_MINOR) {
 		pci_err(qdev->pdev, "%s: Control protocol version (%d.%d) not supported. Supported version is (%d.%d). Ret: %d\n",
@@ -271,8 +260,8 @@ static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id
 		ret = -EINVAL;
 		goto close_control;
 	}
-
-	ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION);
+	qdev->dev_state = QAIC_ONLINE;
+	kobject_uevent(&(to_accel_kdev(qdev->qddev))->kobj, KOBJ_ONLINE);
 
 	return ret;
 
@@ -290,7 +279,8 @@ static void qaic_notify_reset(struct qaic_device *qdev)
 {
 	int i;
 
-	qdev->in_reset = true;
+	kobject_uevent(&(to_accel_kdev(qdev->qddev))->kobj, KOBJ_OFFLINE);
+	qdev->dev_state = QAIC_OFFLINE;
 	/* wake up any waiters to avoid waiting for timeouts at sync */
 	wake_all_cntl(qdev);
 	for (i = 0; i < qdev->num_dbc; ++i)
@@ -298,21 +288,15 @@ static void qaic_notify_reset(struct qaic_device *qdev)
 	synchronize_srcu(&qdev->dev_lock);
 }
 
-void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset)
+void qaic_dev_reset_clean_local_state(struct qaic_device *qdev)
 {
 	int i;
 
 	qaic_notify_reset(qdev);
 
-	/* remove drmdevs to prevent new users from coming in */
-	qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
-
 	/* start tearing things down */
 	for (i = 0; i < qdev->num_dbc; ++i)
 		release_dbc(qdev, i);
-
-	if (exit_reset)
-		qdev->in_reset = false;
 }
 
 static void cleanup_qdev(struct qaic_device *qdev)
@@ -324,6 +308,7 @@ static void cleanup_qdev(struct qaic_device *qdev)
 	cleanup_srcu_struct(&qdev->dev_lock);
 	pci_set_drvdata(qdev->pdev, NULL);
 	destroy_workqueue(qdev->cntl_wq);
+	destroy_workqueue(qdev->qts_wq);
 }
 
 static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_device_id *id)
@@ -336,6 +321,7 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_de
 	if (!qdev)
 		return NULL;
 
+	qdev->dev_state = QAIC_OFFLINE;
 	if (id->device == PCI_DEV_AIC100) {
 		qdev->num_dbc = 16;
 		qdev->dbc = devm_kcalloc(&pdev->dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL);
@@ -347,6 +333,12 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_de
 	if (!qdev->cntl_wq)
 		return NULL;
 
+	qdev->qts_wq = alloc_workqueue("qaic_ts", WQ_UNBOUND, 0);
+	if (!qdev->qts_wq) {
+		destroy_workqueue(qdev->cntl_wq);
+		return NULL;
+	}
+
 	pci_set_drvdata(pdev, qdev);
 	qdev->pdev = pdev;
 
@@ -424,14 +416,24 @@ static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev)
 	int i;
 
 	/* Managed release since we use pcim_enable_device */
-	ret = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_MSI);
-	if (ret < 0)
-		return ret;
+	ret = pci_alloc_irq_vectors(pdev, 32, 32, PCI_IRQ_MSI);
+	if (ret == -ENOSPC) {
+		ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+		if (ret < 0)
+			return ret;
 
-	if (ret < 32) {
-		pci_err(pdev, "%s: Requested 32 MSIs. Obtained %d MSIs which is less than the 32 required.\n",
-			__func__, ret);
-		return -ENODEV;
+		/*
+		 * Operate in one MSI mode. All interrupts will be directed to
+		 * MSI0; every interrupt will wake up all the interrupt handlers
+		 * (MHI and DBC[0-15]). Since the interrupt is now shared, it is
+		 * not disabled during DBC threaded handler, but only one thread
+		 * will be allowed to run per DBC, so while it can be
+		 * interrupted, it shouldn't race with itself.
+		 */
+		qdev->single_msi = true;
+		pci_info(pdev, "Allocating 32 MSIs failed, operating in 1 MSI mode. Performance may be impacted.\n");
+	} else if (ret < 0) {
+		return ret;
 	}
 
 	mhi_irq = pci_irq_vector(pdev, 0);
@@ -439,15 +441,17 @@ static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev)
 		return mhi_irq;
 
 	for (i = 0; i < qdev->num_dbc; ++i) {
-		ret = devm_request_threaded_irq(&pdev->dev, pci_irq_vector(pdev, i + 1),
+		ret = devm_request_threaded_irq(&pdev->dev,
+						pci_irq_vector(pdev, qdev->single_msi ? 0 : i + 1),
 						dbc_irq_handler, dbc_irq_threaded_fn, IRQF_SHARED,
 						"qaic_dbc", &qdev->dbc[i]);
 		if (ret)
 			return ret;
 
 		if (datapath_polling) {
-			qdev->dbc[i].irq = pci_irq_vector(pdev, i + 1);
-			disable_irq_nosync(qdev->dbc[i].irq);
+			qdev->dbc[i].irq = pci_irq_vector(pdev, qdev->single_msi ? 0 : i + 1);
+			if (!qdev->single_msi)
+				disable_irq_nosync(qdev->dbc[i].irq);
 			INIT_WORK(&qdev->dbc[i].poll_work, irq_polling_work);
 		}
 	}
@@ -479,14 +483,21 @@ static int qaic_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto cleanup_qdev;
 	}
 
-	qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_0, mhi_irq);
+	ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION);
+	if (ret)
+		goto cleanup_qdev;
+
+	qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_0, mhi_irq,
+						       qdev->single_msi);
 	if (IS_ERR(qdev->mhi_cntrl)) {
 		ret = PTR_ERR(qdev->mhi_cntrl);
-		goto cleanup_qdev;
+		goto cleanup_drm_dev;
 	}
 
 	return 0;
 
+cleanup_drm_dev:
+	qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
 cleanup_qdev:
 	cleanup_qdev(qdev);
 	return ret;
@@ -499,7 +510,8 @@ static void qaic_pci_remove(struct pci_dev *pdev)
 	if (!qdev)
 		return;
 
-	qaic_dev_reset_clean_local_state(qdev, false);
+	qaic_dev_reset_clean_local_state(qdev);
+	qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
 	qaic_mhi_free_controller(qdev->mhi_cntrl, link_up);
 	cleanup_qdev(qdev);
 }
@@ -522,14 +534,13 @@ static void qaic_pci_reset_prepare(struct pci_dev *pdev)
 
 	qaic_notify_reset(qdev);
 	qaic_mhi_start_reset(qdev->mhi_cntrl);
-	qaic_dev_reset_clean_local_state(qdev, false);
+	qaic_dev_reset_clean_local_state(qdev);
 }
 
 static void qaic_pci_reset_done(struct pci_dev *pdev)
 {
 	struct qaic_device *qdev = pci_get_drvdata(pdev);
 
-	qdev->in_reset = false;
 	qaic_mhi_reset_done(qdev->mhi_cntrl);
 }
 
@@ -586,6 +597,10 @@ static int __init qaic_init(void)
 		goto free_pci;
 	}
 
+	ret = qaic_timesync_init();
+	if (ret)
+		pr_debug("qaic: qaic_timesync_init failed %d\n", ret);
+
 	return 0;
 
 free_pci:
@@ -611,6 +626,7 @@ static void __exit qaic_exit(void)
 	 * reinitializing the link_up state after the cleanup is done.
 	 */
 	link_up = true;
+	qaic_timesync_deinit();
 	mhi_driver_unregister(&qaic_mhi_driver);
 	pci_unregister_driver(&qaic_pci_driver);
 }
diff --git a/drivers/accel/qaic/qaic_timesync.c b/drivers/accel/qaic/qaic_timesync.c
new file mode 100644
index 0000000000..301f4462d5
--- /dev/null
+++ b/drivers/accel/qaic/qaic_timesync.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. */
+
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/mhi.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/time64.h>
+#include <linux/timer.h>
+
+#include "qaic.h"
+#include "qaic_timesync.h"
+
+#define QTIMER_REG_OFFSET			0xa28
+#define QAIC_TIMESYNC_SIGNATURE			0x55aa
+#define QAIC_CONV_QTIMER_TO_US(qtimer)		(mul_u64_u32_div(qtimer, 10, 192))
+
+static unsigned int timesync_delay_ms = 1000; /* 1 sec default */
+module_param(timesync_delay_ms, uint, 0600);
+MODULE_PARM_DESC(timesync_delay_ms, "Delay in ms between two consecutive timesync operations");
+
+enum qts_msg_type {
+	QAIC_TS_CMD_TO_HOST,
+	QAIC_TS_SYNC_REQ,
+	QAIC_TS_ACK_TO_HOST,
+	QAIC_TS_MSG_TYPE_MAX
+};
+
+/**
+ * struct qts_hdr - Timesync message header structure.
+ * @signature: Unique signature to identify the timesync message.
+ * @reserved_1: Reserved for future use.
+ * @reserved_2: Reserved for future use.
+ * @msg_type: sub-type of the timesync message.
+ * @reserved_3: Reserved for future use.
+ */
+struct qts_hdr {
+	__le16	signature;
+	__le16	reserved_1;
+	u8	reserved_2;
+	u8	msg_type;
+	__le16	reserved_3;
+} __packed;
+
+/**
+ * struct qts_timeval - Structure to carry time information.
+ * @tv_sec: Seconds part of the time.
+ * @tv_usec: uS (microseconds) part of the time.
+ */
+struct qts_timeval {
+	__le64	tv_sec;
+	__le64	tv_usec;
+} __packed;
+
+/**
+ * struct qts_host_time_sync_msg_data - Structure to denote the timesync message.
+ * @header: Header of the timesync message.
+ * @data: Time information.
+ */
+struct qts_host_time_sync_msg_data {
+	struct	qts_hdr header;
+	struct	qts_timeval data;
+} __packed;
+
+/**
+ * struct mqts_dev - MHI QAIC Timesync Control device.
+ * @qdev: Pointer to the root device struct driven by QAIC driver.
+ * @mhi_dev: Pointer to associated MHI device.
+ * @timer: Timer handle used for timesync.
+ * @qtimer_addr: Device QTimer register pointer.
+ * @buff_in_use: atomic variable to track if the sync_msg buffer is in use.
+ * @dev: Device pointer to qdev->pdev->dev stored for easy access.
+ * @sync_msg: Buffer used to send timesync message over MHI.
+ */
+struct mqts_dev {
+	struct qaic_device *qdev;
+	struct mhi_device *mhi_dev;
+	struct timer_list timer;
+	void __iomem *qtimer_addr;
+	atomic_t buff_in_use;
+	struct device *dev;
+	struct qts_host_time_sync_msg_data *sync_msg;
+};
+
+struct qts_resp_msg {
+	struct qts_hdr	hdr;
+} __packed;
+
+struct qts_resp {
+	struct qts_resp_msg	data;
+	struct work_struct	work;
+	struct qaic_device	*qdev;
+};
+
+#ifdef readq
+static u64 read_qtimer(const volatile void __iomem *addr)
+{
+	return readq(addr);
+}
+#else
+static u64 read_qtimer(const volatile void __iomem *addr)
+{
+	u64 low, high;
+
+	low = readl(addr);
+	high = readl(addr + sizeof(u32));
+	return low | (high << 32);
+}
+#endif
+
+static void qaic_timesync_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+	struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev);
+
+	dev_dbg(mqtsdev->dev, "%s status: %d xfer_len: %zu\n", __func__,
+		mhi_result->transaction_status, mhi_result->bytes_xferd);
+
+	atomic_set(&mqtsdev->buff_in_use, 0);
+}
+
+static void qaic_timesync_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+	struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev);
+
+	dev_err(mqtsdev->dev, "%s no data expected on dl channel\n", __func__);
+}
+
+static void qaic_timesync_timer(struct timer_list *t)
+{
+	struct mqts_dev *mqtsdev = from_timer(mqtsdev, t, timer);
+	struct qts_host_time_sync_msg_data *sync_msg;
+	u64 device_qtimer_us;
+	u64 device_qtimer;
+	u64 host_time_us;
+	u64 offset_us;
+	u64 host_sec;
+	int ret;
+
+	if (atomic_read(&mqtsdev->buff_in_use)) {
+		dev_dbg(mqtsdev->dev, "%s buffer not free, schedule next cycle\n", __func__);
+		goto mod_timer;
+	}
+	atomic_set(&mqtsdev->buff_in_use, 1);
+
+	sync_msg = mqtsdev->sync_msg;
+	sync_msg->header.signature = cpu_to_le16(QAIC_TIMESYNC_SIGNATURE);
+	sync_msg->header.msg_type = QAIC_TS_SYNC_REQ;
+	/* Read host UTC time and convert to uS*/
+	host_time_us = div_u64(ktime_get_real_ns(), NSEC_PER_USEC);
+	device_qtimer = read_qtimer(mqtsdev->qtimer_addr);
+	device_qtimer_us = QAIC_CONV_QTIMER_TO_US(device_qtimer);
+	/* Offset between host UTC and device time */
+	offset_us = host_time_us - device_qtimer_us;
+
+	host_sec = div_u64(offset_us, USEC_PER_SEC);
+	sync_msg->data.tv_usec = cpu_to_le64(offset_us - host_sec * USEC_PER_SEC);
+	sync_msg->data.tv_sec = cpu_to_le64(host_sec);
+	ret = mhi_queue_buf(mqtsdev->mhi_dev, DMA_TO_DEVICE, sync_msg, sizeof(*sync_msg), MHI_EOT);
+	if (ret && (ret != -EAGAIN)) {
+		dev_err(mqtsdev->dev, "%s unable to queue to mhi:%d\n", __func__, ret);
+		return;
+	} else if (ret == -EAGAIN) {
+		atomic_set(&mqtsdev->buff_in_use, 0);
+	}
+
+mod_timer:
+	ret = mod_timer(t, jiffies + msecs_to_jiffies(timesync_delay_ms));
+	if (ret)
+		dev_err(mqtsdev->dev, "%s mod_timer error:%d\n", __func__, ret);
+}
+
+static int qaic_timesync_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
+{
+	struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
+	struct mqts_dev *mqtsdev;
+	struct timer_list *timer;
+	int ret;
+
+	mqtsdev = kzalloc(sizeof(*mqtsdev), GFP_KERNEL);
+	if (!mqtsdev) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	timer = &mqtsdev->timer;
+	mqtsdev->mhi_dev = mhi_dev;
+	mqtsdev->qdev = qdev;
+	mqtsdev->dev = &qdev->pdev->dev;
+
+	mqtsdev->sync_msg = kzalloc(sizeof(*mqtsdev->sync_msg), GFP_KERNEL);
+	if (!mqtsdev->sync_msg) {
+		ret = -ENOMEM;
+		goto free_mqts_dev;
+	}
+	atomic_set(&mqtsdev->buff_in_use, 0);
+
+	ret = mhi_prepare_for_transfer(mhi_dev);
+	if (ret)
+		goto free_sync_msg;
+
+	/* Qtimer register pointer */
+	mqtsdev->qtimer_addr = qdev->bar_0 + QTIMER_REG_OFFSET;
+	timer_setup(timer, qaic_timesync_timer, 0);
+	timer->expires = jiffies + msecs_to_jiffies(timesync_delay_ms);
+	add_timer(timer);
+	dev_set_drvdata(&mhi_dev->dev, mqtsdev);
+
+	return 0;
+
+free_sync_msg:
+	kfree(mqtsdev->sync_msg);
+free_mqts_dev:
+	kfree(mqtsdev);
+out:
+	return ret;
+};
+
+static void qaic_timesync_remove(struct mhi_device *mhi_dev)
+{
+	struct mqts_dev *mqtsdev = dev_get_drvdata(&mhi_dev->dev);
+
+	del_timer_sync(&mqtsdev->timer);
+	mhi_unprepare_from_transfer(mqtsdev->mhi_dev);
+	kfree(mqtsdev->sync_msg);
+	kfree(mqtsdev);
+}
+
+static const struct mhi_device_id qaic_timesync_match_table[] = {
+	{ .chan = "QAIC_TIMESYNC_PERIODIC"},
+	{},
+};
+
+MODULE_DEVICE_TABLE(mhi, qaic_timesync_match_table);
+
+static struct mhi_driver qaic_timesync_driver = {
+	.id_table = qaic_timesync_match_table,
+	.remove = qaic_timesync_remove,
+	.probe = qaic_timesync_probe,
+	.ul_xfer_cb = qaic_timesync_ul_xfer_cb,
+	.dl_xfer_cb = qaic_timesync_dl_xfer_cb,
+	.driver = {
+		.name = "qaic_timesync_periodic",
+	},
+};
+
+static void qaic_boot_timesync_worker(struct work_struct *work)
+{
+	struct qts_resp *resp = container_of(work, struct qts_resp, work);
+	struct qts_host_time_sync_msg_data *req;
+	struct qts_resp_msg data = resp->data;
+	struct qaic_device *qdev = resp->qdev;
+	struct mhi_device *mhi_dev;
+	struct timespec64 ts;
+	int ret;
+
+	mhi_dev = qdev->qts_ch;
+	/* Queue the response message beforehand to avoid race conditions */
+	ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, &resp->data, sizeof(resp->data), MHI_EOT);
+	if (ret) {
+		kfree(resp);
+		dev_warn(&mhi_dev->dev, "Failed to re-queue response buffer %d\n", ret);
+		return;
+	}
+
+	switch (data.hdr.msg_type) {
+	case QAIC_TS_CMD_TO_HOST:
+		req = kzalloc(sizeof(*req), GFP_KERNEL);
+		if (!req)
+			break;
+
+		req->header = data.hdr;
+		req->header.msg_type = QAIC_TS_SYNC_REQ;
+		ktime_get_real_ts64(&ts);
+		req->data.tv_sec = cpu_to_le64(ts.tv_sec);
+		req->data.tv_usec = cpu_to_le64(div_u64(ts.tv_nsec, NSEC_PER_USEC));
+
+		ret = mhi_queue_buf(mhi_dev, DMA_TO_DEVICE, req, sizeof(*req), MHI_EOT);
+		if (ret) {
+			kfree(req);
+			dev_dbg(&mhi_dev->dev, "Failed to send request message. Error %d\n", ret);
+		}
+		break;
+	case QAIC_TS_ACK_TO_HOST:
+		dev_dbg(&mhi_dev->dev, "ACK received from device\n");
+		break;
+	default:
+		dev_err(&mhi_dev->dev, "Invalid message type %u.\n", data.hdr.msg_type);
+	}
+}
+
+static int qaic_boot_timesync_queue_resp(struct mhi_device *mhi_dev, struct qaic_device *qdev)
+{
+	struct qts_resp *resp;
+	int ret;
+
+	resp = kzalloc(sizeof(*resp), GFP_KERNEL);
+	if (!resp)
+		return -ENOMEM;
+
+	resp->qdev = qdev;
+	INIT_WORK(&resp->work, qaic_boot_timesync_worker);
+
+	ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, &resp->data, sizeof(resp->data), MHI_EOT);
+	if (ret) {
+		kfree(resp);
+		dev_warn(&mhi_dev->dev, "Failed to queue response buffer %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void qaic_boot_timesync_remove(struct mhi_device *mhi_dev)
+{
+	struct qaic_device *qdev;
+
+	qdev = dev_get_drvdata(&mhi_dev->dev);
+	mhi_unprepare_from_transfer(qdev->qts_ch);
+	qdev->qts_ch = NULL;
+}
+
+static int qaic_boot_timesync_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
+{
+	struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
+	int ret;
+
+	ret = mhi_prepare_for_transfer(mhi_dev);
+	if (ret)
+		return ret;
+
+	qdev->qts_ch = mhi_dev;
+	dev_set_drvdata(&mhi_dev->dev, qdev);
+
+	ret = qaic_boot_timesync_queue_resp(mhi_dev, qdev);
+	if (ret) {
+		dev_set_drvdata(&mhi_dev->dev, NULL);
+		qdev->qts_ch = NULL;
+		mhi_unprepare_from_transfer(mhi_dev);
+	}
+
+	return ret;
+}
+
+static void qaic_boot_timesync_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+	kfree(mhi_result->buf_addr);
+}
+
+static void qaic_boot_timesync_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+	struct qts_resp *resp = container_of(mhi_result->buf_addr, struct qts_resp, data);
+
+	if (mhi_result->transaction_status || mhi_result->bytes_xferd != sizeof(resp->data)) {
+		kfree(resp);
+		return;
+	}
+
+	queue_work(resp->qdev->qts_wq, &resp->work);
+}
+
+static const struct mhi_device_id qaic_boot_timesync_match_table[] = {
+	{ .chan = "QAIC_TIMESYNC"},
+	{},
+};
+
+static struct mhi_driver qaic_boot_timesync_driver = {
+	.id_table = qaic_boot_timesync_match_table,
+	.remove = qaic_boot_timesync_remove,
+	.probe = qaic_boot_timesync_probe,
+	.ul_xfer_cb = qaic_boot_timesync_ul_xfer_cb,
+	.dl_xfer_cb = qaic_boot_timesync_dl_xfer_cb,
+	.driver = {
+		.name = "qaic_timesync",
+	},
+};
+
+int qaic_timesync_init(void)
+{
+	int ret;
+
+	ret = mhi_driver_register(&qaic_timesync_driver);
+	if (ret)
+		return ret;
+	ret = mhi_driver_register(&qaic_boot_timesync_driver);
+
+	return ret;
+}
+
+void qaic_timesync_deinit(void)
+{
+	mhi_driver_unregister(&qaic_boot_timesync_driver);
+	mhi_driver_unregister(&qaic_timesync_driver);
+}
diff --git a/drivers/accel/qaic/qaic_timesync.h b/drivers/accel/qaic/qaic_timesync.h
new file mode 100644
index 0000000000..851b7acd43
--- /dev/null
+++ b/drivers/accel/qaic/qaic_timesync.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef __QAIC_TIMESYNC_H__
+#define __QAIC_TIMESYNC_H__
+
+int qaic_timesync_init(void);
+void qaic_timesync_deinit(void);
+#endif /* __QAIC_TIMESYNC_H__ */
-- 
cgit v1.2.3