From ace9429bb58fd418f0c81d4c2835699bddf6bde6 Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Thu, 11 Apr 2024 10:27:49 +0200
Subject: Adding upstream version 6.6.15.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 drivers/gpu/drm/amd/amdkfd/Kconfig                 |   40 +
 drivers/gpu/drm/amd/amdkfd/Makefile                |   69 +
 drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c   |  130 +
 drivers/gpu/drm/amd/amdkfd/cik_int.h               |   43 +
 drivers/gpu/drm/amd/amdkfd/cik_regs.h              |   71 +
 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h     | 3440 ++++++++++++++++
 .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm | 1231 ++++++
 .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm  |  747 ++++
 .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm  | 1099 ++++++
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c           | 3410 ++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c              | 2277 +++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_crat.h              |  321 ++
 drivers/gpu/drm/amd/amdkfd/kfd_debug.c             | 1120 ++++++
 drivers/gpu/drm/amd/amdkfd/kfd_debug.h             |  139 +
 drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c           |  111 +
 drivers/gpu/drm/amd/amdkfd/kfd_device.c            | 1406 +++++++
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 3253 +++++++++++++++
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |  337 ++
 .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c  |  145 +
 .../drm/amd/amdkfd/kfd_device_queue_manager_v10.c  |   81 +
 .../drm/amd/amdkfd/kfd_device_queue_manager_v11.c  |   81 +
 .../drm/amd/amdkfd/kfd_device_queue_manager_v9.c   |   94 +
 .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c   |  151 +
 drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c          |  303 ++
 drivers/gpu/drm/amd/amdkfd/kfd_events.c            | 1327 +++++++
 drivers/gpu/drm/amd/amdkfd/kfd_events.h            |   90 +
 drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c       |  431 ++
 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c   |  405 ++
 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c   |  385 ++
 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c    |  481 +++
 drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c         |  180 +
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c      |  376 ++
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h      |   86 +
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c           | 1055 +++++
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h           |   52 +
 drivers/gpu/drm/amd/amdkfd/kfd_module.c            |   96 +
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c       |  292 ++
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h       |  176 +
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c   |  464 +++
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c   |  531 +++
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c   |  568 +++
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c    |  921 +++++
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c    |  523 +++
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c    |  493 +++
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c |  437 +++
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c |  317 ++
 drivers/gpu/drm/amd/amdkfd/kfd_pasid.c             |   70 +
 drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h       |  156 +
 drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h    |  659 ++++
 .../gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h |   94 +
 drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h    |  506 +++
 drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h       |  108 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h              | 1526 ++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c           | 2268 +++++++++++
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 1076 +++++
 drivers/gpu/drm/amd/amdkfd/kfd_queue.c             |   84 +
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c        |  379 ++
 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h        |   52 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c               | 4131 ++++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h               |  275 ++
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c          | 2295 +++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h          |  178 +
 drivers/gpu/drm/amd/amdkfd/soc15_int.h             |   50 +
 63 files changed, 43692 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/amdkfd/Kconfig
 create mode 100644 drivers/gpu/drm/amd/amdkfd/Makefile
 create mode 100644 drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/cik_int.h
 create mode 100644 drivers/gpu/drm/amd/amdkfd/cik_regs.h
 create mode 100644 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
 create mode 100644 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
 create mode 100644 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
 create mode 100644 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_crat.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_crat.h
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_debug.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_debug.h
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_device.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_events.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_events.h
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_module.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_priv.h
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_process.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_queue.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_svm.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_svm.h
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_topology.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_topology.h
 create mode 100644 drivers/gpu/drm/amd/amdkfd/soc15_int.h

(limited to 'drivers/gpu/drm/amd/amdkfd')

diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
new file mode 100644
index 000000000..d3c3d3ab7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: MIT
+#
+# Heterogeneous system architecture configuration
+#
+
+config HSA_AMD
+	bool "HSA kernel driver for AMD GPU devices"
+	depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64)
+	select HMM_MIRROR
+	select MMU_NOTIFIER
+	select DRM_AMDGPU_USERPTR
+	help
+	  Enable this if you want to use HSA features on AMD GPU devices.
+
+config HSA_AMD_SVM
+	bool "Enable HMM-based shared virtual memory manager"
+	depends on HSA_AMD && DEVICE_PRIVATE
+	default y
+	select HMM_MIRROR
+	select MMU_NOTIFIER
+	help
+	  Enable this to use unified memory and managed memory in HIP. This
+	  memory manager supports two modes of operation. One based on
+	  preemptions and one based on page faults. To enable page fault
+	  based memory management on most GFXv9 GPUs, set the module
+	  parameter amdgpu.noretry=0.
+
+config HSA_AMD_P2P
+	bool "HSA kernel driver support for peer-to-peer for AMD GPU devices"
+	depends on HSA_AMD && PCI_P2PDMA && DMABUF_MOVE_NOTIFY
+	help
+	  Enable peer-to-peer (P2P) communication between AMD GPUs over
+	  the PCIe bus. This can improve performance of multi-GPU compute
+	  applications and libraries by enabling GPUs to access data directly
+	  in peer GPUs' memory without intermediate copies in system memory.
+
+	  This P2P feature is only enabled on compatible chipsets, and between
+	  GPUs with large memory BARs that expose the entire VRAM in PCIe bus
+	  address space within the physical address limits of the GPUs.
+
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
new file mode 100644
index 000000000..a5ae7bcf4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -0,0 +1,69 @@
+#
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
+# Makefile for Heterogenous System Architecture support for AMD GPU devices
+#
+
+AMDKFD_FILES	:= $(AMDKFD_PATH)/kfd_module.o \
+		$(AMDKFD_PATH)/kfd_device.o \
+		$(AMDKFD_PATH)/kfd_chardev.o \
+		$(AMDKFD_PATH)/kfd_topology.o \
+		$(AMDKFD_PATH)/kfd_pasid.o \
+		$(AMDKFD_PATH)/kfd_doorbell.o \
+		$(AMDKFD_PATH)/kfd_flat_memory.o \
+		$(AMDKFD_PATH)/kfd_process.o \
+		$(AMDKFD_PATH)/kfd_queue.o \
+		$(AMDKFD_PATH)/kfd_mqd_manager.o \
+		$(AMDKFD_PATH)/kfd_mqd_manager_cik.o \
+		$(AMDKFD_PATH)/kfd_mqd_manager_vi.o \
+		$(AMDKFD_PATH)/kfd_mqd_manager_v9.o \
+		$(AMDKFD_PATH)/kfd_mqd_manager_v10.o \
+		$(AMDKFD_PATH)/kfd_mqd_manager_v11.o \
+		$(AMDKFD_PATH)/kfd_kernel_queue.o \
+		$(AMDKFD_PATH)/kfd_packet_manager.o \
+		$(AMDKFD_PATH)/kfd_packet_manager_vi.o \
+		$(AMDKFD_PATH)/kfd_packet_manager_v9.o \
+		$(AMDKFD_PATH)/kfd_process_queue_manager.o \
+		$(AMDKFD_PATH)/kfd_device_queue_manager.o \
+		$(AMDKFD_PATH)/kfd_device_queue_manager_cik.o \
+		$(AMDKFD_PATH)/kfd_device_queue_manager_vi.o \
+		$(AMDKFD_PATH)/kfd_device_queue_manager_v9.o \
+		$(AMDKFD_PATH)/kfd_device_queue_manager_v10.o \
+		$(AMDKFD_PATH)/kfd_device_queue_manager_v11.o \
+		$(AMDKFD_PATH)/kfd_interrupt.o \
+		$(AMDKFD_PATH)/kfd_events.o \
+		$(AMDKFD_PATH)/cik_event_interrupt.o \
+		$(AMDKFD_PATH)/kfd_int_process_v9.o \
+		$(AMDKFD_PATH)/kfd_int_process_v10.o \
+		$(AMDKFD_PATH)/kfd_int_process_v11.o \
+		$(AMDKFD_PATH)/kfd_smi_events.o \
+		$(AMDKFD_PATH)/kfd_crat.o \
+		$(AMDKFD_PATH)/kfd_debug.o
+
+ifneq ($(CONFIG_DEBUG_FS),)
+AMDKFD_FILES += $(AMDKFD_PATH)/kfd_debugfs.o
+endif
+
+ifneq ($(CONFIG_HSA_AMD_SVM),)
+AMDKFD_FILES += $(AMDKFD_PATH)/kfd_svm.o \
+		$(AMDKFD_PATH)/kfd_migrate.o
+endif
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
new file mode 100644
index 000000000..795382b55
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "kfd_priv.h"
+#include "kfd_events.h"
+#include "cik_int.h"
+#include "amdgpu_amdkfd.h"
+#include "kfd_smi_events.h"
+
+static bool cik_event_interrupt_isr(struct kfd_node *dev,
+					const uint32_t *ih_ring_entry,
+					uint32_t *patched_ihre,
+					bool *patched_flag)
+{
+	const struct cik_ih_ring_entry *ihre =
+			(const struct cik_ih_ring_entry *)ih_ring_entry;
+	const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
+	unsigned int vmid;
+	uint16_t pasid;
+	bool ret;
+
+	/* This workaround is due to HW/FW limitation on Hawaii that
+	 * VMID and PASID are not written into ih_ring_entry
+	 */
+	if ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
+		ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) &&
+		dev->adev->asic_type == CHIP_HAWAII) {
+		struct cik_ih_ring_entry *tmp_ihre =
+			(struct cik_ih_ring_entry *)patched_ihre;
+
+		*patched_flag = true;
+		*tmp_ihre = *ihre;
+
+		vmid = f2g->read_vmid_from_vmfault_reg(dev->adev);
+		ret = f2g->get_atc_vmid_pasid_mapping_info(dev->adev, vmid, &pasid);
+
+		tmp_ihre->ring_id &= 0x000000ff;
+		tmp_ihre->ring_id |= vmid << 8;
+		tmp_ihre->ring_id |= pasid << 16;
+
+		return ret && (pasid != 0) &&
+			vmid >= dev->vm_info.first_vmid_kfd &&
+			vmid <= dev->vm_info.last_vmid_kfd;
+	}
+
+	/* Only handle interrupts from KFD VMIDs */
+	vmid  = (ihre->ring_id & 0x0000ff00) >> 8;
+	if (vmid < dev->vm_info.first_vmid_kfd ||
+	    vmid > dev->vm_info.last_vmid_kfd)
+		return false;
+
+	/* If there is no valid PASID, it's likely a firmware bug */
+	pasid = (ihre->ring_id & 0xffff0000) >> 16;
+	if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt"))
+		return false;
+
+	/* Interrupt types we care about: various signals and faults.
+	 * They will be forwarded to a work queue (see below).
+	 */
+	return ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
+		ihre->source_id == CIK_INTSRC_SDMA_TRAP ||
+		ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
+		ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE ||
+		((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
+		ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) &&
+		!amdgpu_no_queue_eviction_on_vm_fault);
+}
+
+static void cik_event_interrupt_wq(struct kfd_node *dev,
+					const uint32_t *ih_ring_entry)
+{
+	const struct cik_ih_ring_entry *ihre =
+			(const struct cik_ih_ring_entry *)ih_ring_entry;
+	uint32_t context_id = ihre->data & 0xfffffff;
+	unsigned int vmid  = (ihre->ring_id & 0x0000ff00) >> 8;
+	u32 pasid = (ihre->ring_id & 0xffff0000) >> 16;
+
+	if (pasid == 0)
+		return;
+
+	if (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE)
+		kfd_signal_event_interrupt(pasid, context_id, 28);
+	else if (ihre->source_id == CIK_INTSRC_SDMA_TRAP)
+		kfd_signal_event_interrupt(pasid, context_id, 28);
+	else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG)
+		kfd_signal_event_interrupt(pasid, context_id & 0xff, 8);
+	else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE)
+		kfd_signal_hw_exception_event(pasid);
+	else if (ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
+		ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) {
+		struct kfd_vm_fault_info info;
+
+		kfd_smi_event_update_vmfault(dev, pasid);
+		kfd_dqm_evict_pasid(dev->dqm, pasid);
+
+		memset(&info, 0, sizeof(info));
+		amdgpu_amdkfd_gpuvm_get_vm_fault_info(dev->adev, &info);
+		if (!info.page_addr && !info.status)
+			return;
+
+		if (info.vmid == vmid)
+			kfd_signal_vm_fault_event(dev, pasid, &info, NULL);
+		else
+			kfd_signal_vm_fault_event(dev, pasid, NULL, NULL);
+	}
+}
+
+const struct kfd_event_interrupt_class event_interrupt_class_cik = {
+	.interrupt_isr = cik_event_interrupt_isr,
+	.interrupt_wq = cik_event_interrupt_wq,
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_int.h b/drivers/gpu/drm/amd/amdkfd/cik_int.h
new file mode 100644
index 000000000..76f8677a7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/cik_int.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef CIK_INT_H_INCLUDED
+#define CIK_INT_H_INCLUDED
+
+#include <linux/types.h>
+
+struct cik_ih_ring_entry {
+	uint32_t source_id;
+	uint32_t data;
+	uint32_t ring_id;
+	uint32_t reserved;
+};
+
+#define CIK_INTSRC_CP_END_OF_PIPE	0xB5
+#define CIK_INTSRC_CP_BAD_OPCODE	0xB7
+#define CIK_INTSRC_SDMA_TRAP		0xE0
+#define CIK_INTSRC_SQ_INTERRUPT_MSG	0xEF
+#define CIK_INTSRC_GFX_PAGE_INV_FAULT	0x92
+#define CIK_INTSRC_GFX_MEM_PROT_FAULT	0x93
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_regs.h b/drivers/gpu/drm/amd/amdkfd/cik_regs.h
new file mode 100644
index 000000000..8e2a1663c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/cik_regs.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef CIK_REGS_H
+#define CIK_REGS_H
+
+/* if PTR32, these are the bases for scratch and lds */
+#define	PRIVATE_BASE(x)					((x) << 0) /* scratch */
+#define	SHARED_BASE(x)					((x) << 16) /* LDS */
+#define	PTR32						(1 << 0)
+#define	ALIGNMENT_MODE(x)				((x) << 2)
+#define	SH_MEM_ALIGNMENT_MODE_UNALIGNED			3
+#define	DEFAULT_MTYPE(x)				((x) << 4)
+#define	APE1_MTYPE(x)					((x) << 7)
+
+/* valid for both DEFAULT_MTYPE and APE1_MTYPE */
+#define	MTYPE_CACHED_NV					0
+#define	MTYPE_CACHED					1
+#define	MTYPE_NONCACHED					3
+
+#define	DEFAULT_CP_HQD_PERSISTENT_STATE			(0x33U << 8)
+#define	PRELOAD_REQ					(1 << 0)
+
+#define	MQD_CONTROL_PRIV_STATE_EN			(1U << 8)
+
+#define	DEFAULT_MIN_IB_AVAIL_SIZE			(3U << 20)
+
+#define	IB_ATC_EN					(1U << 23)
+
+#define	QUANTUM_EN					1U
+#define	QUANTUM_SCALE_1MS				(1U << 4)
+#define	QUANTUM_DURATION(x)				((x) << 8)
+
+#define	RPTR_BLOCK_SIZE(x)				((x) << 8)
+#define	MIN_AVAIL_SIZE(x)				((x) << 20)
+#define	DEFAULT_RPTR_BLOCK_SIZE				RPTR_BLOCK_SIZE(5)
+#define	DEFAULT_MIN_AVAIL_SIZE				MIN_AVAIL_SIZE(3)
+
+#define	PQ_ATC_EN					(1 << 23)
+#define	NO_UPDATE_RPTR					(1 << 27)
+
+#define	DOORBELL_OFFSET(x)				((x) << 2)
+#define	DOORBELL_EN					(1 << 30)
+
+#define	PRIV_STATE					(1 << 30)
+#define	KMD_QUEUE					(1 << 31)
+
+#define	AQL_ENABLE					1
+
+#define GRBM_GFX_INDEX					0x30800
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
new file mode 100644
index 000000000..d7cd5fa31
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -0,0 +1,3440 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+static const uint32_t cwsr_trap_gfx8_hex[] = {
+	0xbf820001, 0xbf820121,
+	0xb8f4f802, 0x89748674,
+	0xb8f5f803, 0x8675ff75,
+	0x00000400, 0xbf850017,
+	0xc00a1e37, 0x00000000,
+	0xbf8c007f, 0x87777978,
+	0xbf840005, 0x8f728374,
+	0xb972e0c2, 0xbf800002,
+	0xb9740002, 0xbe801d78,
+	0xb8f5f803, 0x8675ff75,
+	0x000001ff, 0xbf850002,
+	0x80708470, 0x82718071,
+	0x8671ff71, 0x0000ffff,
+	0x8f728374, 0xb972e0c2,
+	0xbf800002, 0xb9740002,
+	0xbe801f70, 0xbefa0080,
+	0xb97a0283, 0xbef60068,
+	0xbef70069, 0xb8fa1c07,
+	0x8e7a9c7a, 0x87717a71,
+	0xb8fa03c7, 0x8e7a9b7a,
+	0x87717a71, 0xb8faf807,
+	0x867aff7a, 0x00007fff,
+	0xb97af807, 0xbef2007e,
+	0xbef3007f, 0xbefe0180,
+	0xbf900004, 0x877a8474,
+	0xb97af802, 0xbf8e0002,
+	0xbf88fffe, 0xbef8007e,
+	0x8679ff7f, 0x0000ffff,
+	0x8779ff79, 0x00040000,
+	0xbefa0080, 0xbefb00ff,
+	0x00807fac, 0x867aff7f,
+	0x08000000, 0x8f7a837a,
+	0x877b7a7b, 0x867aff7f,
+	0x70000000, 0x8f7a817a,
+	0x877b7a7b, 0xbeef007c,
+	0xbeee0080, 0xb8ee2a05,
+	0x806e816e, 0x8e6e8a6e,
+	0xb8fa1605, 0x807a817a,
+	0x8e7a867a, 0x806e7a6e,
+	0xbefa0084, 0xbefa00ff,
+	0x01000000, 0xbefe007c,
+	0xbefc006e, 0xc0611bfc,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xbefe007c,
+	0xbefc006e, 0xc0611c3c,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xbefe007c,
+	0xbefc006e, 0xc0611c7c,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xbefe007c,
+	0xbefc006e, 0xc0611cbc,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xbefe007c,
+	0xbefc006e, 0xc0611cfc,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xbefe007c,
+	0xbefc006e, 0xc0611d3c,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xb8f5f803,
+	0xbefe007c, 0xbefc006e,
+	0xc0611d7c, 0x0000007c,
+	0x806e846e, 0xbefc007e,
+	0xbefe007c, 0xbefc006e,
+	0xc0611dbc, 0x0000007c,
+	0x806e846e, 0xbefc007e,
+	0xbefe007c, 0xbefc006e,
+	0xc0611dfc, 0x0000007c,
+	0x806e846e, 0xbefc007e,
+	0xb8eff801, 0xbefe007c,
+	0xbefc006e, 0xc0611bfc,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xbefe007c,
+	0xbefc006e, 0xc0611b3c,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xbefe007c,
+	0xbefc006e, 0xc0611b7c,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0x867aff7f,
+	0x04000000, 0xbef30080,
+	0x8773737a, 0xb8ee2a05,
+	0x806e816e, 0x8e6e8a6e,
+	0xb8f51605, 0x80758175,
+	0x8e758475, 0x8e7a8275,
+	0xbefa00ff, 0x01000000,
+	0xbef60178, 0x80786e78,
+	0x82798079, 0xbefc0080,
+	0xbe802b00, 0xbe822b02,
+	0xbe842b04, 0xbe862b06,
+	0xbe882b08, 0xbe8a2b0a,
+	0xbe8c2b0c, 0xbe8e2b0e,
+	0xc06b003c, 0x00000000,
+	0xc06b013c, 0x00000010,
+	0xc06b023c, 0x00000020,
+	0xc06b033c, 0x00000030,
+	0x8078c078, 0x82798079,
+	0x807c907c, 0xbf0a757c,
+	0xbf85ffeb, 0xbef80176,
+	0xbeee0080, 0xbefe00c1,
+	0xbeff00c1, 0xbefa00ff,
+	0x01000000, 0xe0724000,
+	0x6e1e0000, 0xe0724100,
+	0x6e1e0100, 0xe0724200,
+	0x6e1e0200, 0xe0724300,
+	0x6e1e0300, 0xbefe00c1,
+	0xbeff00c1, 0xb8f54306,
+	0x8675c175, 0xbf84002c,
+	0xbf8a0000, 0x867aff73,
+	0x04000000, 0xbf840028,
+	0x8e758675, 0x8e758275,
+	0xbefa0075, 0xb8ee2a05,
+	0x806e816e, 0x8e6e8a6e,
+	0xb8fa1605, 0x807a817a,
+	0x8e7a867a, 0x806e7a6e,
+	0x806eff6e, 0x00000080,
+	0xbefa00ff, 0x01000000,
+	0xbefc0080, 0xd28c0002,
+	0x000100c1, 0xd28d0003,
+	0x000204c1, 0xd1060002,
+	0x00011103, 0x7e0602ff,
+	0x00000200, 0xbefc00ff,
+	0x00010000, 0xbe80007b,
+	0x867bff7b, 0xff7fffff,
+	0x877bff7b, 0x00058000,
+	0xd8ec0000, 0x00000002,
+	0xbf8c007f, 0xe0765000,
+	0x6e1e0002, 0x32040702,
+	0xd0c9006a, 0x0000eb02,
+	0xbf87fff7, 0xbefb0000,
+	0xbeee00ff, 0x00000400,
+	0xbefe00c1, 0xbeff00c1,
+	0xb8f52a05, 0x80758175,
+	0x8e758275, 0x8e7a8875,
+	0xbefa00ff, 0x01000000,
+	0xbefc0084, 0xbf0a757c,
+	0xbf840015, 0xbf11017c,
+	0x8075ff75, 0x00001000,
+	0x7e000300, 0x7e020301,
+	0x7e040302, 0x7e060303,
+	0xe0724000, 0x6e1e0000,
+	0xe0724100, 0x6e1e0100,
+	0xe0724200, 0x6e1e0200,
+	0xe0724300, 0x6e1e0300,
+	0x807c847c, 0x806eff6e,
+	0x00000400, 0xbf0a757c,
+	0xbf85ffef, 0xbf9c0000,
+	0xbf8200ce, 0xbef8007e,
+	0x8679ff7f, 0x0000ffff,
+	0x8779ff79, 0x00040000,
+	0xbefa0080, 0xbefb00ff,
+	0x00807fac, 0x8676ff7f,
+	0x08000000, 0x8f768376,
+	0x877b767b, 0x8676ff7f,
+	0x70000000, 0x8f768176,
+	0x877b767b, 0x8676ff7f,
+	0x04000000, 0xbf84001e,
+	0xbefe00c1, 0xbeff00c1,
+	0xb8f34306, 0x8673c173,
+	0xbf840019, 0x8e738673,
+	0x8e738273, 0xbefa0073,
+	0xb8f22a05, 0x80728172,
+	0x8e728a72, 0xb8f61605,
+	0x80768176, 0x8e768676,
+	0x80727672, 0x8072ff72,
+	0x00000080, 0xbefa00ff,
+	0x01000000, 0xbefc0080,
+	0xe0510000, 0x721e0000,
+	0xe0510100, 0x721e0000,
+	0x807cff7c, 0x00000200,
+	0x8072ff72, 0x00000200,
+	0xbf0a737c, 0xbf85fff6,
+	0xbef20080, 0xbefe00c1,
+	0xbeff00c1, 0xb8f32a05,
+	0x80738173, 0x8e738273,
+	0x8e7a8873, 0xbefa00ff,
+	0x01000000, 0xbef60072,
+	0x8072ff72, 0x00000400,
+	0xbefc0084, 0xbf11087c,
+	0x8073ff73, 0x00008000,
+	0xe0524000, 0x721e0000,
+	0xe0524100, 0x721e0100,
+	0xe0524200, 0x721e0200,
+	0xe0524300, 0x721e0300,
+	0xbf8c0f70, 0x7e000300,
+	0x7e020301, 0x7e040302,
+	0x7e060303, 0x807c847c,
+	0x8072ff72, 0x00000400,
+	0xbf0a737c, 0xbf85ffee,
+	0xbf9c0000, 0xe0524000,
+	0x761e0000, 0xe0524100,
+	0x761e0100, 0xe0524200,
+	0x761e0200, 0xe0524300,
+	0x761e0300, 0xbf8c0f70,
+	0xb8f22a05, 0x80728172,
+	0x8e728a72, 0xb8f61605,
+	0x80768176, 0x8e768676,
+	0x80727672, 0x80f2c072,
+	0xb8f31605, 0x80738173,
+	0x8e738473, 0x8e7a8273,
+	0xbefa00ff, 0x01000000,
+	0xbefc0073, 0xc031003c,
+	0x00000072, 0x80f2c072,
+	0xbf8c007f, 0x80fc907c,
+	0xbe802d00, 0xbe822d02,
+	0xbe842d04, 0xbe862d06,
+	0xbe882d08, 0xbe8a2d0a,
+	0xbe8c2d0c, 0xbe8e2d0e,
+	0xbf06807c, 0xbf84fff1,
+	0xb8f22a05, 0x80728172,
+	0x8e728a72, 0xb8f61605,
+	0x80768176, 0x8e768676,
+	0x80727672, 0xbefa0084,
+	0xbefa00ff, 0x01000000,
+	0xc0211cfc, 0x00000072,
+	0x80728472, 0xc0211c3c,
+	0x00000072, 0x80728472,
+	0xc0211c7c, 0x00000072,
+	0x80728472, 0xc0211bbc,
+	0x00000072, 0x80728472,
+	0xc0211bfc, 0x00000072,
+	0x80728472, 0xc0211d3c,
+	0x00000072, 0x80728472,
+	0xc0211d7c, 0x00000072,
+	0x80728472, 0xc0211a3c,
+	0x00000072, 0x80728472,
+	0xc0211a7c, 0x00000072,
+	0x80728472, 0xc0211dfc,
+	0x00000072, 0x80728472,
+	0xc0211b3c, 0x00000072,
+	0x80728472, 0xc0211b7c,
+	0x00000072, 0x80728472,
+	0xbf8c007f, 0xbefc0073,
+	0xbefe006e, 0xbeff006f,
+	0x867375ff, 0x000003ff,
+	0xb9734803, 0x867375ff,
+	0xfffff800, 0x8f738b73,
+	0xb973a2c3, 0xb977f801,
+	0x8673ff71, 0xf0000000,
+	0x8f739c73, 0x8e739073,
+	0xbef60080, 0x87767376,
+	0x8673ff71, 0x08000000,
+	0x8f739b73, 0x8e738f73,
+	0x87767376, 0x8673ff74,
+	0x00800000, 0x8f739773,
+	0xb976f807, 0x8671ff71,
+	0x0000ffff, 0x86fe7e7e,
+	0x86ea6a6a, 0x8f768374,
+	0xb976e0c2, 0xbf800002,
+	0xb9740002, 0xbf8a0000,
+	0x95807370, 0xbf810000,
+};
+
+
+static const uint32_t cwsr_trap_gfx9_hex[] = {
+	0xbf820001, 0xbf820258,
+	0xb8f8f802, 0x8978ff78,
+	0x00020006, 0xb8fbf803,
+	0x866eff78, 0x00002000,
+	0xbf840009, 0x866eff6d,
+	0x00ff0000, 0xbf85001e,
+	0x866eff7b, 0x00000400,
+	0xbf850055, 0xbf8e0010,
+	0xb8fbf803, 0xbf82fffa,
+	0x866eff7b, 0x03c00900,
+	0xbf850015, 0x866eff7b,
+	0x000071ff, 0xbf840008,
+	0x866fff7b, 0x00007080,
+	0xbf840001, 0xbeee1a87,
+	0xb8eff801, 0x8e6e8c6e,
+	0x866e6f6e, 0xbf85000a,
+	0x866eff6d, 0x00ff0000,
+	0xbf850007, 0xb8eef801,
+	0x866eff6e, 0x00000800,
+	0xbf850003, 0x866eff7b,
+	0x00000400, 0xbf85003a,
+	0xb8faf807, 0x867aff7a,
+	0x001f8000, 0x8e7a8b7a,
+	0x8977ff77, 0xfc000000,
+	0x87777a77, 0xba7ff807,
+	0x00000000, 0xb8faf812,
+	0xb8fbf813, 0x8efa887a,
+	0xbf0d8f7b, 0xbf840002,
+	0x877bff7b, 0xffff0000,
+	0xc0031bbd, 0x00000010,
+	0xbf8cc07f, 0x8e6e976e,
+	0x8977ff77, 0x00800000,
+	0x87776e77, 0xc0071bbd,
+	0x00000000, 0xbf8cc07f,
+	0xc0071ebd, 0x00000008,
+	0xbf8cc07f, 0x86ee6e6e,
+	0xbf840001, 0xbe801d6e,
+	0x866eff6d, 0x01ff0000,
+	0xbf850005, 0x8778ff78,
+	0x00002000, 0x80ec886c,
+	0x82ed806d, 0xbf820005,
+	0x866eff6d, 0x01000000,
+	0xbf850002, 0x806c846c,
+	0x826d806d, 0x866dff6d,
+	0x0000ffff, 0x8f7a8b77,
+	0x867aff7a, 0x001f8000,
+	0xb97af807, 0x86fe7e7e,
+	0x86ea6a6a, 0x8f6e8378,
+	0xb96ee0c2, 0xbf800002,
+	0xb9780002, 0xbe801f6c,
+	0x866dff6d, 0x0000ffff,
+	0xbefa0080, 0xb97a0283,
+	0xb8faf807, 0x867aff7a,
+	0x001f8000, 0x8e7a8b7a,
+	0x8977ff77, 0xfc000000,
+	0x87777a77, 0xba7ff807,
+	0x00000000, 0xbeee007e,
+	0xbeef007f, 0xbefe0180,
+	0xbf900004, 0x877a8478,
+	0xb97af802, 0xbf8e0002,
+	0xbf88fffe, 0xb8fa2a05,
+	0x807a817a, 0x8e7a8a7a,
+	0xb8fb1605, 0x807b817b,
+	0x8e7b867b, 0x807a7b7a,
+	0x807a7e7a, 0x827b807f,
+	0x867bff7b, 0x0000ffff,
+	0xc04b1c3d, 0x00000050,
+	0xbf8cc07f, 0xc04b1d3d,
+	0x00000060, 0xbf8cc07f,
+	0xc0431e7d, 0x00000074,
+	0xbf8cc07f, 0xbef4007e,
+	0x8675ff7f, 0x0000ffff,
+	0x8775ff75, 0x00040000,
+	0xbef60080, 0xbef700ff,
+	0x00807fac, 0xbef1007c,
+	0xbef00080, 0xb8f02a05,
+	0x80708170, 0x8e708a70,
+	0xb8fa1605, 0x807a817a,
+	0x8e7a867a, 0x80707a70,
+	0xbef60084, 0xbef600ff,
+	0x01000000, 0xbefe007c,
+	0xbefc0070, 0xc0611c7a,
+	0x0000007c, 0xbf8cc07f,
+	0x80708470, 0xbefc007e,
+	0xbefe007c, 0xbefc0070,
+	0xc0611b3a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xbefe007c,
+	0xbefc0070, 0xc0611b7a,
+	0x0000007c, 0xbf8cc07f,
+	0x80708470, 0xbefc007e,
+	0xbefe007c, 0xbefc0070,
+	0xc0611bba, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xbefe007c,
+	0xbefc0070, 0xc0611bfa,
+	0x0000007c, 0xbf8cc07f,
+	0x80708470, 0xbefc007e,
+	0xbefe007c, 0xbefc0070,
+	0xc0611e3a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xb8fbf803,
+	0xbefe007c, 0xbefc0070,
+	0xc0611efa, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xbefe007c,
+	0xbefc0070, 0xc0611a3a,
+	0x0000007c, 0xbf8cc07f,
+	0x80708470, 0xbefc007e,
+	0xbefe007c, 0xbefc0070,
+	0xc0611a7a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xb8f1f801,
+	0xbefe007c, 0xbefc0070,
+	0xc0611c7a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0x867aff7f,
+	0x04000000, 0xbeef0080,
+	0x876f6f7a, 0xb8f02a05,
+	0x80708170, 0x8e708a70,
+	0xb8fb1605, 0x807b817b,
+	0x8e7b847b, 0x8e76827b,
+	0xbef600ff, 0x01000000,
+	0xbef20174, 0x80747074,
+	0x82758075, 0xbefc0080,
+	0xbf800000, 0xbe802b00,
+	0xbe822b02, 0xbe842b04,
+	0xbe862b06, 0xbe882b08,
+	0xbe8a2b0a, 0xbe8c2b0c,
+	0xbe8e2b0e, 0xc06b003a,
+	0x00000000, 0xbf8cc07f,
+	0xc06b013a, 0x00000010,
+	0xbf8cc07f, 0xc06b023a,
+	0x00000020, 0xbf8cc07f,
+	0xc06b033a, 0x00000030,
+	0xbf8cc07f, 0x8074c074,
+	0x82758075, 0x807c907c,
+	0xbf0a7b7c, 0xbf85ffe7,
+	0xbef40172, 0xbef00080,
+	0xbefe00c1, 0xbeff00c1,
+	0xbee80080, 0xbee90080,
+	0xbef600ff, 0x01000000,
+	0x867aff78, 0x00400000,
+	0xbf850003, 0xb8faf803,
+	0x897a7aff, 0x10000000,
+	0xbf85004d, 0xbe840080,
+	0xd2890000, 0x00000900,
+	0x80048104, 0xd2890001,
+	0x00000900, 0x80048104,
+	0xd2890002, 0x00000900,
+	0x80048104, 0xd2890003,
+	0x00000900, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbe840080, 0xd2890000,
+	0x00000901, 0x80048104,
+	0xd2890001, 0x00000901,
+	0x80048104, 0xd2890002,
+	0x00000901, 0x80048104,
+	0xd2890003, 0x00000901,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0xbe840080,
+	0xd2890000, 0x00000902,
+	0x80048104, 0xd2890001,
+	0x00000902, 0x80048104,
+	0xd2890002, 0x00000902,
+	0x80048104, 0xd2890003,
+	0x00000902, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbe840080, 0xd2890000,
+	0x00000903, 0x80048104,
+	0xd2890001, 0x00000903,
+	0x80048104, 0xd2890002,
+	0x00000903, 0x80048104,
+	0xd2890003, 0x00000903,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0xbf820008,
+	0xe0724000, 0x701d0000,
+	0xe0724100, 0x701d0100,
+	0xe0724200, 0x701d0200,
+	0xe0724300, 0x701d0300,
+	0xbefe00c1, 0xbeff00c1,
+	0xb8fb4306, 0x867bc17b,
+	0xbf840063, 0xbf8a0000,
+	0x867aff6f, 0x04000000,
+	0xbf84005f, 0x8e7b867b,
+	0x8e7b827b, 0xbef6007b,
+	0xb8f02a05, 0x80708170,
+	0x8e708a70, 0xb8fa1605,
+	0x807a817a, 0x8e7a867a,
+	0x80707a70, 0x8070ff70,
+	0x00000080, 0xbef600ff,
+	0x01000000, 0xbefc0080,
+	0xd28c0002, 0x000100c1,
+	0xd28d0003, 0x000204c1,
+	0x867aff78, 0x00400000,
+	0xbf850003, 0xb8faf803,
+	0x897a7aff, 0x10000000,
+	0xbf850030, 0x24040682,
+	0xd86e4000, 0x00000002,
+	0xbf8cc07f, 0xbe840080,
+	0xd2890000, 0x00000900,
+	0x80048104, 0xd2890001,
+	0x00000900, 0x80048104,
+	0xd2890002, 0x00000900,
+	0x80048104, 0xd2890003,
+	0x00000900, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbe840080, 0xd2890000,
+	0x00000901, 0x80048104,
+	0xd2890001, 0x00000901,
+	0x80048104, 0xd2890002,
+	0x00000901, 0x80048104,
+	0xd2890003, 0x00000901,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0x680404ff,
+	0x00000200, 0xd0c9006a,
+	0x0000f702, 0xbf87ffd2,
+	0xbf820015, 0xd1060002,
+	0x00011103, 0x7e0602ff,
+	0x00000200, 0xbefc00ff,
+	0x00010000, 0xbe800077,
+	0x8677ff77, 0xff7fffff,
+	0x8777ff77, 0x00058000,
+	0xd8ec0000, 0x00000002,
+	0xbf8cc07f, 0xe0765000,
+	0x701d0002, 0x68040702,
+	0xd0c9006a, 0x0000f702,
+	0xbf87fff7, 0xbef70000,
+	0xbef000ff, 0x00000400,
+	0xbefe00c1, 0xbeff00c1,
+	0xb8fb2a05, 0x807b817b,
+	0x8e7b827b, 0xbef600ff,
+	0x01000000, 0xbefc0084,
+	0xbf0a7b7c, 0xbf84006d,
+	0xbf11017c, 0x807bff7b,
+	0x00001000, 0x867aff78,
+	0x00400000, 0xbf850003,
+	0xb8faf803, 0x897a7aff,
+	0x10000000, 0xbf850051,
+	0xbe840080, 0xd2890000,
+	0x00000900, 0x80048104,
+	0xd2890001, 0x00000900,
+	0x80048104, 0xd2890002,
+	0x00000900, 0x80048104,
+	0xd2890003, 0x00000900,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0xbe840080,
+	0xd2890000, 0x00000901,
+	0x80048104, 0xd2890001,
+	0x00000901, 0x80048104,
+	0xd2890002, 0x00000901,
+	0x80048104, 0xd2890003,
+	0x00000901, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbe840080, 0xd2890000,
+	0x00000902, 0x80048104,
+	0xd2890001, 0x00000902,
+	0x80048104, 0xd2890002,
+	0x00000902, 0x80048104,
+	0xd2890003, 0x00000902,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0xbe840080,
+	0xd2890000, 0x00000903,
+	0x80048104, 0xd2890001,
+	0x00000903, 0x80048104,
+	0xd2890002, 0x00000903,
+	0x80048104, 0xd2890003,
+	0x00000903, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0x807c847c, 0xbf0a7b7c,
+	0xbf85ffb1, 0xbf9c0000,
+	0xbf820012, 0x7e000300,
+	0x7e020301, 0x7e040302,
+	0x7e060303, 0xe0724000,
+	0x701d0000, 0xe0724100,
+	0x701d0100, 0xe0724200,
+	0x701d0200, 0xe0724300,
+	0x701d0300, 0x807c847c,
+	0x8070ff70, 0x00000400,
+	0xbf0a7b7c, 0xbf85ffef,
+	0xbf9c0000, 0xbf8200c7,
+	0xbef4007e, 0x8675ff7f,
+	0x0000ffff, 0x8775ff75,
+	0x00040000, 0xbef60080,
+	0xbef700ff, 0x00807fac,
+	0x866eff7f, 0x04000000,
+	0xbf84001e, 0xbefe00c1,
+	0xbeff00c1, 0xb8ef4306,
+	0x866fc16f, 0xbf840019,
+	0x8e6f866f, 0x8e6f826f,
+	0xbef6006f, 0xb8f82a05,
+	0x80788178, 0x8e788a78,
+	0xb8ee1605, 0x806e816e,
+	0x8e6e866e, 0x80786e78,
+	0x8078ff78, 0x00000080,
+	0xbef600ff, 0x01000000,
+	0xbefc0080, 0xe0510000,
+	0x781d0000, 0xe0510100,
+	0x781d0000, 0x807cff7c,
+	0x00000200, 0x8078ff78,
+	0x00000200, 0xbf0a6f7c,
+	0xbf85fff6, 0xbefe00c1,
+	0xbeff00c1, 0xbef600ff,
+	0x01000000, 0xb8ef2a05,
+	0x806f816f, 0x8e6f826f,
+	0x806fff6f, 0x00008000,
+	0xbef80080, 0xbeee0078,
+	0x8078ff78, 0x00000400,
+	0xbefc0084, 0xbf11087c,
+	0xe0524000, 0x781d0000,
+	0xe0524100, 0x781d0100,
+	0xe0524200, 0x781d0200,
+	0xe0524300, 0x781d0300,
+	0xbf8c0f70, 0x7e000300,
+	0x7e020301, 0x7e040302,
+	0x7e060303, 0x807c847c,
+	0x8078ff78, 0x00000400,
+	0xbf0a6f7c, 0xbf85ffee,
+	0xbf9c0000, 0xe0524000,
+	0x6e1d0000, 0xe0524100,
+	0x6e1d0100, 0xe0524200,
+	0x6e1d0200, 0xe0524300,
+	0x6e1d0300, 0xbf8c0f70,
+	0xb8f82a05, 0x80788178,
+	0x8e788a78, 0xb8ee1605,
+	0x806e816e, 0x8e6e866e,
+	0x80786e78, 0x80f8c078,
+	0xb8ef1605, 0x806f816f,
+	0x8e6f846f, 0x8e76826f,
+	0xbef600ff, 0x01000000,
+	0xbefc006f, 0xc031003a,
+	0x00000078, 0x80f8c078,
+	0xbf8cc07f, 0x80fc907c,
+	0xbf800000, 0xbe802d00,
+	0xbe822d02, 0xbe842d04,
+	0xbe862d06, 0xbe882d08,
+	0xbe8a2d0a, 0xbe8c2d0c,
+	0xbe8e2d0e, 0xbf06807c,
+	0xbf84fff0, 0xb8f82a05,
+	0x80788178, 0x8e788a78,
+	0xb8ee1605, 0x806e816e,
+	0x8e6e866e, 0x80786e78,
+	0xbef60084, 0xbef600ff,
+	0x01000000, 0xc0211bfa,
+	0x00000078, 0x80788478,
+	0xc0211b3a, 0x00000078,
+	0x80788478, 0xc0211b7a,
+	0x00000078, 0x80788478,
+	0xc0211c3a, 0x00000078,
+	0x80788478, 0xc0211c7a,
+	0x00000078, 0x80788478,
+	0xc0211eba, 0x00000078,
+	0x80788478, 0xc0211efa,
+	0x00000078, 0x80788478,
+	0xc0211a3a, 0x00000078,
+	0x80788478, 0xc0211a7a,
+	0x00000078, 0x80788478,
+	0xc0211cfa, 0x00000078,
+	0x80788478, 0xbf8cc07f,
+	0xbefc006f, 0xbefe0070,
+	0xbeff0071, 0x866f7bff,
+	0x000003ff, 0xb96f4803,
+	0x866f7bff, 0xfffff800,
+	0x8f6f8b6f, 0xb96fa2c3,
+	0xb973f801, 0xb8ee2a05,
+	0x806e816e, 0x8e6e8a6e,
+	0xb8ef1605, 0x806f816f,
+	0x8e6f866f, 0x806e6f6e,
+	0x806e746e, 0x826f8075,
+	0x866fff6f, 0x0000ffff,
+	0xc00b1c37, 0x00000050,
+	0xc00b1d37, 0x00000060,
+	0xc0031e77, 0x00000074,
+	0xbf8cc07f, 0x8f6e8b77,
+	0x866eff6e, 0x001f8000,
+	0xb96ef807, 0x866dff6d,
+	0x0000ffff, 0x86fe7e7e,
+	0x86ea6a6a, 0x8f6e837a,
+	0xb96ee0c2, 0xbf800002,
+	0xb97a0002, 0xbf8a0000,
+	0xbe801f6c, 0xbf810000,
+};
+
+static const uint32_t cwsr_trap_nv1x_hex[] = {
+	0xbf820001, 0xbf8201f5,
+	0xb0804004, 0xb978f802,
+	0x8a78ff78, 0x00020006,
+	0xb97bf803, 0x876eff78,
+	0x00002000, 0xbf840009,
+	0x876eff6d, 0x00ff0000,
+	0xbf85001e, 0x876eff7b,
+	0x00000400, 0xbf85005b,
+	0xbf8e0010, 0xb97bf803,
+	0xbf82fffa, 0x876eff7b,
+	0x00000900, 0xbf850015,
+	0x876eff7b, 0x000071ff,
+	0xbf840008, 0x876fff7b,
+	0x00007080, 0xbf840001,
+	0xbeee1d87, 0xb96ff801,
+	0x8f6e8c6e, 0x876e6f6e,
+	0xbf85000a, 0x876eff6d,
+	0x00ff0000, 0xbf850007,
+	0xb96ef801, 0x876eff6e,
+	0x00000800, 0xbf850003,
+	0x876eff7b, 0x00000400,
+	0xbf850040, 0x8a77ff77,
+	0xff000000, 0xb97af807,
+	0x877bff7a, 0x02000000,
+	0x8f7b867b, 0x88777b77,
+	0x877bff7a, 0x003f8000,
+	0x8f7b897b, 0x88777b77,
+	0x8a7aff7a, 0x023f8000,
+	0xb9faf807, 0xb97af812,
+	0xb97bf813, 0x8ffa887a,
+	0xbf0d8f7b, 0xbf840002,
+	0x887bff7b, 0xffff0000,
+	0xf4011bbd, 0xfa000010,
+	0xbf8cc07f, 0x8f6e976e,
+	0x8a77ff77, 0x00800000,
+	0x88776e77, 0xf4051bbd,
+	0xfa000000, 0xbf8cc07f,
+	0xf4051ebd, 0xfa000008,
+	0xbf8cc07f, 0x87ee6e6e,
+	0xbf840001, 0xbe80206e,
+	0x876eff6d, 0x01ff0000,
+	0xbf850005, 0x8878ff78,
+	0x00002000, 0x80ec886c,
+	0x82ed806d, 0xbf820005,
+	0x876eff6d, 0x01000000,
+	0xbf850002, 0x806c846c,
+	0x826d806d, 0x876dff6d,
+	0x0000ffff, 0x907a8977,
+	0x877bff7a, 0x003f8000,
+	0x907a8677, 0x877aff7a,
+	0x02000000, 0x887a7b7a,
+	0xb9faf807, 0x87fe7e7e,
+	0x87ea6a6a, 0xb9f8f802,
+	0xbe80226c, 0x876dff6d,
+	0x0000ffff, 0xbefa0380,
+	0xb9fa0283, 0x8a77ff77,
+	0xff000000, 0xb97af807,
+	0x877bff7a, 0x02000000,
+	0x8f7b867b, 0x88777b77,
+	0x877bff7a, 0x003f8000,
+	0x8f7b897b, 0x88777b77,
+	0x8a7aff7a, 0x023f8000,
+	0xb9faf807, 0xbeee037e,
+	0xbeef037f, 0xbefe0480,
+	0xbf900004, 0xbf8e0002,
+	0xbf88fffe, 0x877aff7f,
+	0x04000000, 0x8f7a857a,
+	0x886d7a6d, 0xb97b02dc,
+	0x8f7b997b, 0xb97a3a05,
+	0x807a817a, 0xbf0d997b,
+	0xbf850002, 0x8f7a897a,
+	0xbf820001, 0x8f7a8a7a,
+	0xb97b1e06, 0x8f7b8a7b,
+	0x807a7b7a, 0x877bff7f,
+	0x0000ffff, 0x807aff7a,
+	0x00000200, 0x807a7e7a,
+	0x827b807b, 0xf4491c3d,
+	0xfa000050, 0xf4491d3d,
+	0xfa000060, 0xf4411e7d,
+	0xfa000074, 0xbef4037e,
+	0x8775ff7f, 0x0000ffff,
+	0x8875ff75, 0x00040000,
+	0xbef60380, 0xbef703ff,
+	0x10807fac, 0xbef1037c,
+	0xbef00380, 0xb97302dc,
+	0x8f739973, 0xb97bf816,
+	0xba80f816, 0x00000000,
+	0xbefe03c1, 0x907c9973,
+	0x877c817c, 0xbf06817c,
+	0xbf850002, 0xbeff0380,
+	0xbf820002, 0xbeff03c1,
+	0xbf82000b, 0xbef603ff,
+	0x01000000, 0xe0704000,
+	0x705d0000, 0xe0704080,
+	0x705d0100, 0xe0704100,
+	0x705d0200, 0xe0704180,
+	0x705d0300, 0xbf82000a,
+	0xbef603ff, 0x01000000,
+	0xe0704000, 0x705d0000,
+	0xe0704100, 0x705d0100,
+	0xe0704200, 0x705d0200,
+	0xe0704300, 0x705d0300,
+	0xb9703a05, 0x80708170,
+	0xbf0d9973, 0xbf850002,
+	0x8f708970, 0xbf820001,
+	0x8f708a70, 0xb97a1e06,
+	0x8f7a8a7a, 0x80707a70,
+	0x8070ff70, 0x00000200,
+	0xbef603ff, 0x01000000,
+	0xbefe037c, 0xbefc0370,
+	0xf4611c7a, 0xf8000000,
+	0x80708470, 0xbefc037e,
+	0xbefe037c, 0xbefc0370,
+	0xf4611b3a, 0xf8000000,
+	0x80708470, 0xbefc037e,
+	0x8a7aff6d, 0x80000000,
+	0xbefe037c, 0xbefc0370,
+	0xf4611eba, 0xf8000000,
+	0x80708470, 0xbefc037e,
+	0xbefe037c, 0xbefc0370,
+	0xf4611bba, 0xf8000000,
+	0x80708470, 0xbefc037e,
+	0xbefe037c, 0xbefc0370,
+	0xf4611bfa, 0xf8000000,
+	0x80708470, 0xbefc037e,
+	0xbefe037c, 0xbefc0370,
+	0xf4611e3a, 0xf8000000,
+	0x80708470, 0xbefc037e,
+	0xb97af803, 0xbefe037c,
+	0xbefc0370, 0xf4611eba,
+	0xf8000000, 0x80708470,
+	0xbefc037e, 0xbefe037c,
+	0xbefc0370, 0xf4611efa,
+	0xf8000000, 0x80708470,
+	0xbefc037e, 0xb971f801,
+	0xbefe037c, 0xbefc0370,
+	0xf4611c7a, 0xf8000000,
+	0x80708470, 0xbefc037e,
+	0xb971f814, 0xbefe037c,
+	0xbefc0370, 0xf4611c7a,
+	0xf8000000, 0x80708470,
+	0xbefc037e, 0xb971f815,
+	0xbefe037c, 0xbefc0370,
+	0xf4611c7a, 0xf8000000,
+	0x80708470, 0xbefc037e,
+	0xb9703a05, 0x80708170,
+	0xbf0d9973, 0xbf850002,
+	0x8f708970, 0xbf820001,
+	0x8f708a70, 0xb97a1e06,
+	0x8f7a8a7a, 0x80707a70,
+	0xbef603ff, 0x01000000,
+	0xbefb0374, 0x80747074,
+	0x82758075, 0xbefc0380,
+	0xbf800000, 0xbe802f00,
+	0xbe822f02, 0xbe842f04,
+	0xbe862f06, 0xbe882f08,
+	0xbe8a2f0a, 0xbe8c2f0c,
+	0xbe8e2f0e, 0xf469003a,
+	0xfa000000, 0xf469013a,
+	0xfa000010, 0xf469023a,
+	0xfa000020, 0xf469033a,
+	0xfa000030, 0x8074c074,
+	0x82758075, 0x807c907c,
+	0xbf0aff7c, 0x00000060,
+	0xbf85ffea, 0xbe802f00,
+	0xbe822f02, 0xbe842f04,
+	0xbe862f06, 0xbe882f08,
+	0xbe8a2f0a, 0xf469003a,
+	0xfa000000, 0xf469013a,
+	0xfa000010, 0xf469023a,
+	0xfa000020, 0x8074b074,
+	0x82758075, 0xbef4037b,
+	0xbefe03c1, 0x907c9973,
+	0x877c817c, 0xbf06817c,
+	0xbf850002, 0xbeff0380,
+	0xbf820001, 0xbeff03c1,
+	0xb97b4306, 0x877bc17b,
+	0xbf840044, 0xbf8a0000,
+	0x877aff6d, 0x80000000,
+	0xbf840040, 0x8f7b867b,
+	0x8f7b827b, 0xbef6037b,
+	0xb9703a05, 0x80708170,
+	0xbf0d9973, 0xbf850002,
+	0x8f708970, 0xbf820001,
+	0x8f708a70, 0xb97a1e06,
+	0x8f7a8a7a, 0x80707a70,
+	0x8070ff70, 0x00000200,
+	0x8070ff70, 0x00000080,
+	0xbef603ff, 0x01000000,
+	0xd7650000, 0x000100c1,
+	0xd7660000, 0x000200c1,
+	0x16000084, 0x907c9973,
+	0x877c817c, 0xbf06817c,
+	0xbefc0380, 0xbf850012,
+	0xbe8303ff, 0x00000080,
+	0xbf800000, 0xbf800000,
+	0xbf800000, 0xd8d80000,
+	0x01000000, 0xbf8c0000,
+	0xe0704000, 0x705d0100,
+	0x807c037c, 0x80700370,
+	0xd5250000, 0x0001ff00,
+	0x00000080, 0xbf0a7b7c,
+	0xbf85fff4, 0xbf820011,
+	0xbe8303ff, 0x00000100,
+	0xbf800000, 0xbf800000,
+	0xbf800000, 0xd8d80000,
+	0x01000000, 0xbf8c0000,
+	0xe0704000, 0x705d0100,
+	0x807c037c, 0x80700370,
+	0xd5250000, 0x0001ff00,
+	0x00000100, 0xbf0a7b7c,
+	0xbf85fff4, 0xbefe03c1,
+	0x907c9973, 0x877c817c,
+	0xbf06817c, 0xbf850004,
+	0xbef003ff, 0x00000200,
+	0xbeff0380, 0xbf820003,
+	0xbef003ff, 0x00000400,
+	0xbeff03c1, 0xb97b3a05,
+	0x807b817b, 0x8f7b827b,
+	0x907c9973, 0x877c817c,
+	0xbf06817c, 0xbf850017,
+	0xbef603ff, 0x01000000,
+	0xbefc0384, 0xbf0a7b7c,
+	0xbf840037, 0x7e008700,
+	0x7e028701, 0x7e048702,
+	0x7e068703, 0xe0704000,
+	0x705d0000, 0xe0704080,
+	0x705d0100, 0xe0704100,
+	0x705d0200, 0xe0704180,
+	0x705d0300, 0x807c847c,
+	0x8070ff70, 0x00000200,
+	0xbf0a7b7c, 0xbf85ffef,
+	0xbf820025, 0xbef603ff,
+	0x01000000, 0xbefc0384,
+	0xbf0a7b7c, 0xbf840011,
+	0x7e008700, 0x7e028701,
+	0x7e048702, 0x7e068703,
+	0xe0704000, 0x705d0000,
+	0xe0704100, 0x705d0100,
+	0xe0704200, 0x705d0200,
+	0xe0704300, 0x705d0300,
+	0x807c847c, 0x8070ff70,
+	0x00000400, 0xbf0a7b7c,
+	0xbf85ffef, 0xb97b1e06,
+	0x877bc17b, 0xbf84000c,
+	0x8f7b837b, 0x807b7c7b,
+	0xbefe03c1, 0xbeff0380,
+	0x7e008700, 0xe0704000,
+	0x705d0000, 0x807c817c,
+	0x8070ff70, 0x00000080,
+	0xbf0a7b7c, 0xbf85fff8,
+	0xbf820144, 0xbef4037e,
+	0x8775ff7f, 0x0000ffff,
+	0x8875ff75, 0x00040000,
+	0xbef60380, 0xbef703ff,
+	0x10807fac, 0xb97202dc,
+	0x8f729972, 0x876eff7f,
+	0x04000000, 0xbf840034,
+	0xbefe03c1, 0x907c9972,
+	0x877c817c, 0xbf06817c,
+	0xbf850002, 0xbeff0380,
+	0xbf820001, 0xbeff03c1,
+	0xb96f4306, 0x876fc16f,
+	0xbf840029, 0x8f6f866f,
+	0x8f6f826f, 0xbef6036f,
+	0xb9783a05, 0x80788178,
+	0xbf0d9972, 0xbf850002,
+	0x8f788978, 0xbf820001,
+	0x8f788a78, 0xb96e1e06,
+	0x8f6e8a6e, 0x80786e78,
+	0x8078ff78, 0x00000200,
+	0x8078ff78, 0x00000080,
+	0xbef603ff, 0x01000000,
+	0x907c9972, 0x877c817c,
+	0xbf06817c, 0xbefc0380,
+	0xbf850009, 0xe0310000,
+	0x781d0000, 0x807cff7c,
+	0x00000080, 0x8078ff78,
+	0x00000080, 0xbf0a6f7c,
+	0xbf85fff8, 0xbf820008,
+	0xe0310000, 0x781d0000,
+	0x807cff7c, 0x00000100,
+	0x8078ff78, 0x00000100,
+	0xbf0a6f7c, 0xbf85fff8,
+	0xbef80380, 0xbefe03c1,
+	0x907c9972, 0x877c817c,
+	0xbf06817c, 0xbf850002,
+	0xbeff0380, 0xbf820001,
+	0xbeff03c1, 0xb96f3a05,
+	0x806f816f, 0x8f6f826f,
+	0x907c9972, 0x877c817c,
+	0xbf06817c, 0xbf850024,
+	0xbef603ff, 0x01000000,
+	0xbeee0378, 0x8078ff78,
+	0x00000200, 0xbefc0384,
+	0xbf0a6f7c, 0xbf840050,
+	0xe0304000, 0x785d0000,
+	0xe0304080, 0x785d0100,
+	0xe0304100, 0x785d0200,
+	0xe0304180, 0x785d0300,
+	0xbf8c3f70, 0x7e008500,
+	0x7e028501, 0x7e048502,
+	0x7e068503, 0x807c847c,
+	0x8078ff78, 0x00000200,
+	0xbf0a6f7c, 0xbf85ffee,
+	0xe0304000, 0x6e5d0000,
+	0xe0304080, 0x6e5d0100,
+	0xe0304100, 0x6e5d0200,
+	0xe0304180, 0x6e5d0300,
+	0xbf8c3f70, 0xbf820034,
+	0xbef603ff, 0x01000000,
+	0xbeee0378, 0x8078ff78,
+	0x00000400, 0xbefc0384,
+	0xbf0a6f7c, 0xbf840012,
+	0xe0304000, 0x785d0000,
+	0xe0304100, 0x785d0100,
+	0xe0304200, 0x785d0200,
+	0xe0304300, 0x785d0300,
+	0xbf8c3f70, 0x7e008500,
+	0x7e028501, 0x7e048502,
+	0x7e068503, 0x807c847c,
+	0x8078ff78, 0x00000400,
+	0xbf0a6f7c, 0xbf85ffee,
+	0xb96f1e06, 0x876fc16f,
+	0xbf84000e, 0x8f6f836f,
+	0x806f7c6f, 0xbefe03c1,
+	0xbeff0380, 0xe0304000,
+	0x785d0000, 0xbf8c3f70,
+	0x7e008500, 0x807c817c,
+	0x8078ff78, 0x00000080,
+	0xbf0a6f7c, 0xbf85fff7,
+	0xbeff03c1, 0xe0304000,
+	0x6e5d0000, 0xe0304100,
+	0x6e5d0100, 0xe0304200,
+	0x6e5d0200, 0xe0304300,
+	0x6e5d0300, 0xbf8c3f70,
+	0xb9783a05, 0x80788178,
+	0xbf0d9972, 0xbf850002,
+	0x8f788978, 0xbf820001,
+	0x8f788a78, 0xb96e1e06,
+	0x8f6e8a6e, 0x80786e78,
+	0x8078ff78, 0x00000200,
+	0x80f8ff78, 0x00000050,
+	0xbef603ff, 0x01000000,
+	0xbefc03ff, 0x0000006c,
+	0x80f89078, 0xf429003a,
+	0xf0000000, 0xbf8cc07f,
+	0x80fc847c, 0xbf800000,
+	0xbe803100, 0xbe823102,
+	0x80f8a078, 0xf42d003a,
+	0xf0000000, 0xbf8cc07f,
+	0x80fc887c, 0xbf800000,
+	0xbe803100, 0xbe823102,
+	0xbe843104, 0xbe863106,
+	0x80f8c078, 0xf431003a,
+	0xf0000000, 0xbf8cc07f,
+	0x80fc907c, 0xbf800000,
+	0xbe803100, 0xbe823102,
+	0xbe843104, 0xbe863106,
+	0xbe883108, 0xbe8a310a,
+	0xbe8c310c, 0xbe8e310e,
+	0xbf06807c, 0xbf84fff0,
+	0xba80f801, 0x00000000,
+	0xbf8a0000, 0xb9783a05,
+	0x80788178, 0xbf0d9972,
+	0xbf850002, 0x8f788978,
+	0xbf820001, 0x8f788a78,
+	0xb96e1e06, 0x8f6e8a6e,
+	0x80786e78, 0x8078ff78,
+	0x00000200, 0xbef603ff,
+	0x01000000, 0xf4211bfa,
+	0xf0000000, 0x80788478,
+	0xf4211b3a, 0xf0000000,
+	0x80788478, 0xf4211b7a,
+	0xf0000000, 0x80788478,
+	0xf4211c3a, 0xf0000000,
+	0x80788478, 0xf4211c7a,
+	0xf0000000, 0x80788478,
+	0xf4211eba, 0xf0000000,
+	0x80788478, 0xf4211efa,
+	0xf0000000, 0x80788478,
+	0xf4211e7a, 0xf0000000,
+	0x80788478, 0xf4211cfa,
+	0xf0000000, 0x80788478,
+	0xf4211bba, 0xf0000000,
+	0x80788478, 0xbf8cc07f,
+	0xb9eef814, 0xf4211bba,
+	0xf0000000, 0x80788478,
+	0xbf8cc07f, 0xb9eef815,
+	0xbefc036f, 0xbefe0370,
+	0xbeff0371, 0x876f7bff,
+	0x000003ff, 0xb9ef4803,
+	0xb9f9f816, 0x876f7bff,
+	0xfffff800, 0x906f8b6f,
+	0xb9efa2c3, 0xb9f3f801,
+	0xb96e3a05, 0x806e816e,
+	0xbf0d9972, 0xbf850002,
+	0x8f6e896e, 0xbf820001,
+	0x8f6e8a6e, 0xb96f1e06,
+	0x8f6f8a6f, 0x806e6f6e,
+	0x806eff6e, 0x00000200,
+	0x806e746e, 0x826f8075,
+	0x876fff6f, 0x0000ffff,
+	0xf4091c37, 0xfa000050,
+	0xf4091d37, 0xfa000060,
+	0xf4011e77, 0xfa000074,
+	0xbf8cc07f, 0x906e8977,
+	0x876fff6e, 0x003f8000,
+	0x906e8677, 0x876eff6e,
+	0x02000000, 0x886e6f6e,
+	0xb9eef807, 0x876dff6d,
+	0x0000ffff, 0x87fe7e7e,
+	0x87ea6a6a, 0xb9faf802,
+	0xbe80226c, 0xbf810000,
+	0xbf9f0000, 0xbf9f0000,
+	0xbf9f0000, 0xbf9f0000,
+	0xbf9f0000, 0x00000000,
+};
+
+static const uint32_t cwsr_trap_arcturus_hex[] = {
+	0xbf820001, 0xbf8202d4,
+	0xb8f8f802, 0x8978ff78,
+	0x00020006, 0xb8fbf803,
+	0x866eff78, 0x00002000,
+	0xbf840009, 0x866eff6d,
+	0x00ff0000, 0xbf85001e,
+	0x866eff7b, 0x00000400,
+	0xbf850055, 0xbf8e0010,
+	0xb8fbf803, 0xbf82fffa,
+	0x866eff7b, 0x03c00900,
+	0xbf850015, 0x866eff7b,
+	0x000071ff, 0xbf840008,
+	0x866fff7b, 0x00007080,
+	0xbf840001, 0xbeee1a87,
+	0xb8eff801, 0x8e6e8c6e,
+	0x866e6f6e, 0xbf85000a,
+	0x866eff6d, 0x00ff0000,
+	0xbf850007, 0xb8eef801,
+	0x866eff6e, 0x00000800,
+	0xbf850003, 0x866eff7b,
+	0x00000400, 0xbf85003a,
+	0xb8faf807, 0x867aff7a,
+	0x001f8000, 0x8e7a8b7a,
+	0x8977ff77, 0xfc000000,
+	0x87777a77, 0xba7ff807,
+	0x00000000, 0xb8faf812,
+	0xb8fbf813, 0x8efa887a,
+	0xbf0d8f7b, 0xbf840002,
+	0x877bff7b, 0xffff0000,
+	0xc0031bbd, 0x00000010,
+	0xbf8cc07f, 0x8e6e976e,
+	0x8977ff77, 0x00800000,
+	0x87776e77, 0xc0071bbd,
+	0x00000000, 0xbf8cc07f,
+	0xc0071ebd, 0x00000008,
+	0xbf8cc07f, 0x86ee6e6e,
+	0xbf840001, 0xbe801d6e,
+	0x866eff6d, 0x01ff0000,
+	0xbf850005, 0x8778ff78,
+	0x00002000, 0x80ec886c,
+	0x82ed806d, 0xbf820005,
+	0x866eff6d, 0x01000000,
+	0xbf850002, 0x806c846c,
+	0x826d806d, 0x866dff6d,
+	0x0000ffff, 0x8f7a8b77,
+	0x867aff7a, 0x001f8000,
+	0xb97af807, 0x86fe7e7e,
+	0x86ea6a6a, 0x8f6e8378,
+	0xb96ee0c2, 0xbf800002,
+	0xb9780002, 0xbe801f6c,
+	0x866dff6d, 0x0000ffff,
+	0xbefa0080, 0xb97a0283,
+	0xb8faf807, 0x867aff7a,
+	0x001f8000, 0x8e7a8b7a,
+	0x8977ff77, 0xfc000000,
+	0x87777a77, 0xba7ff807,
+	0x00000000, 0xbeee007e,
+	0xbeef007f, 0xbefe0180,
+	0xbf900004, 0x877a8478,
+	0xb97af802, 0xbf8e0002,
+	0xbf88fffe, 0xb8fa2a05,
+	0x807a817a, 0x8e7a8a7a,
+	0x8e7a817a, 0xb8fb1605,
+	0x807b817b, 0x8e7b867b,
+	0x807a7b7a, 0x807a7e7a,
+	0x827b807f, 0x867bff7b,
+	0x0000ffff, 0xc04b1c3d,
+	0x00000050, 0xbf8cc07f,
+	0xc04b1d3d, 0x00000060,
+	0xbf8cc07f, 0xc0431e7d,
+	0x00000074, 0xbf8cc07f,
+	0xbef4007e, 0x8675ff7f,
+	0x0000ffff, 0x8775ff75,
+	0x00040000, 0xbef60080,
+	0xbef700ff, 0x00807fac,
+	0xbef1007c, 0xbef00080,
+	0xb8f02a05, 0x80708170,
+	0x8e708a70, 0x8e708170,
+	0xb8fa1605, 0x807a817a,
+	0x8e7a867a, 0x80707a70,
+	0xbef60084, 0xbef600ff,
+	0x01000000, 0xbefe007c,
+	0xbefc0070, 0xc0611c7a,
+	0x0000007c, 0xbf8cc07f,
+	0x80708470, 0xbefc007e,
+	0xbefe007c, 0xbefc0070,
+	0xc0611b3a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xbefe007c,
+	0xbefc0070, 0xc0611b7a,
+	0x0000007c, 0xbf8cc07f,
+	0x80708470, 0xbefc007e,
+	0xbefe007c, 0xbefc0070,
+	0xc0611bba, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xbefe007c,
+	0xbefc0070, 0xc0611bfa,
+	0x0000007c, 0xbf8cc07f,
+	0x80708470, 0xbefc007e,
+	0xbefe007c, 0xbefc0070,
+	0xc0611e3a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xb8fbf803,
+	0xbefe007c, 0xbefc0070,
+	0xc0611efa, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xbefe007c,
+	0xbefc0070, 0xc0611a3a,
+	0x0000007c, 0xbf8cc07f,
+	0x80708470, 0xbefc007e,
+	0xbefe007c, 0xbefc0070,
+	0xc0611a7a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xb8f1f801,
+	0xbefe007c, 0xbefc0070,
+	0xc0611c7a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0x867aff7f,
+	0x04000000, 0xbeef0080,
+	0x876f6f7a, 0xb8f02a05,
+	0x80708170, 0x8e708a70,
+	0x8e708170, 0xb8fb1605,
+	0x807b817b, 0x8e7b847b,
+	0x8e76827b, 0xbef600ff,
+	0x01000000, 0xbef20174,
+	0x80747074, 0x82758075,
+	0xbefc0080, 0xbf800000,
+	0xbe802b00, 0xbe822b02,
+	0xbe842b04, 0xbe862b06,
+	0xbe882b08, 0xbe8a2b0a,
+	0xbe8c2b0c, 0xbe8e2b0e,
+	0xc06b003a, 0x00000000,
+	0xbf8cc07f, 0xc06b013a,
+	0x00000010, 0xbf8cc07f,
+	0xc06b023a, 0x00000020,
+	0xbf8cc07f, 0xc06b033a,
+	0x00000030, 0xbf8cc07f,
+	0x8074c074, 0x82758075,
+	0x807c907c, 0xbf0a7b7c,
+	0xbf85ffe7, 0xbef40172,
+	0xbef00080, 0xbefe00c1,
+	0xbeff00c1, 0xbee80080,
+	0xbee90080, 0xbef600ff,
+	0x01000000, 0x867aff78,
+	0x00400000, 0xbf850003,
+	0xb8faf803, 0x897a7aff,
+	0x10000000, 0xbf85004d,
+	0xbe840080, 0xd2890000,
+	0x00000900, 0x80048104,
+	0xd2890001, 0x00000900,
+	0x80048104, 0xd2890002,
+	0x00000900, 0x80048104,
+	0xd2890003, 0x00000900,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0xbe840080,
+	0xd2890000, 0x00000901,
+	0x80048104, 0xd2890001,
+	0x00000901, 0x80048104,
+	0xd2890002, 0x00000901,
+	0x80048104, 0xd2890003,
+	0x00000901, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbe840080, 0xd2890000,
+	0x00000902, 0x80048104,
+	0xd2890001, 0x00000902,
+	0x80048104, 0xd2890002,
+	0x00000902, 0x80048104,
+	0xd2890003, 0x00000902,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0xbe840080,
+	0xd2890000, 0x00000903,
+	0x80048104, 0xd2890001,
+	0x00000903, 0x80048104,
+	0xd2890002, 0x00000903,
+	0x80048104, 0xd2890003,
+	0x00000903, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbf820008, 0xe0724000,
+	0x701d0000, 0xe0724100,
+	0x701d0100, 0xe0724200,
+	0x701d0200, 0xe0724300,
+	0x701d0300, 0xbefe00c1,
+	0xbeff00c1, 0xb8fb4306,
+	0x867bc17b, 0xbf840064,
+	0xbf8a0000, 0x867aff6f,
+	0x04000000, 0xbf840060,
+	0x8e7b867b, 0x8e7b827b,
+	0xbef6007b, 0xb8f02a05,
+	0x80708170, 0x8e708a70,
+	0x8e708170, 0xb8fa1605,
+	0x807a817a, 0x8e7a867a,
+	0x80707a70, 0x8070ff70,
+	0x00000080, 0xbef600ff,
+	0x01000000, 0xbefc0080,
+	0xd28c0002, 0x000100c1,
+	0xd28d0003, 0x000204c1,
+	0x867aff78, 0x00400000,
+	0xbf850003, 0xb8faf803,
+	0x897a7aff, 0x10000000,
+	0xbf850030, 0x24040682,
+	0xd86e4000, 0x00000002,
+	0xbf8cc07f, 0xbe840080,
+	0xd2890000, 0x00000900,
+	0x80048104, 0xd2890001,
+	0x00000900, 0x80048104,
+	0xd2890002, 0x00000900,
+	0x80048104, 0xd2890003,
+	0x00000900, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbe840080, 0xd2890000,
+	0x00000901, 0x80048104,
+	0xd2890001, 0x00000901,
+	0x80048104, 0xd2890002,
+	0x00000901, 0x80048104,
+	0xd2890003, 0x00000901,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0x680404ff,
+	0x00000200, 0xd0c9006a,
+	0x0000f702, 0xbf87ffd2,
+	0xbf820015, 0xd1060002,
+	0x00011103, 0x7e0602ff,
+	0x00000200, 0xbefc00ff,
+	0x00010000, 0xbe800077,
+	0x8677ff77, 0xff7fffff,
+	0x8777ff77, 0x00058000,
+	0xd8ec0000, 0x00000002,
+	0xbf8cc07f, 0xe0765000,
+	0x701d0002, 0x68040702,
+	0xd0c9006a, 0x0000f702,
+	0xbf87fff7, 0xbef70000,
+	0xbef000ff, 0x00000400,
+	0xbefe00c1, 0xbeff00c1,
+	0xb8fb2a05, 0x807b817b,
+	0x8e7b827b, 0xbef600ff,
+	0x01000000, 0xbefc0084,
+	0xbf0a7b7c, 0xbf84006d,
+	0xbf11017c, 0x807bff7b,
+	0x00001000, 0x867aff78,
+	0x00400000, 0xbf850003,
+	0xb8faf803, 0x897a7aff,
+	0x10000000, 0xbf850051,
+	0xbe840080, 0xd2890000,
+	0x00000900, 0x80048104,
+	0xd2890001, 0x00000900,
+	0x80048104, 0xd2890002,
+	0x00000900, 0x80048104,
+	0xd2890003, 0x00000900,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0xbe840080,
+	0xd2890000, 0x00000901,
+	0x80048104, 0xd2890001,
+	0x00000901, 0x80048104,
+	0xd2890002, 0x00000901,
+	0x80048104, 0xd2890003,
+	0x00000901, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbe840080, 0xd2890000,
+	0x00000902, 0x80048104,
+	0xd2890001, 0x00000902,
+	0x80048104, 0xd2890002,
+	0x00000902, 0x80048104,
+	0xd2890003, 0x00000902,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0xbe840080,
+	0xd2890000, 0x00000903,
+	0x80048104, 0xd2890001,
+	0x00000903, 0x80048104,
+	0xd2890002, 0x00000903,
+	0x80048104, 0xd2890003,
+	0x00000903, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0x807c847c, 0xbf0a7b7c,
+	0xbf85ffb1, 0xbf9c0000,
+	0xbf820012, 0x7e000300,
+	0x7e020301, 0x7e040302,
+	0x7e060303, 0xe0724000,
+	0x701d0000, 0xe0724100,
+	0x701d0100, 0xe0724200,
+	0x701d0200, 0xe0724300,
+	0x701d0300, 0x807c847c,
+	0x8070ff70, 0x00000400,
+	0xbf0a7b7c, 0xbf85ffef,
+	0xbf9c0000, 0xbefc0080,
+	0xbf11017c, 0x867aff78,
+	0x00400000, 0xbf850003,
+	0xb8faf803, 0x897a7aff,
+	0x10000000, 0xbf850059,
+	0xd3d84000, 0x18000100,
+	0xd3d84001, 0x18000101,
+	0xd3d84002, 0x18000102,
+	0xd3d84003, 0x18000103,
+	0xbe840080, 0xd2890000,
+	0x00000900, 0x80048104,
+	0xd2890001, 0x00000900,
+	0x80048104, 0xd2890002,
+	0x00000900, 0x80048104,
+	0xd2890003, 0x00000900,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0xbe840080,
+	0xd2890000, 0x00000901,
+	0x80048104, 0xd2890001,
+	0x00000901, 0x80048104,
+	0xd2890002, 0x00000901,
+	0x80048104, 0xd2890003,
+	0x00000901, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbe840080, 0xd2890000,
+	0x00000902, 0x80048104,
+	0xd2890001, 0x00000902,
+	0x80048104, 0xd2890002,
+	0x00000902, 0x80048104,
+	0xd2890003, 0x00000902,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0xbe840080,
+	0xd2890000, 0x00000903,
+	0x80048104, 0xd2890001,
+	0x00000903, 0x80048104,
+	0xd2890002, 0x00000903,
+	0x80048104, 0xd2890003,
+	0x00000903, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0x807c847c, 0xbf0a7b7c,
+	0xbf85ffa9, 0xbf9c0000,
+	0xbf820016, 0xd3d84000,
+	0x18000100, 0xd3d84001,
+	0x18000101, 0xd3d84002,
+	0x18000102, 0xd3d84003,
+	0x18000103, 0xe0724000,
+	0x701d0000, 0xe0724100,
+	0x701d0100, 0xe0724200,
+	0x701d0200, 0xe0724300,
+	0x701d0300, 0x807c847c,
+	0x8070ff70, 0x00000400,
+	0xbf0a7b7c, 0xbf85ffeb,
+	0xbf9c0000, 0xbf8200e3,
+	0xbef4007e, 0x8675ff7f,
+	0x0000ffff, 0x8775ff75,
+	0x00040000, 0xbef60080,
+	0xbef700ff, 0x00807fac,
+	0x866eff7f, 0x04000000,
+	0xbf84001f, 0xbefe00c1,
+	0xbeff00c1, 0xb8ef4306,
+	0x866fc16f, 0xbf84001a,
+	0x8e6f866f, 0x8e6f826f,
+	0xbef6006f, 0xb8f82a05,
+	0x80788178, 0x8e788a78,
+	0x8e788178, 0xb8ee1605,
+	0x806e816e, 0x8e6e866e,
+	0x80786e78, 0x8078ff78,
+	0x00000080, 0xbef600ff,
+	0x01000000, 0xbefc0080,
+	0xe0510000, 0x781d0000,
+	0xe0510100, 0x781d0000,
+	0x807cff7c, 0x00000200,
+	0x8078ff78, 0x00000200,
+	0xbf0a6f7c, 0xbf85fff6,
+	0xbefe00c1, 0xbeff00c1,
+	0xbef600ff, 0x01000000,
+	0xb8ef2a05, 0x806f816f,
+	0x8e6f826f, 0x806fff6f,
+	0x00008000, 0xbef80080,
+	0xbeee0078, 0x8078ff78,
+	0x00000400, 0xbefc0084,
+	0xbf11087c, 0xe0524000,
+	0x781d0000, 0xe0524100,
+	0x781d0100, 0xe0524200,
+	0x781d0200, 0xe0524300,
+	0x781d0300, 0xbf8c0f70,
+	0x7e000300, 0x7e020301,
+	0x7e040302, 0x7e060303,
+	0x807c847c, 0x8078ff78,
+	0x00000400, 0xbf0a6f7c,
+	0xbf85ffee, 0xbefc0080,
+	0xbf11087c, 0xe0524000,
+	0x781d0000, 0xe0524100,
+	0x781d0100, 0xe0524200,
+	0x781d0200, 0xe0524300,
+	0x781d0300, 0xbf8c0f70,
+	0xd3d94000, 0x18000100,
+	0xd3d94001, 0x18000101,
+	0xd3d94002, 0x18000102,
+	0xd3d94003, 0x18000103,
+	0x807c847c, 0x8078ff78,
+	0x00000400, 0xbf0a6f7c,
+	0xbf85ffea, 0xbf9c0000,
+	0xe0524000, 0x6e1d0000,
+	0xe0524100, 0x6e1d0100,
+	0xe0524200, 0x6e1d0200,
+	0xe0524300, 0x6e1d0300,
+	0xbf8c0f70, 0xb8f82a05,
+	0x80788178, 0x8e788a78,
+	0x8e788178, 0xb8ee1605,
+	0x806e816e, 0x8e6e866e,
+	0x80786e78, 0x80f8c078,
+	0xb8ef1605, 0x806f816f,
+	0x8e6f846f, 0x8e76826f,
+	0xbef600ff, 0x01000000,
+	0xbefc006f, 0xc031003a,
+	0x00000078, 0x80f8c078,
+	0xbf8cc07f, 0x80fc907c,
+	0xbf800000, 0xbe802d00,
+	0xbe822d02, 0xbe842d04,
+	0xbe862d06, 0xbe882d08,
+	0xbe8a2d0a, 0xbe8c2d0c,
+	0xbe8e2d0e, 0xbf06807c,
+	0xbf84fff0, 0xb8f82a05,
+	0x80788178, 0x8e788a78,
+	0x8e788178, 0xb8ee1605,
+	0x806e816e, 0x8e6e866e,
+	0x80786e78, 0xbef60084,
+	0xbef600ff, 0x01000000,
+	0xc0211bfa, 0x00000078,
+	0x80788478, 0xc0211b3a,
+	0x00000078, 0x80788478,
+	0xc0211b7a, 0x00000078,
+	0x80788478, 0xc0211c3a,
+	0x00000078, 0x80788478,
+	0xc0211c7a, 0x00000078,
+	0x80788478, 0xc0211eba,
+	0x00000078, 0x80788478,
+	0xc0211efa, 0x00000078,
+	0x80788478, 0xc0211a3a,
+	0x00000078, 0x80788478,
+	0xc0211a7a, 0x00000078,
+	0x80788478, 0xc0211cfa,
+	0x00000078, 0x80788478,
+	0xbf8cc07f, 0xbefc006f,
+	0xbefe0070, 0xbeff0071,
+	0x866f7bff, 0x000003ff,
+	0xb96f4803, 0x866f7bff,
+	0xfffff800, 0x8f6f8b6f,
+	0xb96fa2c3, 0xb973f801,
+	0xb8ee2a05, 0x806e816e,
+	0x8e6e8a6e, 0x8e6e816e,
+	0xb8ef1605, 0x806f816f,
+	0x8e6f866f, 0x806e6f6e,
+	0x806e746e, 0x826f8075,
+	0x866fff6f, 0x0000ffff,
+	0xc00b1c37, 0x00000050,
+	0xc00b1d37, 0x00000060,
+	0xc0031e77, 0x00000074,
+	0xbf8cc07f, 0x8f6e8b77,
+	0x866eff6e, 0x001f8000,
+	0xb96ef807, 0x866dff6d,
+	0x0000ffff, 0x86fe7e7e,
+	0x86ea6a6a, 0x8f6e837a,
+	0xb96ee0c2, 0xbf800002,
+	0xb97a0002, 0xbf8a0000,
+	0xbe801f6c, 0xbf810000,
+};
+
+static const uint32_t cwsr_trap_aldebaran_hex[] = {
+	0xbf820001, 0xbf8202df,
+	0xb8f8f802, 0x8978ff78,
+	0x00020006, 0xb8fbf803,
+	0x866eff78, 0x00002000,
+	0xbf840009, 0x866eff6d,
+	0x00ff0000, 0xbf85001e,
+	0x866eff7b, 0x00000400,
+	0xbf850055, 0xbf8e0010,
+	0xb8fbf803, 0xbf82fffa,
+	0x866eff7b, 0x03c00900,
+	0xbf850015, 0x866eff7b,
+	0x000071ff, 0xbf840008,
+	0x866fff7b, 0x00007080,
+	0xbf840001, 0xbeee1a87,
+	0xb8eff801, 0x8e6e8c6e,
+	0x866e6f6e, 0xbf85000a,
+	0x866eff6d, 0x00ff0000,
+	0xbf850007, 0xb8eef801,
+	0x866eff6e, 0x00000800,
+	0xbf850003, 0x866eff7b,
+	0x00000400, 0xbf85003a,
+	0xb8faf807, 0x867aff7a,
+	0x001f8000, 0x8e7a8b7a,
+	0x8977ff77, 0xfc000000,
+	0x87777a77, 0xba7ff807,
+	0x00000000, 0xb8faf812,
+	0xb8fbf813, 0x8efa887a,
+	0xbf0d8f7b, 0xbf840002,
+	0x877bff7b, 0xffff0000,
+	0xc0031bbd, 0x00000010,
+	0xbf8cc07f, 0x8e6e976e,
+	0x8977ff77, 0x00800000,
+	0x87776e77, 0xc0071bbd,
+	0x00000000, 0xbf8cc07f,
+	0xc0071ebd, 0x00000008,
+	0xbf8cc07f, 0x86ee6e6e,
+	0xbf840001, 0xbe801d6e,
+	0x866eff6d, 0x01ff0000,
+	0xbf850005, 0x8778ff78,
+	0x00002000, 0x80ec886c,
+	0x82ed806d, 0xbf820005,
+	0x866eff6d, 0x01000000,
+	0xbf850002, 0x806c846c,
+	0x826d806d, 0x866dff6d,
+	0x0000ffff, 0x8f7a8b77,
+	0x867aff7a, 0x001f8000,
+	0xb97af807, 0x86fe7e7e,
+	0x86ea6a6a, 0x8f6e8378,
+	0xb96ee0c2, 0xbf800002,
+	0xb9780002, 0xbe801f6c,
+	0x866dff6d, 0x0000ffff,
+	0xbefa0080, 0xb97a0283,
+	0xb8faf807, 0x867aff7a,
+	0x001f8000, 0x8e7a8b7a,
+	0x8977ff77, 0xfc000000,
+	0x87777a77, 0xba7ff807,
+	0x00000000, 0xbeee007e,
+	0xbeef007f, 0xbefe0180,
+	0xbf900004, 0x877a8478,
+	0xb97af802, 0xbf8e0002,
+	0xbf88fffe, 0xb8fa2985,
+	0x807a817a, 0x8e7a8a7a,
+	0x8e7a817a, 0xb8fb1605,
+	0x807b817b, 0x8e7b867b,
+	0x807a7b7a, 0x807a7e7a,
+	0x827b807f, 0x867bff7b,
+	0x0000ffff, 0xc04b1c3d,
+	0x00000050, 0xbf8cc07f,
+	0xc04b1d3d, 0x00000060,
+	0xbf8cc07f, 0xc0431e7d,
+	0x00000074, 0xbf8cc07f,
+	0xbef4007e, 0x8675ff7f,
+	0x0000ffff, 0x8775ff75,
+	0x00040000, 0xbef60080,
+	0xbef700ff, 0x00807fac,
+	0xbef1007c, 0xbef00080,
+	0xb8f02985, 0x80708170,
+	0x8e708a70, 0x8e708170,
+	0xb8fa1605, 0x807a817a,
+	0x8e7a867a, 0x80707a70,
+	0xbef60084, 0xbef600ff,
+	0x01000000, 0xbefe007c,
+	0xbefc0070, 0xc0611c7a,
+	0x0000007c, 0xbf8cc07f,
+	0x80708470, 0xbefc007e,
+	0xbefe007c, 0xbefc0070,
+	0xc0611b3a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xbefe007c,
+	0xbefc0070, 0xc0611b7a,
+	0x0000007c, 0xbf8cc07f,
+	0x80708470, 0xbefc007e,
+	0xbefe007c, 0xbefc0070,
+	0xc0611bba, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xbefe007c,
+	0xbefc0070, 0xc0611bfa,
+	0x0000007c, 0xbf8cc07f,
+	0x80708470, 0xbefc007e,
+	0xbefe007c, 0xbefc0070,
+	0xc0611e3a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xb8fbf803,
+	0xbefe007c, 0xbefc0070,
+	0xc0611efa, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xbefe007c,
+	0xbefc0070, 0xc0611a3a,
+	0x0000007c, 0xbf8cc07f,
+	0x80708470, 0xbefc007e,
+	0xbefe007c, 0xbefc0070,
+	0xc0611a7a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xb8f1f801,
+	0xbefe007c, 0xbefc0070,
+	0xc0611c7a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0x867aff7f,
+	0x04000000, 0xbeef0080,
+	0x876f6f7a, 0xb8f02985,
+	0x80708170, 0x8e708a70,
+	0x8e708170, 0xb8fb1605,
+	0x807b817b, 0x8e7b847b,
+	0x8e76827b, 0xbef600ff,
+	0x01000000, 0xbef20174,
+	0x80747074, 0x82758075,
+	0xbefc0080, 0xbf800000,
+	0xbe802b00, 0xbe822b02,
+	0xbe842b04, 0xbe862b06,
+	0xbe882b08, 0xbe8a2b0a,
+	0xbe8c2b0c, 0xbe8e2b0e,
+	0xc06b003a, 0x00000000,
+	0xbf8cc07f, 0xc06b013a,
+	0x00000010, 0xbf8cc07f,
+	0xc06b023a, 0x00000020,
+	0xbf8cc07f, 0xc06b033a,
+	0x00000030, 0xbf8cc07f,
+	0x8074c074, 0x82758075,
+	0x807c907c, 0xbf0a7b7c,
+	0xbf85ffe7, 0xbef40172,
+	0xbef00080, 0xbefe00c1,
+	0xbeff00c1, 0xbee80080,
+	0xbee90080, 0xbef600ff,
+	0x01000000, 0x867aff78,
+	0x00400000, 0xbf850003,
+	0xb8faf803, 0x897a7aff,
+	0x10000000, 0xbf85004d,
+	0xbe840080, 0xd2890000,
+	0x00000900, 0x80048104,
+	0xd2890001, 0x00000900,
+	0x80048104, 0xd2890002,
+	0x00000900, 0x80048104,
+	0xd2890003, 0x00000900,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0xbe840080,
+	0xd2890000, 0x00000901,
+	0x80048104, 0xd2890001,
+	0x00000901, 0x80048104,
+	0xd2890002, 0x00000901,
+	0x80048104, 0xd2890003,
+	0x00000901, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbe840080, 0xd2890000,
+	0x00000902, 0x80048104,
+	0xd2890001, 0x00000902,
+	0x80048104, 0xd2890002,
+	0x00000902, 0x80048104,
+	0xd2890003, 0x00000902,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0xbe840080,
+	0xd2890000, 0x00000903,
+	0x80048104, 0xd2890001,
+	0x00000903, 0x80048104,
+	0xd2890002, 0x00000903,
+	0x80048104, 0xd2890003,
+	0x00000903, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbf820008, 0xe0724000,
+	0x701d0000, 0xe0724100,
+	0x701d0100, 0xe0724200,
+	0x701d0200, 0xe0724300,
+	0x701d0300, 0xbefe00c1,
+	0xbeff00c1, 0xb8fb4306,
+	0x867bc17b, 0xbf840064,
+	0xbf8a0000, 0x867aff6f,
+	0x04000000, 0xbf840060,
+	0x8e7b867b, 0x8e7b827b,
+	0xbef6007b, 0xb8f02985,
+	0x80708170, 0x8e708a70,
+	0x8e708170, 0xb8fa1605,
+	0x807a817a, 0x8e7a867a,
+	0x80707a70, 0x8070ff70,
+	0x00000080, 0xbef600ff,
+	0x01000000, 0xbefc0080,
+	0xd28c0002, 0x000100c1,
+	0xd28d0003, 0x000204c1,
+	0x867aff78, 0x00400000,
+	0xbf850003, 0xb8faf803,
+	0x897a7aff, 0x10000000,
+	0xbf850030, 0x24040682,
+	0xd86e4000, 0x00000002,
+	0xbf8cc07f, 0xbe840080,
+	0xd2890000, 0x00000900,
+	0x80048104, 0xd2890001,
+	0x00000900, 0x80048104,
+	0xd2890002, 0x00000900,
+	0x80048104, 0xd2890003,
+	0x00000900, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbe840080, 0xd2890000,
+	0x00000901, 0x80048104,
+	0xd2890001, 0x00000901,
+	0x80048104, 0xd2890002,
+	0x00000901, 0x80048104,
+	0xd2890003, 0x00000901,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0x680404ff,
+	0x00000200, 0xd0c9006a,
+	0x0000f702, 0xbf87ffd2,
+	0xbf820015, 0xd1060002,
+	0x00011103, 0x7e0602ff,
+	0x00000200, 0xbefc00ff,
+	0x00010000, 0xbe800077,
+	0x8677ff77, 0xff7fffff,
+	0x8777ff77, 0x00058000,
+	0xd8ec0000, 0x00000002,
+	0xbf8cc07f, 0xe0765000,
+	0x701d0002, 0x68040702,
+	0xd0c9006a, 0x0000f702,
+	0xbf87fff7, 0xbef70000,
+	0xbef000ff, 0x00000400,
+	0xbefe00c1, 0xbeff00c1,
+	0xb8fb2b05, 0x807b817b,
+	0x8e7b827b, 0xbef600ff,
+	0x01000000, 0xbefc0084,
+	0xbf0a7b7c, 0xbf84006d,
+	0xbf11017c, 0x807bff7b,
+	0x00001000, 0x867aff78,
+	0x00400000, 0xbf850003,
+	0xb8faf803, 0x897a7aff,
+	0x10000000, 0xbf850051,
+	0xbe840080, 0xd2890000,
+	0x00000900, 0x80048104,
+	0xd2890001, 0x00000900,
+	0x80048104, 0xd2890002,
+	0x00000900, 0x80048104,
+	0xd2890003, 0x00000900,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0xbe840080,
+	0xd2890000, 0x00000901,
+	0x80048104, 0xd2890001,
+	0x00000901, 0x80048104,
+	0xd2890002, 0x00000901,
+	0x80048104, 0xd2890003,
+	0x00000901, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbe840080, 0xd2890000,
+	0x00000902, 0x80048104,
+	0xd2890001, 0x00000902,
+	0x80048104, 0xd2890002,
+	0x00000902, 0x80048104,
+	0xd2890003, 0x00000902,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0xbe840080,
+	0xd2890000, 0x00000903,
+	0x80048104, 0xd2890001,
+	0x00000903, 0x80048104,
+	0xd2890002, 0x00000903,
+	0x80048104, 0xd2890003,
+	0x00000903, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0x807c847c, 0xbf0a7b7c,
+	0xbf85ffb1, 0xbf9c0000,
+	0xbf820012, 0x7e000300,
+	0x7e020301, 0x7e040302,
+	0x7e060303, 0xe0724000,
+	0x701d0000, 0xe0724100,
+	0x701d0100, 0xe0724200,
+	0x701d0200, 0xe0724300,
+	0x701d0300, 0x807c847c,
+	0x8070ff70, 0x00000400,
+	0xbf0a7b7c, 0xbf85ffef,
+	0xbf9c0000, 0xb8fb2985,
+	0x807b817b, 0x8e7b837b,
+	0xb8fa2b05, 0x807a817a,
+	0x8e7a827a, 0x80fb7a7b,
+	0x867b7b7b, 0xbf84007a,
+	0x807bff7b, 0x00001000,
+	0xbefc0080, 0xbf11017c,
+	0x867aff78, 0x00400000,
+	0xbf850003, 0xb8faf803,
+	0x897a7aff, 0x10000000,
+	0xbf850059, 0xd3d84000,
+	0x18000100, 0xd3d84001,
+	0x18000101, 0xd3d84002,
+	0x18000102, 0xd3d84003,
+	0x18000103, 0xbe840080,
+	0xd2890000, 0x00000900,
+	0x80048104, 0xd2890001,
+	0x00000900, 0x80048104,
+	0xd2890002, 0x00000900,
+	0x80048104, 0xd2890003,
+	0x00000900, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbe840080, 0xd2890000,
+	0x00000901, 0x80048104,
+	0xd2890001, 0x00000901,
+	0x80048104, 0xd2890002,
+	0x00000901, 0x80048104,
+	0xd2890003, 0x00000901,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0xbe840080,
+	0xd2890000, 0x00000902,
+	0x80048104, 0xd2890001,
+	0x00000902, 0x80048104,
+	0xd2890002, 0x00000902,
+	0x80048104, 0xd2890003,
+	0x00000902, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbe840080, 0xd2890000,
+	0x00000903, 0x80048104,
+	0xd2890001, 0x00000903,
+	0x80048104, 0xd2890002,
+	0x00000903, 0x80048104,
+	0xd2890003, 0x00000903,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0x807c847c,
+	0xbf0a7b7c, 0xbf85ffa9,
+	0xbf9c0000, 0xbf820016,
+	0xd3d84000, 0x18000100,
+	0xd3d84001, 0x18000101,
+	0xd3d84002, 0x18000102,
+	0xd3d84003, 0x18000103,
+	0xe0724000, 0x701d0000,
+	0xe0724100, 0x701d0100,
+	0xe0724200, 0x701d0200,
+	0xe0724300, 0x701d0300,
+	0x807c847c, 0x8070ff70,
+	0x00000400, 0xbf0a7b7c,
+	0xbf85ffeb, 0xbf9c0000,
+	0xbf8200ee, 0xbef4007e,
+	0x8675ff7f, 0x0000ffff,
+	0x8775ff75, 0x00040000,
+	0xbef60080, 0xbef700ff,
+	0x00807fac, 0x866eff7f,
+	0x04000000, 0xbf84001f,
+	0xbefe00c1, 0xbeff00c1,
+	0xb8ef4306, 0x866fc16f,
+	0xbf84001a, 0x8e6f866f,
+	0x8e6f826f, 0xbef6006f,
+	0xb8f82985, 0x80788178,
+	0x8e788a78, 0x8e788178,
+	0xb8ee1605, 0x806e816e,
+	0x8e6e866e, 0x80786e78,
+	0x8078ff78, 0x00000080,
+	0xbef600ff, 0x01000000,
+	0xbefc0080, 0xe0510000,
+	0x781d0000, 0xe0510100,
+	0x781d0000, 0x807cff7c,
+	0x00000200, 0x8078ff78,
+	0x00000200, 0xbf0a6f7c,
+	0xbf85fff6, 0xbefe00c1,
+	0xbeff00c1, 0xbef600ff,
+	0x01000000, 0xb8ef2b05,
+	0x806f816f, 0x8e6f826f,
+	0x806fff6f, 0x00008000,
+	0xbef80080, 0xbeee0078,
+	0x8078ff78, 0x00000400,
+	0xbefc0084, 0xbf11087c,
+	0xe0524000, 0x781d0000,
+	0xe0524100, 0x781d0100,
+	0xe0524200, 0x781d0200,
+	0xe0524300, 0x781d0300,
+	0xbf8c0f70, 0x7e000300,
+	0x7e020301, 0x7e040302,
+	0x7e060303, 0x807c847c,
+	0x8078ff78, 0x00000400,
+	0xbf0a6f7c, 0xbf85ffee,
+	0xb8ef2985, 0x806f816f,
+	0x8e6f836f, 0xb8f92b05,
+	0x80798179, 0x8e798279,
+	0x80ef796f, 0x866f6f6f,
+	0xbf84001a, 0x806fff6f,
+	0x00008000, 0xbefc0080,
+	0xbf11087c, 0xe0524000,
+	0x781d0000, 0xe0524100,
+	0x781d0100, 0xe0524200,
+	0x781d0200, 0xe0524300,
+	0x781d0300, 0xbf8c0f70,
+	0xd3d94000, 0x18000100,
+	0xd3d94001, 0x18000101,
+	0xd3d94002, 0x18000102,
+	0xd3d94003, 0x18000103,
+	0x807c847c, 0x8078ff78,
+	0x00000400, 0xbf0a6f7c,
+	0xbf85ffea, 0xbf9c0000,
+	0xe0524000, 0x6e1d0000,
+	0xe0524100, 0x6e1d0100,
+	0xe0524200, 0x6e1d0200,
+	0xe0524300, 0x6e1d0300,
+	0xbf8c0f70, 0xb8f82985,
+	0x80788178, 0x8e788a78,
+	0x8e788178, 0xb8ee1605,
+	0x806e816e, 0x8e6e866e,
+	0x80786e78, 0x80f8c078,
+	0xb8ef1605, 0x806f816f,
+	0x8e6f846f, 0x8e76826f,
+	0xbef600ff, 0x01000000,
+	0xbefc006f, 0xc031003a,
+	0x00000078, 0x80f8c078,
+	0xbf8cc07f, 0x80fc907c,
+	0xbf800000, 0xbe802d00,
+	0xbe822d02, 0xbe842d04,
+	0xbe862d06, 0xbe882d08,
+	0xbe8a2d0a, 0xbe8c2d0c,
+	0xbe8e2d0e, 0xbf06807c,
+	0xbf84fff0, 0xb8f82985,
+	0x80788178, 0x8e788a78,
+	0x8e788178, 0xb8ee1605,
+	0x806e816e, 0x8e6e866e,
+	0x80786e78, 0xbef60084,
+	0xbef600ff, 0x01000000,
+	0xc0211bfa, 0x00000078,
+	0x80788478, 0xc0211b3a,
+	0x00000078, 0x80788478,
+	0xc0211b7a, 0x00000078,
+	0x80788478, 0xc0211c3a,
+	0x00000078, 0x80788478,
+	0xc0211c7a, 0x00000078,
+	0x80788478, 0xc0211eba,
+	0x00000078, 0x80788478,
+	0xc0211efa, 0x00000078,
+	0x80788478, 0xc0211a3a,
+	0x00000078, 0x80788478,
+	0xc0211a7a, 0x00000078,
+	0x80788478, 0xc0211cfa,
+	0x00000078, 0x80788478,
+	0xbf8cc07f, 0xbefc006f,
+	0xbefe0070, 0xbeff0071,
+	0x866f7bff, 0x000003ff,
+	0xb96f4803, 0x866f7bff,
+	0xfffff800, 0x8f6f8b6f,
+	0xb96fa2c3, 0xb973f801,
+	0xb8ee2985, 0x806e816e,
+	0x8e6e8a6e, 0x8e6e816e,
+	0xb8ef1605, 0x806f816f,
+	0x8e6f866f, 0x806e6f6e,
+	0x806e746e, 0x826f8075,
+	0x866fff6f, 0x0000ffff,
+	0xc00b1c37, 0x00000050,
+	0xc00b1d37, 0x00000060,
+	0xc0031e77, 0x00000074,
+	0xbf8cc07f, 0x8f6e8b77,
+	0x866eff6e, 0x001f8000,
+	0xb96ef807, 0x866dff6d,
+	0x0000ffff, 0x86fe7e7e,
+	0x86ea6a6a, 0x8f6e837a,
+	0xb96ee0c2, 0xbf800002,
+	0xb97a0002, 0xbf8a0000,
+	0xbe801f6c, 0xbf810000,
+};
+
+static const uint32_t cwsr_trap_gfx10_hex[] = {
+	0xbf820001, 0xbf820220,
+	0xb0804004, 0xb978f802,
+	0x8a78ff78, 0x00020006,
+	0xb97bf803, 0x876eff78,
+	0x00002000, 0xbf840009,
+	0x876eff6d, 0x00ff0000,
+	0xbf85001e, 0x876eff7b,
+	0x00000400, 0xbf850045,
+	0xbf8e0010, 0xb97bf803,
+	0xbf82fffa, 0x876eff7b,
+	0x00000900, 0xbf850015,
+	0x876eff7b, 0x000071ff,
+	0xbf840008, 0x876fff7b,
+	0x00007080, 0xbf840001,
+	0xbeee1d87, 0xb96ff801,
+	0x8f6e8c6e, 0x876e6f6e,
+	0xbf85000a, 0x876eff6d,
+	0x00ff0000, 0xbf850007,
+	0xb96ef801, 0x876eff6e,
+	0x00000800, 0xbf850003,
+	0x876eff7b, 0x00000400,
+	0xbf85002a, 0xb97af812,
+	0xb97bf813, 0x8ffa887a,
+	0xbf0d8f7b, 0xbf840002,
+	0x887bff7b, 0xffff0000,
+	0xf4011bbd, 0xfa000010,
+	0xbf8cc07f, 0x8f6e976e,
+	0x8a77ff77, 0x00800000,
+	0x88776e77, 0xf4051bbd,
+	0xfa000000, 0xbf8cc07f,
+	0xf4051ebd, 0xfa000008,
+	0xbf8cc07f, 0x87ee6e6e,
+	0xbf840001, 0xbe80206e,
+	0x876eff6d, 0x01ff0000,
+	0xbf850005, 0x8878ff78,
+	0x00002000, 0x80ec886c,
+	0x82ed806d, 0xbf820005,
+	0x876eff6d, 0x01000000,
+	0xbf850002, 0x806c846c,
+	0x826d806d, 0x876dff6d,
+	0x0000ffff, 0x87fe7e7e,
+	0x87ea6a6a, 0xb9f8f802,
+	0xbe80226c, 0x876dff6d,
+	0x0000ffff, 0xbefa0380,
+	0xb9fa0283, 0xbeee037e,
+	0xbeef037f, 0xbefe0480,
+	0xbf900004, 0xbf8cc07f,
+	0x877aff7f, 0x04000000,
+	0x8f7a857a, 0x886d7a6d,
+	0xbefa037e, 0x877bff7f,
+	0x0000ffff, 0xbefe03c1,
+	0xbeff03c1, 0xdc5f8000,
+	0x007a0000, 0x7e000280,
+	0xbefe037a, 0xbeff037b,
+	0xb97b02dc, 0x8f7b997b,
+	0xb97a3a05, 0x807a817a,
+	0xbf0d997b, 0xbf850002,
+	0x8f7a897a, 0xbf820001,
+	0x8f7a8a7a, 0xb97b1e06,
+	0x8f7b8a7b, 0x807a7b7a,
+	0x877bff7f, 0x0000ffff,
+	0x807aff7a, 0x00000200,
+	0x807a7e7a, 0x827b807b,
+	0xd7610000, 0x00010870,
+	0xd7610000, 0x00010a71,
+	0xd7610000, 0x00010c72,
+	0xd7610000, 0x00010e73,
+	0xd7610000, 0x00011074,
+	0xd7610000, 0x00011275,
+	0xd7610000, 0x00011476,
+	0xd7610000, 0x00011677,
+	0xd7610000, 0x00011a79,
+	0xd7610000, 0x00011c7e,
+	0xd7610000, 0x00011e7f,
+	0xbefe03ff, 0x00003fff,
+	0xbeff0380, 0xdc5f8040,
+	0x007a0000, 0xd760007a,
+	0x00011d00, 0xd760007b,
+	0x00011f00, 0xbefe037a,
+	0xbeff037b, 0xbef4037e,
+	0x8775ff7f, 0x0000ffff,
+	0x8875ff75, 0x00040000,
+	0xbef60380, 0xbef703ff,
+	0x10807fac, 0xbef1037c,
+	0xbef00380, 0xb97302dc,
+	0x8f739973, 0xbefe03c1,
+	0x907c9973, 0x877c817c,
+	0xbf06817c, 0xbf850002,
+	0xbeff0380, 0xbf820002,
+	0xbeff03c1, 0xbf820009,
+	0xbef603ff, 0x01000000,
+	0xe0704080, 0x705d0100,
+	0xe0704100, 0x705d0200,
+	0xe0704180, 0x705d0300,
+	0xbf820008, 0xbef603ff,
+	0x01000000, 0xe0704100,
+	0x705d0100, 0xe0704200,
+	0x705d0200, 0xe0704300,
+	0x705d0300, 0xb9703a05,
+	0x80708170, 0xbf0d9973,
+	0xbf850002, 0x8f708970,
+	0xbf820001, 0x8f708a70,
+	0xb97a1e06, 0x8f7a8a7a,
+	0x80707a70, 0x8070ff70,
+	0x00000200, 0xbef603ff,
+	0x01000000, 0x7e000280,
+	0x7e020280, 0x7e040280,
+	0xbefc0380, 0xd7610002,
+	0x0000f871, 0x807c817c,
+	0xd7610002, 0x0000f86c,
+	0x807c817c, 0x8a7aff6d,
+	0x80000000, 0xd7610002,
+	0x0000f87a, 0x807c817c,
+	0xd7610002, 0x0000f86e,
+	0x807c817c, 0xd7610002,
+	0x0000f86f, 0x807c817c,
+	0xd7610002, 0x0000f878,
+	0x807c817c, 0xb97af803,
+	0xd7610002, 0x0000f87a,
+	0x807c817c, 0xd7610002,
+	0x0000f87b, 0x807c817c,
+	0xb971f801, 0xd7610002,
+	0x0000f871, 0x807c817c,
+	0xb971f814, 0xd7610002,
+	0x0000f871, 0x807c817c,
+	0xb971f815, 0xd7610002,
+	0x0000f871, 0x807c817c,
+	0xbefe03ff, 0x0000ffff,
+	0xbeff0380, 0xe0704000,
+	0x705d0200, 0xbefe03c1,
+	0xb9703a05, 0x80708170,
+	0xbf0d9973, 0xbf850002,
+	0x8f708970, 0xbf820001,
+	0x8f708a70, 0xb97a1e06,
+	0x8f7a8a7a, 0x80707a70,
+	0xbef603ff, 0x01000000,
+	0xbef90380, 0xbefc0380,
+	0xbf800000, 0xbe802f00,
+	0xbe822f02, 0xbe842f04,
+	0xbe862f06, 0xbe882f08,
+	0xbe8a2f0a, 0xbe8c2f0c,
+	0xbe8e2f0e, 0xd7610002,
+	0x0000f200, 0x80798179,
+	0xd7610002, 0x0000f201,
+	0x80798179, 0xd7610002,
+	0x0000f202, 0x80798179,
+	0xd7610002, 0x0000f203,
+	0x80798179, 0xd7610002,
+	0x0000f204, 0x80798179,
+	0xd7610002, 0x0000f205,
+	0x80798179, 0xd7610002,
+	0x0000f206, 0x80798179,
+	0xd7610002, 0x0000f207,
+	0x80798179, 0xd7610002,
+	0x0000f208, 0x80798179,
+	0xd7610002, 0x0000f209,
+	0x80798179, 0xd7610002,
+	0x0000f20a, 0x80798179,
+	0xd7610002, 0x0000f20b,
+	0x80798179, 0xd7610002,
+	0x0000f20c, 0x80798179,
+	0xd7610002, 0x0000f20d,
+	0x80798179, 0xd7610002,
+	0x0000f20e, 0x80798179,
+	0xd7610002, 0x0000f20f,
+	0x80798179, 0xbf06a079,
+	0xbf840006, 0xe0704000,
+	0x705d0200, 0x8070ff70,
+	0x00000080, 0xbef90380,
+	0x7e040280, 0x807c907c,
+	0xbf0aff7c, 0x00000060,
+	0xbf85ffbc, 0xbe802f00,
+	0xbe822f02, 0xbe842f04,
+	0xbe862f06, 0xbe882f08,
+	0xbe8a2f0a, 0xd7610002,
+	0x0000f200, 0x80798179,
+	0xd7610002, 0x0000f201,
+	0x80798179, 0xd7610002,
+	0x0000f202, 0x80798179,
+	0xd7610002, 0x0000f203,
+	0x80798179, 0xd7610002,
+	0x0000f204, 0x80798179,
+	0xd7610002, 0x0000f205,
+	0x80798179, 0xd7610002,
+	0x0000f206, 0x80798179,
+	0xd7610002, 0x0000f207,
+	0x80798179, 0xd7610002,
+	0x0000f208, 0x80798179,
+	0xd7610002, 0x0000f209,
+	0x80798179, 0xd7610002,
+	0x0000f20a, 0x80798179,
+	0xd7610002, 0x0000f20b,
+	0x80798179, 0xe0704000,
+	0x705d0200, 0xbefe03c1,
+	0x907c9973, 0x877c817c,
+	0xbf06817c, 0xbf850002,
+	0xbeff0380, 0xbf820001,
+	0xbeff03c1, 0xb97b4306,
+	0x877bc17b, 0xbf840044,
+	0xbf8a0000, 0x877aff6d,
+	0x80000000, 0xbf840040,
+	0x8f7b867b, 0x8f7b827b,
+	0xbef6037b, 0xb9703a05,
+	0x80708170, 0xbf0d9973,
+	0xbf850002, 0x8f708970,
+	0xbf820001, 0x8f708a70,
+	0xb97a1e06, 0x8f7a8a7a,
+	0x80707a70, 0x8070ff70,
+	0x00000200, 0x8070ff70,
+	0x00000080, 0xbef603ff,
+	0x01000000, 0xd7650000,
+	0x000100c1, 0xd7660000,
+	0x000200c1, 0x16000084,
+	0x907c9973, 0x877c817c,
+	0xbf06817c, 0xbefc0380,
+	0xbf850012, 0xbe8303ff,
+	0x00000080, 0xbf800000,
+	0xbf800000, 0xbf800000,
+	0xd8d80000, 0x01000000,
+	0xbf8c0000, 0xe0704000,
+	0x705d0100, 0x807c037c,
+	0x80700370, 0xd5250000,
+	0x0001ff00, 0x00000080,
+	0xbf0a7b7c, 0xbf85fff4,
+	0xbf820011, 0xbe8303ff,
+	0x00000100, 0xbf800000,
+	0xbf800000, 0xbf800000,
+	0xd8d80000, 0x01000000,
+	0xbf8c0000, 0xe0704000,
+	0x705d0100, 0x807c037c,
+	0x80700370, 0xd5250000,
+	0x0001ff00, 0x00000100,
+	0xbf0a7b7c, 0xbf85fff4,
+	0xbefe03c1, 0x907c9973,
+	0x877c817c, 0xbf06817c,
+	0xbf850004, 0xbef003ff,
+	0x00000200, 0xbeff0380,
+	0xbf820003, 0xbef003ff,
+	0x00000400, 0xbeff03c1,
+	0xb97b3a05, 0x807b817b,
+	0x8f7b827b, 0x907c9973,
+	0x877c817c, 0xbf06817c,
+	0xbf850017, 0xbef603ff,
+	0x01000000, 0xbefc0384,
+	0xbf0a7b7c, 0xbf840037,
+	0x7e008700, 0x7e028701,
+	0x7e048702, 0x7e068703,
+	0xe0704000, 0x705d0000,
+	0xe0704080, 0x705d0100,
+	0xe0704100, 0x705d0200,
+	0xe0704180, 0x705d0300,
+	0x807c847c, 0x8070ff70,
+	0x00000200, 0xbf0a7b7c,
+	0xbf85ffef, 0xbf820025,
+	0xbef603ff, 0x01000000,
+	0xbefc0384, 0xbf0a7b7c,
+	0xbf840011, 0x7e008700,
+	0x7e028701, 0x7e048702,
+	0x7e068703, 0xe0704000,
+	0x705d0000, 0xe0704100,
+	0x705d0100, 0xe0704200,
+	0x705d0200, 0xe0704300,
+	0x705d0300, 0x807c847c,
+	0x8070ff70, 0x00000400,
+	0xbf0a7b7c, 0xbf85ffef,
+	0xb97b1e06, 0x877bc17b,
+	0xbf84000c, 0x8f7b837b,
+	0x807b7c7b, 0xbefe03c1,
+	0xbeff0380, 0x7e008700,
+	0xe0704000, 0x705d0000,
+	0x807c817c, 0x8070ff70,
+	0x00000080, 0xbf0a7b7c,
+	0xbf85fff8, 0xbf82013b,
+	0xbef4037e, 0x8775ff7f,
+	0x0000ffff, 0x8875ff75,
+	0x00040000, 0xbef60380,
+	0xbef703ff, 0x10807fac,
+	0xb97202dc, 0x8f729972,
+	0x876eff7f, 0x04000000,
+	0xbf840034, 0xbefe03c1,
+	0x907c9972, 0x877c817c,
+	0xbf06817c, 0xbf850002,
+	0xbeff0380, 0xbf820001,
+	0xbeff03c1, 0xb96f4306,
+	0x876fc16f, 0xbf840029,
+	0x8f6f866f, 0x8f6f826f,
+	0xbef6036f, 0xb9783a05,
+	0x80788178, 0xbf0d9972,
+	0xbf850002, 0x8f788978,
+	0xbf820001, 0x8f788a78,
+	0xb96e1e06, 0x8f6e8a6e,
+	0x80786e78, 0x8078ff78,
+	0x00000200, 0x8078ff78,
+	0x00000080, 0xbef603ff,
+	0x01000000, 0x907c9972,
+	0x877c817c, 0xbf06817c,
+	0xbefc0380, 0xbf850009,
+	0xe0310000, 0x781d0000,
+	0x807cff7c, 0x00000080,
+	0x8078ff78, 0x00000080,
+	0xbf0a6f7c, 0xbf85fff8,
+	0xbf820008, 0xe0310000,
+	0x781d0000, 0x807cff7c,
+	0x00000100, 0x8078ff78,
+	0x00000100, 0xbf0a6f7c,
+	0xbf85fff8, 0xbef80380,
+	0xbefe03c1, 0x907c9972,
+	0x877c817c, 0xbf06817c,
+	0xbf850002, 0xbeff0380,
+	0xbf820001, 0xbeff03c1,
+	0xb96f3a05, 0x806f816f,
+	0x8f6f826f, 0x907c9972,
+	0x877c817c, 0xbf06817c,
+	0xbf850024, 0xbef603ff,
+	0x01000000, 0xbeee0378,
+	0x8078ff78, 0x00000200,
+	0xbefc0384, 0xbf0a6f7c,
+	0xbf840050, 0xe0304000,
+	0x785d0000, 0xe0304080,
+	0x785d0100, 0xe0304100,
+	0x785d0200, 0xe0304180,
+	0x785d0300, 0xbf8c3f70,
+	0x7e008500, 0x7e028501,
+	0x7e048502, 0x7e068503,
+	0x807c847c, 0x8078ff78,
+	0x00000200, 0xbf0a6f7c,
+	0xbf85ffee, 0xe0304000,
+	0x6e5d0000, 0xe0304080,
+	0x6e5d0100, 0xe0304100,
+	0x6e5d0200, 0xe0304180,
+	0x6e5d0300, 0xbf8c3f70,
+	0xbf820034, 0xbef603ff,
+	0x01000000, 0xbeee0378,
+	0x8078ff78, 0x00000400,
+	0xbefc0384, 0xbf0a6f7c,
+	0xbf840012, 0xe0304000,
+	0x785d0000, 0xe0304100,
+	0x785d0100, 0xe0304200,
+	0x785d0200, 0xe0304300,
+	0x785d0300, 0xbf8c3f70,
+	0x7e008500, 0x7e028501,
+	0x7e048502, 0x7e068503,
+	0x807c847c, 0x8078ff78,
+	0x00000400, 0xbf0a6f7c,
+	0xbf85ffee, 0xb96f1e06,
+	0x876fc16f, 0xbf84000e,
+	0x8f6f836f, 0x806f7c6f,
+	0xbefe03c1, 0xbeff0380,
+	0xe0304000, 0x785d0000,
+	0xbf8c3f70, 0x7e008500,
+	0x807c817c, 0x8078ff78,
+	0x00000080, 0xbf0a6f7c,
+	0xbf85fff7, 0xbeff03c1,
+	0xe0304000, 0x6e5d0000,
+	0xe0304100, 0x6e5d0100,
+	0xe0304200, 0x6e5d0200,
+	0xe0304300, 0x6e5d0300,
+	0xbf8c3f70, 0xb9783a05,
+	0x80788178, 0xbf0d9972,
+	0xbf850002, 0x8f788978,
+	0xbf820001, 0x8f788a78,
+	0xb96e1e06, 0x8f6e8a6e,
+	0x80786e78, 0x8078ff78,
+	0x00000200, 0x80f8ff78,
+	0x00000050, 0xbef603ff,
+	0x01000000, 0xbefc03ff,
+	0x0000006c, 0x80f89078,
+	0xf429003a, 0xf0000000,
+	0xbf8cc07f, 0x80fc847c,
+	0xbf800000, 0xbe803100,
+	0xbe823102, 0x80f8a078,
+	0xf42d003a, 0xf0000000,
+	0xbf8cc07f, 0x80fc887c,
+	0xbf800000, 0xbe803100,
+	0xbe823102, 0xbe843104,
+	0xbe863106, 0x80f8c078,
+	0xf431003a, 0xf0000000,
+	0xbf8cc07f, 0x80fc907c,
+	0xbf800000, 0xbe803100,
+	0xbe823102, 0xbe843104,
+	0xbe863106, 0xbe883108,
+	0xbe8a310a, 0xbe8c310c,
+	0xbe8e310e, 0xbf06807c,
+	0xbf84fff0, 0xba80f801,
+	0x00000000, 0xbf8a0000,
+	0xb9783a05, 0x80788178,
+	0xbf0d9972, 0xbf850002,
+	0x8f788978, 0xbf820001,
+	0x8f788a78, 0xb96e1e06,
+	0x8f6e8a6e, 0x80786e78,
+	0x8078ff78, 0x00000200,
+	0xbef603ff, 0x01000000,
+	0xf4211bfa, 0xf0000000,
+	0x80788478, 0xf4211b3a,
+	0xf0000000, 0x80788478,
+	0xf4211b7a, 0xf0000000,
+	0x80788478, 0xf4211c3a,
+	0xf0000000, 0x80788478,
+	0xf4211c7a, 0xf0000000,
+	0x80788478, 0xf4211eba,
+	0xf0000000, 0x80788478,
+	0xf4211efa, 0xf0000000,
+	0x80788478, 0xf4211e7a,
+	0xf0000000, 0x80788478,
+	0xf4211cfa, 0xf0000000,
+	0x80788478, 0xf4211bba,
+	0xf0000000, 0x80788478,
+	0xbf8cc07f, 0xb9eef814,
+	0xf4211bba, 0xf0000000,
+	0x80788478, 0xbf8cc07f,
+	0xb9eef815, 0xbefc036f,
+	0xbefe0370, 0xbeff0371,
+	0x876f7bff, 0x000003ff,
+	0xb9ef4803, 0x876f7bff,
+	0xfffff800, 0x906f8b6f,
+	0xb9efa2c3, 0xb9f3f801,
+	0xb96e3a05, 0x806e816e,
+	0xbf0d9972, 0xbf850002,
+	0x8f6e896e, 0xbf820001,
+	0x8f6e8a6e, 0xb96f1e06,
+	0x8f6f8a6f, 0x806e6f6e,
+	0x806eff6e, 0x00000200,
+	0x806e746e, 0x826f8075,
+	0x876fff6f, 0x0000ffff,
+	0xf4091c37, 0xfa000050,
+	0xf4091d37, 0xfa000060,
+	0xf4011e77, 0xfa000074,
+	0xbf8cc07f, 0x876dff6d,
+	0x0000ffff, 0x87fe7e7e,
+	0x87ea6a6a, 0xb9faf802,
+	0xbe80226c, 0xbf810000,
+	0xbf9f0000, 0xbf9f0000,
+	0xbf9f0000, 0xbf9f0000,
+	0xbf9f0000, 0x00000000,
+};
+
+static const uint32_t cwsr_trap_gfx11_hex[] = {
+	0xbfa00001, 0xbfa00225,
+	0xb0804006, 0xb8f8f802,
+	0x9178ff78, 0x00020006,
+	0xb8fbf803, 0xbf0d9e6d,
+	0xbfa10001, 0xbfbd0000,
+	0xbf0d9f6d, 0xbfa20006,
+	0x8b6eff78, 0x00002000,
+	0xbfa10009, 0x8b6eff6d,
+	0x00ff0000, 0xbfa2001e,
+	0x8b6eff7b, 0x00000400,
+	0xbfa20045, 0xbf830010,
+	0xb8fbf803, 0xbfa0fffa,
+	0x8b6eff7b, 0x00000900,
+	0xbfa20015, 0x8b6eff7b,
+	0x000071ff, 0xbfa10008,
+	0x8b6fff7b, 0x00007080,
+	0xbfa10001, 0xbeee1287,
+	0xb8eff801, 0x846e8c6e,
+	0x8b6e6f6e, 0xbfa2000a,
+	0x8b6eff6d, 0x00ff0000,
+	0xbfa20007, 0xb8eef801,
+	0x8b6eff6e, 0x00000800,
+	0xbfa20003, 0x8b6eff7b,
+	0x00000400, 0xbfa2002a,
+	0xbefa4d82, 0xbf89fc07,
+	0x84fa887a, 0xbf0d8f7b,
+	0xbfa10002, 0x8c7bff7b,
+	0xffff0000, 0xf4005bbd,
+	0xf8000010, 0xbf89fc07,
+	0x846e976e, 0x9177ff77,
+	0x00800000, 0x8c776e77,
+	0xf4045bbd, 0xf8000000,
+	0xbf89fc07, 0xf4045ebd,
+	0xf8000008, 0xbf89fc07,
+	0x8bee6e6e, 0xbfa10001,
+	0xbe80486e, 0x8b6eff6d,
+	0x01ff0000, 0xbfa20005,
+	0x8c78ff78, 0x00002000,
+	0x80ec886c, 0x82ed806d,
+	0xbfa00005, 0x8b6eff6d,
+	0x01000000, 0xbfa20002,
+	0x806c846c, 0x826d806d,
+	0x8b6dff6d, 0x0000ffff,
+	0x8bfe7e7e, 0x8bea6a6a,
+	0xb978f802, 0xbe804a6c,
+	0x8b6dff6d, 0x0000ffff,
+	0xbefa0080, 0xb97a0283,
+	0xbeee007e, 0xbeef007f,
+	0xbefe0180, 0xbefe4d84,
+	0xbf89fc07, 0x8b7aff7f,
+	0x04000000, 0x847a857a,
+	0x8c6d7a6d, 0xbefa007e,
+	0x8b7bff7f, 0x0000ffff,
+	0xbefe00c1, 0xbeff00c1,
+	0xdca6c000, 0x007a0000,
+	0x7e000280, 0xbefe007a,
+	0xbeff007b, 0xb8fb02dc,
+	0x847b997b, 0xb8fa3b05,
+	0x807a817a, 0xbf0d997b,
+	0xbfa20002, 0x847a897a,
+	0xbfa00001, 0x847a8a7a,
+	0xb8fb1e06, 0x847b8a7b,
+	0x807a7b7a, 0x8b7bff7f,
+	0x0000ffff, 0x807aff7a,
+	0x00000200, 0x807a7e7a,
+	0x827b807b, 0xd7610000,
+	0x00010870, 0xd7610000,
+	0x00010a71, 0xd7610000,
+	0x00010c72, 0xd7610000,
+	0x00010e73, 0xd7610000,
+	0x00011074, 0xd7610000,
+	0x00011275, 0xd7610000,
+	0x00011476, 0xd7610000,
+	0x00011677, 0xd7610000,
+	0x00011a79, 0xd7610000,
+	0x00011c7e, 0xd7610000,
+	0x00011e7f, 0xbefe00ff,
+	0x00003fff, 0xbeff0080,
+	0xdca6c040, 0x007a0000,
+	0xd760007a, 0x00011d00,
+	0xd760007b, 0x00011f00,
+	0xbefe007a, 0xbeff007b,
+	0xbef4007e, 0x8b75ff7f,
+	0x0000ffff, 0x8c75ff75,
+	0x00040000, 0xbef60080,
+	0xbef700ff, 0x10807fac,
+	0xbef1007d, 0xbef00080,
+	0xb8f302dc, 0x84739973,
+	0xbefe00c1, 0x857d9973,
+	0x8b7d817d, 0xbf06817d,
+	0xbfa20002, 0xbeff0080,
+	0xbfa00002, 0xbeff00c1,
+	0xbfa00009, 0xbef600ff,
+	0x01000000, 0xe0685080,
+	0x701d0100, 0xe0685100,
+	0x701d0200, 0xe0685180,
+	0x701d0300, 0xbfa00008,
+	0xbef600ff, 0x01000000,
+	0xe0685100, 0x701d0100,
+	0xe0685200, 0x701d0200,
+	0xe0685300, 0x701d0300,
+	0xb8f03b05, 0x80708170,
+	0xbf0d9973, 0xbfa20002,
+	0x84708970, 0xbfa00001,
+	0x84708a70, 0xb8fa1e06,
+	0x847a8a7a, 0x80707a70,
+	0x8070ff70, 0x00000200,
+	0xbef600ff, 0x01000000,
+	0x7e000280, 0x7e020280,
+	0x7e040280, 0xbefd0080,
+	0xd7610002, 0x0000fa71,
+	0x807d817d, 0xd7610002,
+	0x0000fa6c, 0x807d817d,
+	0x917aff6d, 0x80000000,
+	0xd7610002, 0x0000fa7a,
+	0x807d817d, 0xd7610002,
+	0x0000fa6e, 0x807d817d,
+	0xd7610002, 0x0000fa6f,
+	0x807d817d, 0xd7610002,
+	0x0000fa78, 0x807d817d,
+	0xb8faf803, 0xd7610002,
+	0x0000fa7a, 0x807d817d,
+	0xd7610002, 0x0000fa7b,
+	0x807d817d, 0xb8f1f801,
+	0xd7610002, 0x0000fa71,
+	0x807d817d, 0xb8f1f814,
+	0xd7610002, 0x0000fa71,
+	0x807d817d, 0xb8f1f815,
+	0xd7610002, 0x0000fa71,
+	0x807d817d, 0xbefe00ff,
+	0x0000ffff, 0xbeff0080,
+	0xe0685000, 0x701d0200,
+	0xbefe00c1, 0xb8f03b05,
+	0x80708170, 0xbf0d9973,
+	0xbfa20002, 0x84708970,
+	0xbfa00001, 0x84708a70,
+	0xb8fa1e06, 0x847a8a7a,
+	0x80707a70, 0xbef600ff,
+	0x01000000, 0xbef90080,
+	0xbefd0080, 0xbf800000,
+	0xbe804100, 0xbe824102,
+	0xbe844104, 0xbe864106,
+	0xbe884108, 0xbe8a410a,
+	0xbe8c410c, 0xbe8e410e,
+	0xd7610002, 0x0000f200,
+	0x80798179, 0xd7610002,
+	0x0000f201, 0x80798179,
+	0xd7610002, 0x0000f202,
+	0x80798179, 0xd7610002,
+	0x0000f203, 0x80798179,
+	0xd7610002, 0x0000f204,
+	0x80798179, 0xd7610002,
+	0x0000f205, 0x80798179,
+	0xd7610002, 0x0000f206,
+	0x80798179, 0xd7610002,
+	0x0000f207, 0x80798179,
+	0xd7610002, 0x0000f208,
+	0x80798179, 0xd7610002,
+	0x0000f209, 0x80798179,
+	0xd7610002, 0x0000f20a,
+	0x80798179, 0xd7610002,
+	0x0000f20b, 0x80798179,
+	0xd7610002, 0x0000f20c,
+	0x80798179, 0xd7610002,
+	0x0000f20d, 0x80798179,
+	0xd7610002, 0x0000f20e,
+	0x80798179, 0xd7610002,
+	0x0000f20f, 0x80798179,
+	0xbf06a079, 0xbfa10006,
+	0xe0685000, 0x701d0200,
+	0x8070ff70, 0x00000080,
+	0xbef90080, 0x7e040280,
+	0x807d907d, 0xbf0aff7d,
+	0x00000060, 0xbfa2ffbc,
+	0xbe804100, 0xbe824102,
+	0xbe844104, 0xbe864106,
+	0xbe884108, 0xbe8a410a,
+	0xd7610002, 0x0000f200,
+	0x80798179, 0xd7610002,
+	0x0000f201, 0x80798179,
+	0xd7610002, 0x0000f202,
+	0x80798179, 0xd7610002,
+	0x0000f203, 0x80798179,
+	0xd7610002, 0x0000f204,
+	0x80798179, 0xd7610002,
+	0x0000f205, 0x80798179,
+	0xd7610002, 0x0000f206,
+	0x80798179, 0xd7610002,
+	0x0000f207, 0x80798179,
+	0xd7610002, 0x0000f208,
+	0x80798179, 0xd7610002,
+	0x0000f209, 0x80798179,
+	0xd7610002, 0x0000f20a,
+	0x80798179, 0xd7610002,
+	0x0000f20b, 0x80798179,
+	0xe0685000, 0x701d0200,
+	0xbefe00c1, 0x857d9973,
+	0x8b7d817d, 0xbf06817d,
+	0xbfa20002, 0xbeff0080,
+	0xbfa00001, 0xbeff00c1,
+	0xb8fb4306, 0x8b7bc17b,
+	0xbfa10044, 0xbfbd0000,
+	0x8b7aff6d, 0x80000000,
+	0xbfa10040, 0x847b867b,
+	0x847b827b, 0xbef6007b,
+	0xb8f03b05, 0x80708170,
+	0xbf0d9973, 0xbfa20002,
+	0x84708970, 0xbfa00001,
+	0x84708a70, 0xb8fa1e06,
+	0x847a8a7a, 0x80707a70,
+	0x8070ff70, 0x00000200,
+	0x8070ff70, 0x00000080,
+	0xbef600ff, 0x01000000,
+	0xd71f0000, 0x000100c1,
+	0xd7200000, 0x000200c1,
+	0x16000084, 0x857d9973,
+	0x8b7d817d, 0xbf06817d,
+	0xbefd0080, 0xbfa20012,
+	0xbe8300ff, 0x00000080,
+	0xbf800000, 0xbf800000,
+	0xbf800000, 0xd8d80000,
+	0x01000000, 0xbf890000,
+	0xe0685000, 0x701d0100,
+	0x807d037d, 0x80700370,
+	0xd5250000, 0x0001ff00,
+	0x00000080, 0xbf0a7b7d,
+	0xbfa2fff4, 0xbfa00011,
+	0xbe8300ff, 0x00000100,
+	0xbf800000, 0xbf800000,
+	0xbf800000, 0xd8d80000,
+	0x01000000, 0xbf890000,
+	0xe0685000, 0x701d0100,
+	0x807d037d, 0x80700370,
+	0xd5250000, 0x0001ff00,
+	0x00000100, 0xbf0a7b7d,
+	0xbfa2fff4, 0xbefe00c1,
+	0x857d9973, 0x8b7d817d,
+	0xbf06817d, 0xbfa20004,
+	0xbef000ff, 0x00000200,
+	0xbeff0080, 0xbfa00003,
+	0xbef000ff, 0x00000400,
+	0xbeff00c1, 0xb8fb3b05,
+	0x807b817b, 0x847b827b,
+	0x857d9973, 0x8b7d817d,
+	0xbf06817d, 0xbfa20017,
+	0xbef600ff, 0x01000000,
+	0xbefd0084, 0xbf0a7b7d,
+	0xbfa10037, 0x7e008700,
+	0x7e028701, 0x7e048702,
+	0x7e068703, 0xe0685000,
+	0x701d0000, 0xe0685080,
+	0x701d0100, 0xe0685100,
+	0x701d0200, 0xe0685180,
+	0x701d0300, 0x807d847d,
+	0x8070ff70, 0x00000200,
+	0xbf0a7b7d, 0xbfa2ffef,
+	0xbfa00025, 0xbef600ff,
+	0x01000000, 0xbefd0084,
+	0xbf0a7b7d, 0xbfa10011,
+	0x7e008700, 0x7e028701,
+	0x7e048702, 0x7e068703,
+	0xe0685000, 0x701d0000,
+	0xe0685100, 0x701d0100,
+	0xe0685200, 0x701d0200,
+	0xe0685300, 0x701d0300,
+	0x807d847d, 0x8070ff70,
+	0x00000400, 0xbf0a7b7d,
+	0xbfa2ffef, 0xb8fb1e06,
+	0x8b7bc17b, 0xbfa1000c,
+	0x847b837b, 0x807b7d7b,
+	0xbefe00c1, 0xbeff0080,
+	0x7e008700, 0xe0685000,
+	0x701d0000, 0x807d817d,
+	0x8070ff70, 0x00000080,
+	0xbf0a7b7d, 0xbfa2fff8,
+	0xbfa00146, 0xbef4007e,
+	0x8b75ff7f, 0x0000ffff,
+	0x8c75ff75, 0x00040000,
+	0xbef60080, 0xbef700ff,
+	0x10807fac, 0xb8f202dc,
+	0x84729972, 0x8b6eff7f,
+	0x04000000, 0xbfa1003a,
+	0xbefe00c1, 0x857d9972,
+	0x8b7d817d, 0xbf06817d,
+	0xbfa20002, 0xbeff0080,
+	0xbfa00001, 0xbeff00c1,
+	0xb8ef4306, 0x8b6fc16f,
+	0xbfa1002f, 0x846f866f,
+	0x846f826f, 0xbef6006f,
+	0xb8f83b05, 0x80788178,
+	0xbf0d9972, 0xbfa20002,
+	0x84788978, 0xbfa00001,
+	0x84788a78, 0xb8ee1e06,
+	0x846e8a6e, 0x80786e78,
+	0x8078ff78, 0x00000200,
+	0x8078ff78, 0x00000080,
+	0xbef600ff, 0x01000000,
+	0x857d9972, 0x8b7d817d,
+	0xbf06817d, 0xbefd0080,
+	0xbfa2000c, 0xe0500000,
+	0x781d0000, 0xbf8903f7,
+	0xdac00000, 0x00000000,
+	0x807dff7d, 0x00000080,
+	0x8078ff78, 0x00000080,
+	0xbf0a6f7d, 0xbfa2fff5,
+	0xbfa0000b, 0xe0500000,
+	0x781d0000, 0xbf8903f7,
+	0xdac00000, 0x00000000,
+	0x807dff7d, 0x00000100,
+	0x8078ff78, 0x00000100,
+	0xbf0a6f7d, 0xbfa2fff5,
+	0xbef80080, 0xbefe00c1,
+	0x857d9972, 0x8b7d817d,
+	0xbf06817d, 0xbfa20002,
+	0xbeff0080, 0xbfa00001,
+	0xbeff00c1, 0xb8ef3b05,
+	0x806f816f, 0x846f826f,
+	0x857d9972, 0x8b7d817d,
+	0xbf06817d, 0xbfa20024,
+	0xbef600ff, 0x01000000,
+	0xbeee0078, 0x8078ff78,
+	0x00000200, 0xbefd0084,
+	0xbf0a6f7d, 0xbfa10050,
+	0xe0505000, 0x781d0000,
+	0xe0505080, 0x781d0100,
+	0xe0505100, 0x781d0200,
+	0xe0505180, 0x781d0300,
+	0xbf8903f7, 0x7e008500,
+	0x7e028501, 0x7e048502,
+	0x7e068503, 0x807d847d,
+	0x8078ff78, 0x00000200,
+	0xbf0a6f7d, 0xbfa2ffee,
+	0xe0505000, 0x6e1d0000,
+	0xe0505080, 0x6e1d0100,
+	0xe0505100, 0x6e1d0200,
+	0xe0505180, 0x6e1d0300,
+	0xbf8903f7, 0xbfa00034,
+	0xbef600ff, 0x01000000,
+	0xbeee0078, 0x8078ff78,
+	0x00000400, 0xbefd0084,
+	0xbf0a6f7d, 0xbfa10012,
+	0xe0505000, 0x781d0000,
+	0xe0505100, 0x781d0100,
+	0xe0505200, 0x781d0200,
+	0xe0505300, 0x781d0300,
+	0xbf8903f7, 0x7e008500,
+	0x7e028501, 0x7e048502,
+	0x7e068503, 0x807d847d,
+	0x8078ff78, 0x00000400,
+	0xbf0a6f7d, 0xbfa2ffee,
+	0xb8ef1e06, 0x8b6fc16f,
+	0xbfa1000e, 0x846f836f,
+	0x806f7d6f, 0xbefe00c1,
+	0xbeff0080, 0xe0505000,
+	0x781d0000, 0xbf8903f7,
+	0x7e008500, 0x807d817d,
+	0x8078ff78, 0x00000080,
+	0xbf0a6f7d, 0xbfa2fff7,
+	0xbeff00c1, 0xe0505000,
+	0x6e1d0000, 0xe0505100,
+	0x6e1d0100, 0xe0505200,
+	0x6e1d0200, 0xe0505300,
+	0x6e1d0300, 0xbf8903f7,
+	0xb8f83b05, 0x80788178,
+	0xbf0d9972, 0xbfa20002,
+	0x84788978, 0xbfa00001,
+	0x84788a78, 0xb8ee1e06,
+	0x846e8a6e, 0x80786e78,
+	0x8078ff78, 0x00000200,
+	0x80f8ff78, 0x00000050,
+	0xbef600ff, 0x01000000,
+	0xbefd00ff, 0x0000006c,
+	0x80f89078, 0xf428403a,
+	0xf0000000, 0xbf89fc07,
+	0x80fd847d, 0xbf800000,
+	0xbe804300, 0xbe824302,
+	0x80f8a078, 0xf42c403a,
+	0xf0000000, 0xbf89fc07,
+	0x80fd887d, 0xbf800000,
+	0xbe804300, 0xbe824302,
+	0xbe844304, 0xbe864306,
+	0x80f8c078, 0xf430403a,
+	0xf0000000, 0xbf89fc07,
+	0x80fd907d, 0xbf800000,
+	0xbe804300, 0xbe824302,
+	0xbe844304, 0xbe864306,
+	0xbe884308, 0xbe8a430a,
+	0xbe8c430c, 0xbe8e430e,
+	0xbf06807d, 0xbfa1fff0,
+	0xb980f801, 0x00000000,
+	0xbfbd0000, 0xb8f83b05,
+	0x80788178, 0xbf0d9972,
+	0xbfa20002, 0x84788978,
+	0xbfa00001, 0x84788a78,
+	0xb8ee1e06, 0x846e8a6e,
+	0x80786e78, 0x8078ff78,
+	0x00000200, 0xbef600ff,
+	0x01000000, 0xf4205bfa,
+	0xf0000000, 0x80788478,
+	0xf4205b3a, 0xf0000000,
+	0x80788478, 0xf4205b7a,
+	0xf0000000, 0x80788478,
+	0xf4205c3a, 0xf0000000,
+	0x80788478, 0xf4205c7a,
+	0xf0000000, 0x80788478,
+	0xf4205eba, 0xf0000000,
+	0x80788478, 0xf4205efa,
+	0xf0000000, 0x80788478,
+	0xf4205e7a, 0xf0000000,
+	0x80788478, 0xf4205cfa,
+	0xf0000000, 0x80788478,
+	0xf4205bba, 0xf0000000,
+	0x80788478, 0xbf89fc07,
+	0xb96ef814, 0xf4205bba,
+	0xf0000000, 0x80788478,
+	0xbf89fc07, 0xb96ef815,
+	0xbefd006f, 0xbefe0070,
+	0xbeff0071, 0x8b6f7bff,
+	0x000003ff, 0xb96f4803,
+	0x8b6f7bff, 0xfffff800,
+	0x856f8b6f, 0xb96fa2c3,
+	0xb973f801, 0xb8ee3b05,
+	0x806e816e, 0xbf0d9972,
+	0xbfa20002, 0x846e896e,
+	0xbfa00001, 0x846e8a6e,
+	0xb8ef1e06, 0x846f8a6f,
+	0x806e6f6e, 0x806eff6e,
+	0x00000200, 0x806e746e,
+	0x826f8075, 0x8b6fff6f,
+	0x0000ffff, 0xf4085c37,
+	0xf8000050, 0xf4085d37,
+	0xf8000060, 0xf4005e77,
+	0xf8000074, 0xbf89fc07,
+	0x8b6dff6d, 0x0000ffff,
+	0x8bfe7e7e, 0x8bea6a6a,
+	0xb8eef802, 0xbf0d866e,
+	0xbfa20002, 0xb97af802,
+	0xbe80486c, 0xb97af802,
+	0xbe804a6c, 0xbfb00000,
+	0xbf9f0000, 0xbf9f0000,
+	0xbf9f0000, 0xbf9f0000,
+	0xbf9f0000, 0x00000000,
+};
+
+static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
+	0xbf820001, 0xbf8202db,
+	0xb8f8f802, 0x8978ff78,
+	0x00020006, 0xb8fbf803,
+	0x866eff78, 0x00002000,
+	0xbf840009, 0x866eff6d,
+	0x00ff0000, 0xbf85001a,
+	0x866eff7b, 0x00000400,
+	0xbf850051, 0xbf8e0010,
+	0xb8fbf803, 0xbf82fffa,
+	0x866eff7b, 0x03c00900,
+	0xbf850011, 0x866eff7b,
+	0x000071ff, 0xbf840008,
+	0x866fff7b, 0x00007080,
+	0xbf840001, 0xbeee1a87,
+	0xb8eff801, 0x8e6e8c6e,
+	0x866e6f6e, 0xbf850006,
+	0x866eff6d, 0x00ff0000,
+	0xbf850003, 0x866eff7b,
+	0x00000400, 0xbf85003a,
+	0xb8faf807, 0x867aff7a,
+	0x001f8000, 0x8e7a8b7a,
+	0x8979ff79, 0xfc000000,
+	0x87797a79, 0xba7ff807,
+	0x00000000, 0xb8faf812,
+	0xb8fbf813, 0x8efa887a,
+	0xbf0d8f7b, 0xbf840002,
+	0x877bff7b, 0xffff0000,
+	0xc0031bbd, 0x00000010,
+	0xbf8cc07f, 0x8e6e976e,
+	0x8979ff79, 0x00800000,
+	0x87796e79, 0xc0071bbd,
+	0x00000000, 0xbf8cc07f,
+	0xc0071ebd, 0x00000008,
+	0xbf8cc07f, 0x86ee6e6e,
+	0xbf840001, 0xbe801d6e,
+	0x866eff6d, 0x01ff0000,
+	0xbf850005, 0x8778ff78,
+	0x00002000, 0x80ec886c,
+	0x82ed806d, 0xbf820005,
+	0x866eff6d, 0x01000000,
+	0xbf850002, 0x806c846c,
+	0x826d806d, 0x866dff6d,
+	0x0000ffff, 0x8f7a8b79,
+	0x867aff7a, 0x001f8000,
+	0xb97af807, 0x86fe7e7e,
+	0x86ea6a6a, 0x8f6e8378,
+	0xb96ee0c2, 0xbf800002,
+	0xb9780002, 0xbe801f6c,
+	0x866dff6d, 0x0000ffff,
+	0xbefa0080, 0xb97a0283,
+	0xb8faf807, 0x867aff7a,
+	0x001f8000, 0x8e7a8b7a,
+	0x8979ff79, 0xfc000000,
+	0x87797a79, 0xba7ff807,
+	0x00000000, 0xbeee007e,
+	0xbeef007f, 0xbefe0180,
+	0xbf900004, 0x877a8478,
+	0xb97af802, 0xbf8e0002,
+	0xbf88fffe, 0xb8fa2985,
+	0x807a817a, 0x8e7a8a7a,
+	0x8e7a817a, 0xb8fb1605,
+	0x807b817b, 0x8e7b867b,
+	0x807a7b7a, 0x807a7e7a,
+	0x827b807f, 0x867bff7b,
+	0x0000ffff, 0xc04b1c3d,
+	0x00000050, 0xbf8cc07f,
+	0xc04b1d3d, 0x00000060,
+	0xbf8cc07f, 0xc0431e7d,
+	0x00000074, 0xbf8cc07f,
+	0xbef4007e, 0x8675ff7f,
+	0x0000ffff, 0x8775ff75,
+	0x00040000, 0xbef60080,
+	0xbef700ff, 0x00807fac,
+	0xbef1007c, 0xbef00080,
+	0xb8f02985, 0x80708170,
+	0x8e708a70, 0x8e708170,
+	0xb8fa1605, 0x807a817a,
+	0x8e7a867a, 0x80707a70,
+	0xbef60084, 0xbef600ff,
+	0x01000000, 0xbefe007c,
+	0xbefc0070, 0xc0611c7a,
+	0x0000007c, 0xbf8cc07f,
+	0x80708470, 0xbefc007e,
+	0xbefe007c, 0xbefc0070,
+	0xc0611b3a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xbefe007c,
+	0xbefc0070, 0xc0611b7a,
+	0x0000007c, 0xbf8cc07f,
+	0x80708470, 0xbefc007e,
+	0xbefe007c, 0xbefc0070,
+	0xc0611bba, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xbefe007c,
+	0xbefc0070, 0xc0611bfa,
+	0x0000007c, 0xbf8cc07f,
+	0x80708470, 0xbefc007e,
+	0xbefe007c, 0xbefc0070,
+	0xc0611e3a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xb8fbf803,
+	0xbefe007c, 0xbefc0070,
+	0xc0611efa, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xbefe007c,
+	0xbefc0070, 0xc0611a3a,
+	0x0000007c, 0xbf8cc07f,
+	0x80708470, 0xbefc007e,
+	0xbefe007c, 0xbefc0070,
+	0xc0611a7a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xb8f1f801,
+	0xbefe007c, 0xbefc0070,
+	0xc0611c7a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0x867aff7f,
+	0x04000000, 0xbeef0080,
+	0x876f6f7a, 0xb8f02985,
+	0x80708170, 0x8e708a70,
+	0x8e708170, 0xb8fb1605,
+	0x807b817b, 0x8e7b847b,
+	0x8e76827b, 0xbef600ff,
+	0x01000000, 0xbef20174,
+	0x80747074, 0x82758075,
+	0xbefc0080, 0xbf800000,
+	0xbe802b00, 0xbe822b02,
+	0xbe842b04, 0xbe862b06,
+	0xbe882b08, 0xbe8a2b0a,
+	0xbe8c2b0c, 0xbe8e2b0e,
+	0xc06b003a, 0x00000000,
+	0xbf8cc07f, 0xc06b013a,
+	0x00000010, 0xbf8cc07f,
+	0xc06b023a, 0x00000020,
+	0xbf8cc07f, 0xc06b033a,
+	0x00000030, 0xbf8cc07f,
+	0x8074c074, 0x82758075,
+	0x807c907c, 0xbf0a7b7c,
+	0xbf85ffe7, 0xbef40172,
+	0xbef00080, 0xbefe00c1,
+	0xbeff00c1, 0xbee80080,
+	0xbee90080, 0xbef600ff,
+	0x01000000, 0x867aff78,
+	0x00400000, 0xbf850003,
+	0xb8faf803, 0x897a7aff,
+	0x10000000, 0xbf85004d,
+	0xbe840080, 0xd2890000,
+	0x00000900, 0x80048104,
+	0xd2890001, 0x00000900,
+	0x80048104, 0xd2890002,
+	0x00000900, 0x80048104,
+	0xd2890003, 0x00000900,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0xbe840080,
+	0xd2890000, 0x00000901,
+	0x80048104, 0xd2890001,
+	0x00000901, 0x80048104,
+	0xd2890002, 0x00000901,
+	0x80048104, 0xd2890003,
+	0x00000901, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbe840080, 0xd2890000,
+	0x00000902, 0x80048104,
+	0xd2890001, 0x00000902,
+	0x80048104, 0xd2890002,
+	0x00000902, 0x80048104,
+	0xd2890003, 0x00000902,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0xbe840080,
+	0xd2890000, 0x00000903,
+	0x80048104, 0xd2890001,
+	0x00000903, 0x80048104,
+	0xd2890002, 0x00000903,
+	0x80048104, 0xd2890003,
+	0x00000903, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbf820008, 0xe0724000,
+	0x701d0000, 0xe0724100,
+	0x701d0100, 0xe0724200,
+	0x701d0200, 0xe0724300,
+	0x701d0300, 0xbefe00c1,
+	0xbeff00c1, 0xb8fb4306,
+	0x867bc17b, 0xbf840064,
+	0xbf8a0000, 0x867aff6f,
+	0x04000000, 0xbf840060,
+	0x8e7b867b, 0x8e7b827b,
+	0xbef6007b, 0xb8f02985,
+	0x80708170, 0x8e708a70,
+	0x8e708170, 0xb8fa1605,
+	0x807a817a, 0x8e7a867a,
+	0x80707a70, 0x8070ff70,
+	0x00000080, 0xbef600ff,
+	0x01000000, 0xbefc0080,
+	0xd28c0002, 0x000100c1,
+	0xd28d0003, 0x000204c1,
+	0x867aff78, 0x00400000,
+	0xbf850003, 0xb8faf803,
+	0x897a7aff, 0x10000000,
+	0xbf850030, 0x24040682,
+	0xd86e4000, 0x00000002,
+	0xbf8cc07f, 0xbe840080,
+	0xd2890000, 0x00000900,
+	0x80048104, 0xd2890001,
+	0x00000900, 0x80048104,
+	0xd2890002, 0x00000900,
+	0x80048104, 0xd2890003,
+	0x00000900, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbe840080, 0xd2890000,
+	0x00000901, 0x80048104,
+	0xd2890001, 0x00000901,
+	0x80048104, 0xd2890002,
+	0x00000901, 0x80048104,
+	0xd2890003, 0x00000901,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0x680404ff,
+	0x00000200, 0xd0c9006a,
+	0x0000f702, 0xbf87ffd2,
+	0xbf820015, 0xd1060002,
+	0x00011103, 0x7e0602ff,
+	0x00000200, 0xbefc00ff,
+	0x00010000, 0xbe800077,
+	0x8677ff77, 0xff7fffff,
+	0x8777ff77, 0x00058000,
+	0xd8ec0000, 0x00000002,
+	0xbf8cc07f, 0xe0765000,
+	0x701d0002, 0x68040702,
+	0xd0c9006a, 0x0000f702,
+	0xbf87fff7, 0xbef70000,
+	0xbef000ff, 0x00000400,
+	0xbefe00c1, 0xbeff00c1,
+	0xb8fb2b05, 0x807b817b,
+	0x8e7b827b, 0xbef600ff,
+	0x01000000, 0xbefc0084,
+	0xbf0a7b7c, 0xbf84006d,
+	0xbf11017c, 0x807bff7b,
+	0x00001000, 0x867aff78,
+	0x00400000, 0xbf850003,
+	0xb8faf803, 0x897a7aff,
+	0x10000000, 0xbf850051,
+	0xbe840080, 0xd2890000,
+	0x00000900, 0x80048104,
+	0xd2890001, 0x00000900,
+	0x80048104, 0xd2890002,
+	0x00000900, 0x80048104,
+	0xd2890003, 0x00000900,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0xbe840080,
+	0xd2890000, 0x00000901,
+	0x80048104, 0xd2890001,
+	0x00000901, 0x80048104,
+	0xd2890002, 0x00000901,
+	0x80048104, 0xd2890003,
+	0x00000901, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbe840080, 0xd2890000,
+	0x00000902, 0x80048104,
+	0xd2890001, 0x00000902,
+	0x80048104, 0xd2890002,
+	0x00000902, 0x80048104,
+	0xd2890003, 0x00000902,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0xbe840080,
+	0xd2890000, 0x00000903,
+	0x80048104, 0xd2890001,
+	0x00000903, 0x80048104,
+	0xd2890002, 0x00000903,
+	0x80048104, 0xd2890003,
+	0x00000903, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0x807c847c, 0xbf0a7b7c,
+	0xbf85ffb1, 0xbf9c0000,
+	0xbf820012, 0x7e000300,
+	0x7e020301, 0x7e040302,
+	0x7e060303, 0xe0724000,
+	0x701d0000, 0xe0724100,
+	0x701d0100, 0xe0724200,
+	0x701d0200, 0xe0724300,
+	0x701d0300, 0x807c847c,
+	0x8070ff70, 0x00000400,
+	0xbf0a7b7c, 0xbf85ffef,
+	0xbf9c0000, 0xb8fb2985,
+	0x807b817b, 0x8e7b837b,
+	0xb8fa2b05, 0x807a817a,
+	0x8e7a827a, 0x80fb7a7b,
+	0x867b7b7b, 0xbf84007a,
+	0x807bff7b, 0x00001000,
+	0xbefc0080, 0xbf11017c,
+	0x867aff78, 0x00400000,
+	0xbf850003, 0xb8faf803,
+	0x897a7aff, 0x10000000,
+	0xbf850059, 0xd3d84000,
+	0x18000100, 0xd3d84001,
+	0x18000101, 0xd3d84002,
+	0x18000102, 0xd3d84003,
+	0x18000103, 0xbe840080,
+	0xd2890000, 0x00000900,
+	0x80048104, 0xd2890001,
+	0x00000900, 0x80048104,
+	0xd2890002, 0x00000900,
+	0x80048104, 0xd2890003,
+	0x00000900, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbe840080, 0xd2890000,
+	0x00000901, 0x80048104,
+	0xd2890001, 0x00000901,
+	0x80048104, 0xd2890002,
+	0x00000901, 0x80048104,
+	0xd2890003, 0x00000901,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0xbe840080,
+	0xd2890000, 0x00000902,
+	0x80048104, 0xd2890001,
+	0x00000902, 0x80048104,
+	0xd2890002, 0x00000902,
+	0x80048104, 0xd2890003,
+	0x00000902, 0x80048104,
+	0xc069003a, 0x00000070,
+	0xbf8cc07f, 0x80709070,
+	0xbf06c004, 0xbf84ffee,
+	0xbe840080, 0xd2890000,
+	0x00000903, 0x80048104,
+	0xd2890001, 0x00000903,
+	0x80048104, 0xd2890002,
+	0x00000903, 0x80048104,
+	0xd2890003, 0x00000903,
+	0x80048104, 0xc069003a,
+	0x00000070, 0xbf8cc07f,
+	0x80709070, 0xbf06c004,
+	0xbf84ffee, 0x807c847c,
+	0xbf0a7b7c, 0xbf85ffa9,
+	0xbf9c0000, 0xbf820016,
+	0xd3d84000, 0x18000100,
+	0xd3d84001, 0x18000101,
+	0xd3d84002, 0x18000102,
+	0xd3d84003, 0x18000103,
+	0xe0724000, 0x701d0000,
+	0xe0724100, 0x701d0100,
+	0xe0724200, 0x701d0200,
+	0xe0724300, 0x701d0300,
+	0x807c847c, 0x8070ff70,
+	0x00000400, 0xbf0a7b7c,
+	0xbf85ffeb, 0xbf9c0000,
+	0xbf8200ee, 0xbef4007e,
+	0x8675ff7f, 0x0000ffff,
+	0x8775ff75, 0x00040000,
+	0xbef60080, 0xbef700ff,
+	0x00807fac, 0x866eff7f,
+	0x04000000, 0xbf84001f,
+	0xbefe00c1, 0xbeff00c1,
+	0xb8ef4306, 0x866fc16f,
+	0xbf84001a, 0x8e6f866f,
+	0x8e6f826f, 0xbef6006f,
+	0xb8f82985, 0x80788178,
+	0x8e788a78, 0x8e788178,
+	0xb8ee1605, 0x806e816e,
+	0x8e6e866e, 0x80786e78,
+	0x8078ff78, 0x00000080,
+	0xbef600ff, 0x01000000,
+	0xbefc0080, 0xe0510000,
+	0x781d0000, 0xe0510100,
+	0x781d0000, 0x807cff7c,
+	0x00000200, 0x8078ff78,
+	0x00000200, 0xbf0a6f7c,
+	0xbf85fff6, 0xbefe00c1,
+	0xbeff00c1, 0xbef600ff,
+	0x01000000, 0xb8ef2b05,
+	0x806f816f, 0x8e6f826f,
+	0x806fff6f, 0x00008000,
+	0xbef80080, 0xbeee0078,
+	0x8078ff78, 0x00000400,
+	0xbefc0084, 0xbf11087c,
+	0xe0524000, 0x781d0000,
+	0xe0524100, 0x781d0100,
+	0xe0524200, 0x781d0200,
+	0xe0524300, 0x781d0300,
+	0xbf8c0f70, 0x7e000300,
+	0x7e020301, 0x7e040302,
+	0x7e060303, 0x807c847c,
+	0x8078ff78, 0x00000400,
+	0xbf0a6f7c, 0xbf85ffee,
+	0xb8ef2985, 0x806f816f,
+	0x8e6f836f, 0xb8f92b05,
+	0x80798179, 0x8e798279,
+	0x80ef796f, 0x866f6f6f,
+	0xbf84001a, 0x806fff6f,
+	0x00008000, 0xbefc0080,
+	0xbf11087c, 0xe0524000,
+	0x781d0000, 0xe0524100,
+	0x781d0100, 0xe0524200,
+	0x781d0200, 0xe0524300,
+	0x781d0300, 0xbf8c0f70,
+	0xd3d94000, 0x18000100,
+	0xd3d94001, 0x18000101,
+	0xd3d94002, 0x18000102,
+	0xd3d94003, 0x18000103,
+	0x807c847c, 0x8078ff78,
+	0x00000400, 0xbf0a6f7c,
+	0xbf85ffea, 0xbf9c0000,
+	0xe0524000, 0x6e1d0000,
+	0xe0524100, 0x6e1d0100,
+	0xe0524200, 0x6e1d0200,
+	0xe0524300, 0x6e1d0300,
+	0xbf8c0f70, 0xb8f82985,
+	0x80788178, 0x8e788a78,
+	0x8e788178, 0xb8ee1605,
+	0x806e816e, 0x8e6e866e,
+	0x80786e78, 0x80f8c078,
+	0xb8ef1605, 0x806f816f,
+	0x8e6f846f, 0x8e76826f,
+	0xbef600ff, 0x01000000,
+	0xbefc006f, 0xc031003a,
+	0x00000078, 0x80f8c078,
+	0xbf8cc07f, 0x80fc907c,
+	0xbf800000, 0xbe802d00,
+	0xbe822d02, 0xbe842d04,
+	0xbe862d06, 0xbe882d08,
+	0xbe8a2d0a, 0xbe8c2d0c,
+	0xbe8e2d0e, 0xbf06807c,
+	0xbf84fff0, 0xb8f82985,
+	0x80788178, 0x8e788a78,
+	0x8e788178, 0xb8ee1605,
+	0x806e816e, 0x8e6e866e,
+	0x80786e78, 0xbef60084,
+	0xbef600ff, 0x01000000,
+	0xc0211bfa, 0x00000078,
+	0x80788478, 0xc0211b3a,
+	0x00000078, 0x80788478,
+	0xc0211b7a, 0x00000078,
+	0x80788478, 0xc0211c3a,
+	0x00000078, 0x80788478,
+	0xc0211c7a, 0x00000078,
+	0x80788478, 0xc0211eba,
+	0x00000078, 0x80788478,
+	0xc0211efa, 0x00000078,
+	0x80788478, 0xc0211a3a,
+	0x00000078, 0x80788478,
+	0xc0211a7a, 0x00000078,
+	0x80788478, 0xc0211cfa,
+	0x00000078, 0x80788478,
+	0xbf8cc07f, 0xbefc006f,
+	0xbefe0070, 0xbeff0071,
+	0x866f7bff, 0x000003ff,
+	0xb96f4803, 0x866f7bff,
+	0xfffff800, 0x8f6f8b6f,
+	0xb96fa2c3, 0xb973f801,
+	0xb8ee2985, 0x806e816e,
+	0x8e6e8a6e, 0x8e6e816e,
+	0xb8ef1605, 0x806f816f,
+	0x8e6f866f, 0x806e6f6e,
+	0x806e746e, 0x826f8075,
+	0x866fff6f, 0x0000ffff,
+	0xc00b1c37, 0x00000050,
+	0xc00b1d37, 0x00000060,
+	0xc0031e77, 0x00000074,
+	0xbf8cc07f, 0x8f6e8b79,
+	0x866eff6e, 0x001f8000,
+	0xb96ef807, 0x866dff6d,
+	0x0000ffff, 0x86fe7e7e,
+	0x86ea6a6a, 0x8f6e837a,
+	0xb96ee0c2, 0xbf800002,
+	0xb97a0002, 0xbf8a0000,
+	0xbe801f6c, 0xbf810000,
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
new file mode 100644
index 000000000..fdab64624
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -0,0 +1,1231 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* To compile this assembly code:
+ *
+ * Navi1x:
+ *   cpp -DASIC_FAMILY=CHIP_NAVI10 cwsr_trap_handler_gfx10.asm -P -o nv1x.sp3
+ *   sp3 nv1x.sp3 -hex nv1x.hex
+ *
+ * gfx10:
+ *   cpp -DASIC_FAMILY=CHIP_SIENNA_CICHLID cwsr_trap_handler_gfx10.asm -P -o gfx10.sp3
+ *   sp3 gfx10.sp3 -hex gfx10.hex
+ *
+ * gfx11:
+ *   cpp -DASIC_FAMILY=CHIP_PLUM_BONITO cwsr_trap_handler_gfx10.asm -P -o gfx11.sp3
+ *   sp3 gfx11.sp3 -hex gfx11.hex
+ */
+
+#define CHIP_NAVI10 26
+#define CHIP_SIENNA_CICHLID 30
+#define CHIP_PLUM_BONITO 36
+
+#define NO_SQC_STORE (ASIC_FAMILY >= CHIP_SIENNA_CICHLID)
+#define HAVE_XNACK (ASIC_FAMILY < CHIP_SIENNA_CICHLID)
+#define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO)
+#define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO)
+#define SW_SA_TRAP (ASIC_FAMILY >= CHIP_PLUM_BONITO)
+
+var SINGLE_STEP_MISSED_WORKAROUND		= 1	//workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
+
+var SQ_WAVE_STATUS_SPI_PRIO_MASK		= 0x00000006
+var SQ_WAVE_STATUS_HALT_MASK			= 0x2000
+var SQ_WAVE_STATUS_ECC_ERR_MASK			= 0x20000
+var SQ_WAVE_STATUS_TRAP_EN_SHIFT		= 6
+
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT		= 12
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE		= 9
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE		= 8
+var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT	= 24
+var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE	= 4
+var SQ_WAVE_IB_STS2_WAVE64_SHIFT		= 11
+var SQ_WAVE_IB_STS2_WAVE64_SIZE			= 1
+
+#if ASIC_FAMILY < CHIP_PLUM_BONITO
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT		= 8
+#else
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT		= 12
+#endif
+
+var SQ_WAVE_TRAPSTS_SAVECTX_MASK		= 0x400
+var SQ_WAVE_TRAPSTS_EXCP_MASK			= 0x1FF
+var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT		= 10
+var SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK		= 0x80
+var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT		= 7
+var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK		= 0x100
+var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT		= 8
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK		= 0x3FF
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT		= 0x0
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE		= 10
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK		= 0xFFFFF800
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT		= 11
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE		= 21
+var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK		= 0x800
+var SQ_WAVE_TRAPSTS_EXCP_HI_MASK		= 0x7000
+
+var SQ_WAVE_MODE_EXCP_EN_SHIFT			= 12
+var SQ_WAVE_MODE_EXCP_EN_ADDR_WATCH_SHIFT	= 19
+
+var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT		= 15
+var SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT		= 25
+var SQ_WAVE_IB_STS_REPLAY_W64H_MASK		= 0x02000000
+var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK	= 0x003F8000
+
+var SQ_WAVE_MODE_DEBUG_EN_MASK			= 0x800
+
+// bits [31:24] unused by SPI debug data
+var TTMP11_SAVE_REPLAY_W64H_SHIFT		= 31
+var TTMP11_SAVE_REPLAY_W64H_MASK		= 0x80000000
+var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT		= 24
+var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK		= 0x7F000000
+var TTMP11_DEBUG_TRAP_ENABLED_SHIFT		= 23
+var TTMP11_DEBUG_TRAP_ENABLED_MASK		= 0x800000
+
+// SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14]
+// when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
+var S_SAVE_BUF_RSRC_WORD1_STRIDE		= 0x00040000
+var S_SAVE_BUF_RSRC_WORD3_MISC			= 0x10807FAC
+var S_SAVE_PC_HI_TRAP_ID_MASK			= 0x00FF0000
+var S_SAVE_PC_HI_HT_MASK			= 0x01000000
+var S_SAVE_SPI_INIT_FIRST_WAVE_MASK		= 0x04000000
+var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT		= 26
+
+var S_SAVE_PC_HI_FIRST_WAVE_MASK		= 0x80000000
+var S_SAVE_PC_HI_FIRST_WAVE_SHIFT		= 31
+
+var s_sgpr_save_num				= 108
+
+var s_save_spi_init_lo				= exec_lo
+var s_save_spi_init_hi				= exec_hi
+var s_save_pc_lo				= ttmp0
+var s_save_pc_hi				= ttmp1
+var s_save_exec_lo				= ttmp2
+var s_save_exec_hi				= ttmp3
+var s_save_status				= ttmp12
+var s_save_trapsts				= ttmp15
+var s_save_xnack_mask				= s_save_trapsts
+var s_wave_size					= ttmp7
+var s_save_buf_rsrc0				= ttmp8
+var s_save_buf_rsrc1				= ttmp9
+var s_save_buf_rsrc2				= ttmp10
+var s_save_buf_rsrc3				= ttmp11
+var s_save_mem_offset				= ttmp4
+var s_save_alloc_size				= s_save_trapsts
+var s_save_tmp					= ttmp14
+var s_save_m0					= ttmp5
+var s_save_ttmps_lo				= s_save_tmp
+var s_save_ttmps_hi				= s_save_trapsts
+
+var S_RESTORE_BUF_RSRC_WORD1_STRIDE		= S_SAVE_BUF_RSRC_WORD1_STRIDE
+var S_RESTORE_BUF_RSRC_WORD3_MISC		= S_SAVE_BUF_RSRC_WORD3_MISC
+
+var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK		= 0x04000000
+var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT		= 26
+var S_WAVE_SIZE					= 25
+
+var s_restore_spi_init_lo			= exec_lo
+var s_restore_spi_init_hi			= exec_hi
+var s_restore_mem_offset			= ttmp12
+var s_restore_alloc_size			= ttmp3
+var s_restore_tmp				= ttmp2
+var s_restore_mem_offset_save			= s_restore_tmp
+var s_restore_m0				= s_restore_alloc_size
+var s_restore_mode				= ttmp7
+var s_restore_flat_scratch			= s_restore_tmp
+var s_restore_pc_lo				= ttmp0
+var s_restore_pc_hi				= ttmp1
+var s_restore_exec_lo				= ttmp4
+var s_restore_exec_hi				= ttmp5
+var s_restore_status				= ttmp14
+var s_restore_trapsts				= ttmp15
+var s_restore_xnack_mask			= ttmp13
+var s_restore_buf_rsrc0				= ttmp8
+var s_restore_buf_rsrc1				= ttmp9
+var s_restore_buf_rsrc2				= ttmp10
+var s_restore_buf_rsrc3				= ttmp11
+var s_restore_size				= ttmp6
+var s_restore_ttmps_lo				= s_restore_tmp
+var s_restore_ttmps_hi				= s_restore_alloc_size
+
+shader main
+	asic(DEFAULT)
+	type(CS)
+	wave_size(32)
+
+	s_branch	L_SKIP_RESTORE						//NOT restore. might be a regular trap or save
+
+L_JUMP_TO_RESTORE:
+	s_branch	L_RESTORE
+
+L_SKIP_RESTORE:
+	s_getreg_b32	s_save_status, hwreg(HW_REG_STATUS)			//save STATUS since we will change SCC
+
+	// Clear SPI_PRIO: do not save with elevated priority.
+	// Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
+	s_andn2_b32	s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_ECC_ERR_MASK
+
+	s_getreg_b32	s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+
+#if SW_SA_TRAP
+	// If ttmp1[30] is set then issue s_barrier to unblock dependent waves.
+	s_bitcmp1_b32	s_save_pc_hi, 30
+	s_cbranch_scc0	L_TRAP_NO_BARRIER
+	s_barrier
+
+L_TRAP_NO_BARRIER:
+	// If ttmp1[31] is set then trap may occur early.
+	// Spin wait until SAVECTX exception is raised.
+	s_bitcmp1_b32	s_save_pc_hi, 31
+	s_cbranch_scc1  L_CHECK_SAVE
+#endif
+
+	s_and_b32       ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+	s_cbranch_scc0	L_NOT_HALTED
+
+L_HALTED:
+	// Host trap may occur while wave is halted.
+	s_and_b32	ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+	s_cbranch_scc1	L_FETCH_2ND_TRAP
+
+L_CHECK_SAVE:
+	s_and_b32	ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK
+	s_cbranch_scc1	L_SAVE
+
+	// Wave is halted but neither host trap nor SAVECTX is raised.
+	// Caused by instruction fetch memory violation.
+	// Spin wait until context saved to prevent interrupt storm.
+	s_sleep		0x10
+	s_getreg_b32	s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+	s_branch	L_CHECK_SAVE
+
+L_NOT_HALTED:
+	// Let second-level handle non-SAVECTX exception or trap.
+	// Any concurrent SAVECTX will be handled upon re-entry once halted.
+
+	// Check non-maskable exceptions. memory_violation, illegal_instruction
+	// and xnack_error exceptions always cause the wave to enter the trap
+	// handler.
+	s_and_b32	ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK|SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
+	s_cbranch_scc1	L_FETCH_2ND_TRAP
+
+	// Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
+	// Maskable exceptions only cause the wave to enter the trap handler if
+	// their respective bit in mode.excp_en is set.
+	s_and_b32	ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
+	s_cbranch_scc0	L_CHECK_TRAP_ID
+
+	s_and_b32	ttmp3, s_save_trapsts, SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
+	s_cbranch_scc0	L_NOT_ADDR_WATCH
+	s_bitset1_b32	ttmp2, SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT // Check all addr_watch[123] exceptions against excp_en.addr_watch
+
+L_NOT_ADDR_WATCH:
+	s_getreg_b32	ttmp3, hwreg(HW_REG_MODE)
+	s_lshl_b32	ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT
+	s_and_b32	ttmp2, ttmp2, ttmp3
+	s_cbranch_scc1	L_FETCH_2ND_TRAP
+
+L_CHECK_TRAP_ID:
+	// Check trap_id != 0
+	s_and_b32	ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+	s_cbranch_scc1	L_FETCH_2ND_TRAP
+
+if SINGLE_STEP_MISSED_WORKAROUND
+	// Prioritize single step exception over context save.
+	// Second-level trap will halt wave and RFE, re-entering for SAVECTX.
+	s_getreg_b32	ttmp2, hwreg(HW_REG_MODE)
+	s_and_b32	ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
+	s_cbranch_scc1	L_FETCH_2ND_TRAP
+end
+
+	s_and_b32	ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK
+	s_cbranch_scc1	L_SAVE
+
+L_FETCH_2ND_TRAP:
+#if HAVE_XNACK
+	save_and_clear_ib_sts(ttmp14, ttmp15)
+#endif
+
+	// Read second-level TBA/TMA from first-level TMA and jump if available.
+	// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
+	// ttmp12 holds SQ_WAVE_STATUS
+#if HAVE_SENDMSG_RTN
+	s_sendmsg_rtn_b64       [ttmp14, ttmp15], sendmsg(MSG_RTN_GET_TMA)
+	s_waitcnt       lgkmcnt(0)
+#else
+	s_getreg_b32	ttmp14, hwreg(HW_REG_SHADER_TMA_LO)
+	s_getreg_b32	ttmp15, hwreg(HW_REG_SHADER_TMA_HI)
+#endif
+	s_lshl_b64	[ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
+
+	s_bitcmp1_b32	ttmp15, 0xF
+	s_cbranch_scc0	L_NO_SIGN_EXTEND_TMA
+	s_or_b32	ttmp15, ttmp15, 0xFFFF0000
+L_NO_SIGN_EXTEND_TMA:
+
+	s_load_dword    ttmp2, [ttmp14, ttmp15], 0x10 glc:1			// debug trap enabled flag
+	s_waitcnt       lgkmcnt(0)
+	s_lshl_b32      ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
+	s_andn2_b32     ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK
+	s_or_b32        ttmp11, ttmp11, ttmp2
+
+	s_load_dwordx2	[ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1		// second-level TBA
+	s_waitcnt	lgkmcnt(0)
+	s_load_dwordx2	[ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1		// second-level TMA
+	s_waitcnt	lgkmcnt(0)
+
+	s_and_b64	[ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
+	s_cbranch_scc0	L_NO_NEXT_TRAP						// second-level trap handler not been set
+	s_setpc_b64	[ttmp2, ttmp3]						// jump to second-level trap handler
+
+L_NO_NEXT_TRAP:
+	// If not caused by trap then halt wave to prevent re-entry.
+	s_and_b32	ttmp2, s_save_pc_hi, (S_SAVE_PC_HI_TRAP_ID_MASK|S_SAVE_PC_HI_HT_MASK)
+	s_cbranch_scc1	L_TRAP_CASE
+	s_or_b32	s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+
+	// If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
+	// Rewind the PC to prevent this from occurring.
+	s_sub_u32	ttmp0, ttmp0, 0x8
+	s_subb_u32	ttmp1, ttmp1, 0x0
+
+	s_branch	L_EXIT_TRAP
+
+L_TRAP_CASE:
+	// Host trap will not cause trap re-entry.
+	s_and_b32	ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK
+	s_cbranch_scc1	L_EXIT_TRAP
+
+	// Advance past trap instruction to prevent re-entry.
+	s_add_u32	ttmp0, ttmp0, 0x4
+	s_addc_u32	ttmp1, ttmp1, 0x0
+
+L_EXIT_TRAP:
+	s_and_b32	ttmp1, ttmp1, 0xFFFF
+
+#if HAVE_XNACK
+	restore_ib_sts(ttmp14, ttmp15)
+#endif
+
+	// Restore SQ_WAVE_STATUS.
+	s_and_b64	exec, exec, exec					// Restore STATUS.EXECZ, not writable by s_setreg_b32
+	s_and_b64	vcc, vcc, vcc						// Restore STATUS.VCCZ, not writable by s_setreg_b32
+	s_setreg_b32	hwreg(HW_REG_STATUS), s_save_status
+
+	s_rfe_b64	[ttmp0, ttmp1]
+
+L_SAVE:
+	s_and_b32	s_save_pc_hi, s_save_pc_hi, 0x0000ffff			//pc[47:32]
+	s_mov_b32	s_save_tmp, 0
+	s_setreg_b32	hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp	//clear saveCtx bit
+
+#if HAVE_XNACK
+	save_and_clear_ib_sts(s_save_tmp, s_save_trapsts)
+#endif
+
+	/* inform SPI the readiness and wait for SPI's go signal */
+	s_mov_b32	s_save_exec_lo, exec_lo					//save EXEC and use EXEC for the go signal from SPI
+	s_mov_b32	s_save_exec_hi, exec_hi
+	s_mov_b64	exec, 0x0						//clear EXEC to get ready to receive
+
+#if HAVE_SENDMSG_RTN
+	s_sendmsg_rtn_b64       [exec_lo, exec_hi], sendmsg(MSG_RTN_SAVE_WAVE)
+#else
+	s_sendmsg	sendmsg(MSG_SAVEWAVE)					//send SPI a message and wait for SPI's write to EXEC
+#endif
+
+#if ASIC_FAMILY < CHIP_SIENNA_CICHLID
+L_SLEEP:
+	// sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause
+	// SQ hang, since the 7,8th wave could not get arbit to exec inst, while
+	// other waves are stuck into the sleep-loop and waiting for wrexec!=0
+	s_sleep		0x2
+	s_cbranch_execz	L_SLEEP
+#else
+	s_waitcnt	lgkmcnt(0)
+#endif
+
+	// Save first_wave flag so we can clear high bits of save address.
+	s_and_b32	s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
+	s_lshl_b32	s_save_tmp, s_save_tmp, (S_SAVE_PC_HI_FIRST_WAVE_SHIFT - S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT)
+	s_or_b32	s_save_pc_hi, s_save_pc_hi, s_save_tmp
+
+#if NO_SQC_STORE
+	// Trap temporaries must be saved via VGPR but all VGPRs are in use.
+	// There is no ttmp space to hold the resource constant for VGPR save.
+	// Save v0 by itself since it requires only two SGPRs.
+	s_mov_b32	s_save_ttmps_lo, exec_lo
+	s_and_b32	s_save_ttmps_hi, exec_hi, 0xFFFF
+	s_mov_b32	exec_lo, 0xFFFFFFFF
+	s_mov_b32	exec_hi, 0xFFFFFFFF
+	global_store_dword_addtid	v0, [s_save_ttmps_lo, s_save_ttmps_hi] slc:1 glc:1
+	v_mov_b32	v0, 0x0
+	s_mov_b32	exec_lo, s_save_ttmps_lo
+	s_mov_b32	exec_hi, s_save_ttmps_hi
+#endif
+
+	// Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
+	// ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
+	get_wave_size(s_save_ttmps_hi)
+	get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi)
+	get_svgpr_size_bytes(s_save_ttmps_hi)
+	s_add_u32	s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi
+	s_and_b32	s_save_ttmps_hi, s_save_spi_init_hi, 0xFFFF
+	s_add_u32	s_save_ttmps_lo, s_save_ttmps_lo, get_sgpr_size_bytes()
+	s_add_u32	s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
+	s_addc_u32	s_save_ttmps_hi, s_save_ttmps_hi, 0x0
+
+#if NO_SQC_STORE
+	v_writelane_b32	v0, ttmp4, 0x4
+	v_writelane_b32	v0, ttmp5, 0x5
+	v_writelane_b32	v0, ttmp6, 0x6
+	v_writelane_b32	v0, ttmp7, 0x7
+	v_writelane_b32	v0, ttmp8, 0x8
+	v_writelane_b32	v0, ttmp9, 0x9
+	v_writelane_b32	v0, ttmp10, 0xA
+	v_writelane_b32	v0, ttmp11, 0xB
+	v_writelane_b32	v0, ttmp13, 0xD
+	v_writelane_b32	v0, exec_lo, 0xE
+	v_writelane_b32	v0, exec_hi, 0xF
+
+	s_mov_b32	exec_lo, 0x3FFF
+	s_mov_b32	exec_hi, 0x0
+	global_store_dword_addtid	v0, [s_save_ttmps_lo, s_save_ttmps_hi] inst_offset:0x40 slc:1 glc:1
+	v_readlane_b32	ttmp14, v0, 0xE
+	v_readlane_b32	ttmp15, v0, 0xF
+	s_mov_b32	exec_lo, ttmp14
+	s_mov_b32	exec_hi, ttmp15
+#else
+	s_store_dwordx4	[ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1
+	s_store_dwordx4	[ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1
+	s_store_dword   ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1
+#endif
+
+	/* setup Resource Contants */
+	s_mov_b32	s_save_buf_rsrc0, s_save_spi_init_lo			//base_addr_lo
+	s_and_b32	s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF	//base_addr_hi
+	s_or_b32	s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
+	s_mov_b32	s_save_buf_rsrc2, 0					//NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited
+	s_mov_b32	s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
+
+	s_mov_b32	s_save_m0, m0
+
+	/* global mem offset */
+	s_mov_b32	s_save_mem_offset, 0x0
+	get_wave_size(s_wave_size)
+
+#if HAVE_XNACK
+	// Save and clear vector XNACK state late to free up SGPRs.
+	s_getreg_b32	s_save_xnack_mask, hwreg(HW_REG_SHADER_XNACK_MASK)
+	s_setreg_imm32_b32	hwreg(HW_REG_SHADER_XNACK_MASK), 0x0
+#endif
+
+	/* save first 4 VGPRs, needed for SGPR save */
+	s_mov_b32	exec_lo, 0xFFFFFFFF					//need every thread from now on
+	s_lshr_b32	m0, s_wave_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_cbranch_scc1	L_ENABLE_SAVE_4VGPR_EXEC_HI
+	s_mov_b32	exec_hi, 0x00000000
+	s_branch	L_SAVE_4VGPR_WAVE32
+L_ENABLE_SAVE_4VGPR_EXEC_HI:
+	s_mov_b32	exec_hi, 0xFFFFFFFF
+	s_branch	L_SAVE_4VGPR_WAVE64
+L_SAVE_4VGPR_WAVE32:
+	s_mov_b32	s_save_buf_rsrc2, 0x1000000				//NUM_RECORDS in bytes
+
+	// VGPR Allocated in 4-GPR granularity
+
+#if !NO_SQC_STORE
+	buffer_store_dword	v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+#endif
+	buffer_store_dword	v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128
+	buffer_store_dword	v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2
+	buffer_store_dword	v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3
+	s_branch	L_SAVE_HWREG
+
+L_SAVE_4VGPR_WAVE64:
+	s_mov_b32	s_save_buf_rsrc2, 0x1000000				//NUM_RECORDS in bytes
+
+	// VGPR Allocated in 4-GPR granularity
+
+#if !NO_SQC_STORE
+	buffer_store_dword	v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+#endif
+	buffer_store_dword	v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
+	buffer_store_dword	v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
+	buffer_store_dword	v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
+
+	/* save HW registers */
+
+L_SAVE_HWREG:
+	// HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
+	get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+	get_svgpr_size_bytes(s_save_tmp)
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, s_save_tmp
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes()
+
+	s_mov_b32	s_save_buf_rsrc2, 0x1000000				//NUM_RECORDS in bytes
+
+#if NO_SQC_STORE
+	v_mov_b32	v0, 0x0							//Offset[31:0] from buffer resource
+	v_mov_b32	v1, 0x0							//Offset[63:32] from buffer resource
+	v_mov_b32	v2, 0x0							//Set of SGPRs for TCP store
+	s_mov_b32	m0, 0x0							//Next lane of v2 to write to
+#endif
+
+	write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
+	write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset)
+	s_andn2_b32	s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
+	write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
+	write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset)
+	write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
+	write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset)
+
+	s_getreg_b32	s_save_tmp, hwreg(HW_REG_TRAPSTS)
+	write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
+
+	// Not used on Sienna_Cichlid but keep layout same for debugger.
+	write_hwreg_to_mem(s_save_xnack_mask, s_save_buf_rsrc0, s_save_mem_offset)
+
+	s_getreg_b32	s_save_m0, hwreg(HW_REG_MODE)
+	write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
+
+	s_getreg_b32	s_save_m0, hwreg(HW_REG_SHADER_FLAT_SCRATCH_LO)
+	write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
+
+	s_getreg_b32	s_save_m0, hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI)
+	write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
+
+#if NO_SQC_STORE
+	// Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
+	s_mov_b32       exec_lo, 0xFFFF
+	s_mov_b32	exec_hi, 0x0
+	buffer_store_dword	v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+
+	// Write SGPRs with 32 VGPR lanes. This works in wave32 and wave64 mode.
+	s_mov_b32       exec_lo, 0xFFFFFFFF
+#endif
+
+	/* save SGPRs */
+	// Save SGPR before LDS save, then the s0 to s4 can be used during LDS save...
+
+	// SGPR SR memory offset : size(VGPR)+size(SVGPR)
+	get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+	get_svgpr_size_bytes(s_save_tmp)
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, s_save_tmp
+	s_mov_b32	s_save_buf_rsrc2, 0x1000000				//NUM_RECORDS in bytes
+
+#if NO_SQC_STORE
+	s_mov_b32	ttmp13, 0x0						//next VGPR lane to copy SGPR into
+#else
+	// backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0
+	s_mov_b32	s_save_xnack_mask, s_save_buf_rsrc0
+	s_add_u32	s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset
+	s_addc_u32	s_save_buf_rsrc1, s_save_buf_rsrc1, 0
+#endif
+
+	s_mov_b32	m0, 0x0							//SGPR initial index value =0
+	s_nop		0x0							//Manually inserted wait states
+L_SAVE_SGPR_LOOP:
+	// SGPR is allocated in 16 SGPR granularity
+	s_movrels_b64	s0, s0							//s0 = s[0+m0], s1 = s[1+m0]
+	s_movrels_b64	s2, s2							//s2 = s[2+m0], s3 = s[3+m0]
+	s_movrels_b64	s4, s4							//s4 = s[4+m0], s5 = s[5+m0]
+	s_movrels_b64	s6, s6							//s6 = s[6+m0], s7 = s[7+m0]
+	s_movrels_b64	s8, s8							//s8 = s[8+m0], s9 = s[9+m0]
+	s_movrels_b64	s10, s10						//s10 = s[10+m0], s11 = s[11+m0]
+	s_movrels_b64	s12, s12						//s12 = s[12+m0], s13 = s[13+m0]
+	s_movrels_b64	s14, s14						//s14 = s[14+m0], s15 = s[15+m0]
+
+	write_16sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset)
+
+#if NO_SQC_STORE
+	s_cmp_eq_u32	ttmp13, 0x20						//have 32 VGPR lanes filled?
+	s_cbranch_scc0	L_SAVE_SGPR_SKIP_TCP_STORE
+
+	buffer_store_dword	v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, 0x80
+	s_mov_b32	ttmp13, 0x0
+	v_mov_b32	v2, 0x0
+L_SAVE_SGPR_SKIP_TCP_STORE:
+#endif
+
+	s_add_u32	m0, m0, 16						//next sgpr index
+	s_cmp_lt_u32	m0, 96							//scc = (m0 < first 96 SGPR) ? 1 : 0
+	s_cbranch_scc1	L_SAVE_SGPR_LOOP					//first 96 SGPR save is complete?
+
+	//save the rest 12 SGPR
+	s_movrels_b64	s0, s0							//s0 = s[0+m0], s1 = s[1+m0]
+	s_movrels_b64	s2, s2							//s2 = s[2+m0], s3 = s[3+m0]
+	s_movrels_b64	s4, s4							//s4 = s[4+m0], s5 = s[5+m0]
+	s_movrels_b64	s6, s6							//s6 = s[6+m0], s7 = s[7+m0]
+	s_movrels_b64	s8, s8							//s8 = s[8+m0], s9 = s[9+m0]
+	s_movrels_b64	s10, s10						//s10 = s[10+m0], s11 = s[11+m0]
+	write_12sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset)
+
+#if NO_SQC_STORE
+	buffer_store_dword	v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+#else
+	// restore s_save_buf_rsrc0,1
+	s_mov_b32	s_save_buf_rsrc0, s_save_xnack_mask
+#endif
+
+	/* save LDS */
+
+L_SAVE_LDS:
+	// Change EXEC to all threads...
+	s_mov_b32	exec_lo, 0xFFFFFFFF					//need every thread from now on
+	s_lshr_b32	m0, s_wave_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_cbranch_scc1	L_ENABLE_SAVE_LDS_EXEC_HI
+	s_mov_b32	exec_hi, 0x00000000
+	s_branch	L_SAVE_LDS_NORMAL
+L_ENABLE_SAVE_LDS_EXEC_HI:
+	s_mov_b32	exec_hi, 0xFFFFFFFF
+L_SAVE_LDS_NORMAL:
+	s_getreg_b32	s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
+	s_and_b32	s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF	//lds_size is zero?
+	s_cbranch_scc0	L_SAVE_LDS_DONE						//no lds used? jump to L_SAVE_DONE
+
+	s_barrier								//LDS is used? wait for other waves in the same TG
+	s_and_b32	s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
+	s_cbranch_scc0	L_SAVE_LDS_DONE
+
+	// first wave do LDS save;
+
+	s_lshl_b32	s_save_alloc_size, s_save_alloc_size, 6			//LDS size in dwords = lds_size * 64dw
+	s_lshl_b32	s_save_alloc_size, s_save_alloc_size, 2			//LDS size in bytes
+	s_mov_b32	s_save_buf_rsrc2, s_save_alloc_size			//NUM_RECORDS in bytes
+
+	// LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
+	//
+	get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+	get_svgpr_size_bytes(s_save_tmp)
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, s_save_tmp
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes()
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes()
+
+	s_mov_b32	s_save_buf_rsrc2, 0x1000000				//NUM_RECORDS in bytes
+
+	//load 0~63*4(byte address) to vgpr v0
+	v_mbcnt_lo_u32_b32	v0, -1, 0
+	v_mbcnt_hi_u32_b32	v0, -1, v0
+	v_mul_u32_u24	v0, 4, v0
+
+	s_lshr_b32	m0, s_wave_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_mov_b32	m0, 0x0
+	s_cbranch_scc1	L_SAVE_LDS_W64
+
+L_SAVE_LDS_W32:
+	s_mov_b32	s3, 128
+	s_nop		0
+	s_nop		0
+	s_nop		0
+L_SAVE_LDS_LOOP_W32:
+	ds_read_b32	v1, v0
+	s_waitcnt	0
+	buffer_store_dword	v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+
+	s_add_u32	m0, m0, s3						//every buffer_store_lds does 256 bytes
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, s3
+	v_add_nc_u32	v0, v0, 128						//mem offset increased by 128 bytes
+	s_cmp_lt_u32	m0, s_save_alloc_size					//scc=(m0 < s_save_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_SAVE_LDS_LOOP_W32					//LDS save is complete?
+
+	s_branch	L_SAVE_LDS_DONE
+
+L_SAVE_LDS_W64:
+	s_mov_b32	s3, 256
+	s_nop		0
+	s_nop		0
+	s_nop		0
+L_SAVE_LDS_LOOP_W64:
+	ds_read_b32	v1, v0
+	s_waitcnt	0
+	buffer_store_dword	v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+
+	s_add_u32	m0, m0, s3						//every buffer_store_lds does 256 bytes
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, s3
+	v_add_nc_u32	v0, v0, 256						//mem offset increased by 256 bytes
+	s_cmp_lt_u32	m0, s_save_alloc_size					//scc=(m0 < s_save_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_SAVE_LDS_LOOP_W64					//LDS save is complete?
+
+L_SAVE_LDS_DONE:
+	/* save VGPRs  - set the Rest VGPRs */
+L_SAVE_VGPR:
+	// VGPR SR memory offset: 0
+	s_mov_b32	exec_lo, 0xFFFFFFFF					//need every thread from now on
+	s_lshr_b32	m0, s_wave_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_cbranch_scc1	L_ENABLE_SAVE_VGPR_EXEC_HI
+	s_mov_b32	s_save_mem_offset, (0+128*4)				// for the rest VGPRs
+	s_mov_b32	exec_hi, 0x00000000
+	s_branch	L_SAVE_VGPR_NORMAL
+L_ENABLE_SAVE_VGPR_EXEC_HI:
+	s_mov_b32	s_save_mem_offset, (0+256*4)				// for the rest VGPRs
+	s_mov_b32	exec_hi, 0xFFFFFFFF
+L_SAVE_VGPR_NORMAL:
+	s_getreg_b32	s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+	s_add_u32	s_save_alloc_size, s_save_alloc_size, 1
+	s_lshl_b32	s_save_alloc_size, s_save_alloc_size, 2			//Number of VGPRs = (vgpr_size + 1) * 4    (non-zero value)
+	//determine it is wave32 or wave64
+	s_lshr_b32	m0, s_wave_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_cbranch_scc1	L_SAVE_VGPR_WAVE64
+
+	s_mov_b32	s_save_buf_rsrc2, 0x1000000				//NUM_RECORDS in bytes
+
+	// VGPR Allocated in 4-GPR granularity
+
+	// VGPR store using dw burst
+	s_mov_b32	m0, 0x4							//VGPR initial index value =4
+	s_cmp_lt_u32	m0, s_save_alloc_size
+	s_cbranch_scc0	L_SAVE_VGPR_END
+
+L_SAVE_VGPR_W32_LOOP:
+	v_movrels_b32	v0, v0							//v0 = v[0+m0]
+	v_movrels_b32	v1, v1							//v1 = v[1+m0]
+	v_movrels_b32	v2, v2							//v2 = v[2+m0]
+	v_movrels_b32	v3, v3							//v3 = v[3+m0]
+
+	buffer_store_dword	v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+	buffer_store_dword	v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128
+	buffer_store_dword	v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2
+	buffer_store_dword	v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3
+
+	s_add_u32	m0, m0, 4						//next vgpr index
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, 128*4		//every buffer_store_dword does 128 bytes
+	s_cmp_lt_u32	m0, s_save_alloc_size					//scc = (m0 < s_save_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_SAVE_VGPR_W32_LOOP					//VGPR save is complete?
+
+	s_branch	L_SAVE_VGPR_END
+
+L_SAVE_VGPR_WAVE64:
+	s_mov_b32	s_save_buf_rsrc2, 0x1000000				//NUM_RECORDS in bytes
+
+	// VGPR store using dw burst
+	s_mov_b32	m0, 0x4							//VGPR initial index value =4
+	s_cmp_lt_u32	m0, s_save_alloc_size
+	s_cbranch_scc0	L_SAVE_SHARED_VGPR
+
+L_SAVE_VGPR_W64_LOOP:
+	v_movrels_b32	v0, v0							//v0 = v[0+m0]
+	v_movrels_b32	v1, v1							//v1 = v[1+m0]
+	v_movrels_b32	v2, v2							//v2 = v[2+m0]
+	v_movrels_b32	v3, v3							//v3 = v[3+m0]
+
+	buffer_store_dword	v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+	buffer_store_dword	v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
+	buffer_store_dword	v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
+	buffer_store_dword	v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
+
+	s_add_u32	m0, m0, 4						//next vgpr index
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, 256*4		//every buffer_store_dword does 256 bytes
+	s_cmp_lt_u32	m0, s_save_alloc_size					//scc = (m0 < s_save_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_SAVE_VGPR_W64_LOOP					//VGPR save is complete?
+
+L_SAVE_SHARED_VGPR:
+	//Below part will be the save shared vgpr part (new for gfx10)
+	s_getreg_b32	s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
+	s_and_b32	s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF	//shared_vgpr_size is zero?
+	s_cbranch_scc0	L_SAVE_VGPR_END						//no shared_vgpr used? jump to L_SAVE_LDS
+	s_lshl_b32	s_save_alloc_size, s_save_alloc_size, 3			//Number of SHARED_VGPRs = shared_vgpr_size * 8    (non-zero value)
+	//m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count.
+	//save shared_vgpr will start from the index of m0
+	s_add_u32	s_save_alloc_size, s_save_alloc_size, m0
+	s_mov_b32	exec_lo, 0xFFFFFFFF
+	s_mov_b32	exec_hi, 0x00000000
+L_SAVE_SHARED_VGPR_WAVE64_LOOP:
+	v_movrels_b32	v0, v0							//v0 = v[0+m0]
+	buffer_store_dword	v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+	s_add_u32	m0, m0, 1						//next vgpr index
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, 128
+	s_cmp_lt_u32	m0, s_save_alloc_size					//scc = (m0 < s_save_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_SAVE_SHARED_VGPR_WAVE64_LOOP				//SHARED_VGPR save is complete?
+
+L_SAVE_VGPR_END:
+	s_branch	L_END_PGM
+
+L_RESTORE:
+	/* Setup Resource Contants */
+	s_mov_b32	s_restore_buf_rsrc0, s_restore_spi_init_lo		//base_addr_lo
+	s_and_b32	s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF	//base_addr_hi
+	s_or_b32	s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
+	s_mov_b32	s_restore_buf_rsrc2, 0					//NUM_RECORDS initial value = 0 (in bytes)
+	s_mov_b32	s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
+
+	//determine it is wave32 or wave64
+	get_wave_size(s_restore_size)
+
+	s_and_b32	s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+	s_cbranch_scc0	L_RESTORE_VGPR
+
+	/* restore LDS */
+L_RESTORE_LDS:
+	s_mov_b32	exec_lo, 0xFFFFFFFF					//need every thread from now on
+	s_lshr_b32	m0, s_restore_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_cbranch_scc1	L_ENABLE_RESTORE_LDS_EXEC_HI
+	s_mov_b32	exec_hi, 0x00000000
+	s_branch	L_RESTORE_LDS_NORMAL
+L_ENABLE_RESTORE_LDS_EXEC_HI:
+	s_mov_b32	exec_hi, 0xFFFFFFFF
+L_RESTORE_LDS_NORMAL:
+	s_getreg_b32	s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
+	s_and_b32	s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF	//lds_size is zero?
+	s_cbranch_scc0	L_RESTORE_VGPR						//no lds used? jump to L_RESTORE_VGPR
+	s_lshl_b32	s_restore_alloc_size, s_restore_alloc_size, 6		//LDS size in dwords = lds_size * 64dw
+	s_lshl_b32	s_restore_alloc_size, s_restore_alloc_size, 2		//LDS size in bytes
+	s_mov_b32	s_restore_buf_rsrc2, s_restore_alloc_size		//NUM_RECORDS in bytes
+
+	// LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
+	//
+	get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
+	get_svgpr_size_bytes(s_restore_tmp)
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes()
+
+	s_mov_b32	s_restore_buf_rsrc2, 0x1000000				//NUM_RECORDS in bytes
+
+	s_lshr_b32	m0, s_restore_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_mov_b32	m0, 0x0
+	s_cbranch_scc1	L_RESTORE_LDS_LOOP_W64
+
+L_RESTORE_LDS_LOOP_W32:
+#if HAVE_BUFFER_LDS_LOAD
+	buffer_load_dword	v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1	// first 64DW
+#else
+	buffer_load_dword       v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
+	s_waitcnt	vmcnt(0)
+	ds_store_addtid_b32     v0
+#endif
+	s_add_u32	m0, m0, 128						// 128 DW
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, 128		//mem offset increased by 128DW
+	s_cmp_lt_u32	m0, s_restore_alloc_size				//scc=(m0 < s_restore_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_RESTORE_LDS_LOOP_W32					//LDS restore is complete?
+	s_branch	L_RESTORE_VGPR
+
+L_RESTORE_LDS_LOOP_W64:
+#if HAVE_BUFFER_LDS_LOAD
+	buffer_load_dword	v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1	// first 64DW
+#else
+	buffer_load_dword       v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
+	s_waitcnt	vmcnt(0)
+	ds_store_addtid_b32     v0
+#endif
+	s_add_u32	m0, m0, 256						// 256 DW
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, 256		//mem offset increased by 256DW
+	s_cmp_lt_u32	m0, s_restore_alloc_size				//scc=(m0 < s_restore_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_RESTORE_LDS_LOOP_W64					//LDS restore is complete?
+
+	/* restore VGPRs */
+L_RESTORE_VGPR:
+	// VGPR SR memory offset : 0
+	s_mov_b32	s_restore_mem_offset, 0x0
+ 	s_mov_b32	exec_lo, 0xFFFFFFFF					//need every thread from now on
+	s_lshr_b32	m0, s_restore_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_cbranch_scc1	L_ENABLE_RESTORE_VGPR_EXEC_HI
+	s_mov_b32	exec_hi, 0x00000000
+	s_branch	L_RESTORE_VGPR_NORMAL
+L_ENABLE_RESTORE_VGPR_EXEC_HI:
+	s_mov_b32	exec_hi, 0xFFFFFFFF
+L_RESTORE_VGPR_NORMAL:
+	s_getreg_b32	s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+	s_add_u32	s_restore_alloc_size, s_restore_alloc_size, 1
+	s_lshl_b32	s_restore_alloc_size, s_restore_alloc_size, 2		//Number of VGPRs = (vgpr_size + 1) * 4    (non-zero value)
+	//determine it is wave32 or wave64
+	s_lshr_b32	m0, s_restore_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_cbranch_scc1	L_RESTORE_VGPR_WAVE64
+
+	s_mov_b32	s_restore_buf_rsrc2, 0x1000000				//NUM_RECORDS in bytes
+
+	// VGPR load using dw burst
+	s_mov_b32	s_restore_mem_offset_save, s_restore_mem_offset		// restore start with v1, v0 will be the last
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, 128*4
+	s_mov_b32	m0, 4							//VGPR initial index value = 4
+	s_cmp_lt_u32	m0, s_restore_alloc_size
+	s_cbranch_scc0	L_RESTORE_SGPR
+
+L_RESTORE_VGPR_WAVE32_LOOP:
+	buffer_load_dword	v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
+	buffer_load_dword	v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:128
+	buffer_load_dword	v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:128*2
+	buffer_load_dword	v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:128*3
+	s_waitcnt	vmcnt(0)
+	v_movreld_b32	v0, v0							//v[0+m0] = v0
+	v_movreld_b32	v1, v1
+	v_movreld_b32	v2, v2
+	v_movreld_b32	v3, v3
+	s_add_u32	m0, m0, 4						//next vgpr index
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, 128*4	//every buffer_load_dword does 128 bytes
+	s_cmp_lt_u32	m0, s_restore_alloc_size				//scc = (m0 < s_restore_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_RESTORE_VGPR_WAVE32_LOOP				//VGPR restore (except v0) is complete?
+
+	/* VGPR restore on v0 */
+	buffer_load_dword	v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1
+	buffer_load_dword	v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128
+	buffer_load_dword	v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*2
+	buffer_load_dword	v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*3
+	s_waitcnt	vmcnt(0)
+
+	s_branch	L_RESTORE_SGPR
+
+L_RESTORE_VGPR_WAVE64:
+	s_mov_b32	s_restore_buf_rsrc2, 0x1000000				//NUM_RECORDS in bytes
+
+	// VGPR load using dw burst
+	s_mov_b32	s_restore_mem_offset_save, s_restore_mem_offset		// restore start with v4, v0 will be the last
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, 256*4
+	s_mov_b32	m0, 4							//VGPR initial index value = 4
+	s_cmp_lt_u32	m0, s_restore_alloc_size
+	s_cbranch_scc0	L_RESTORE_SHARED_VGPR
+
+L_RESTORE_VGPR_WAVE64_LOOP:
+	buffer_load_dword	v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
+	buffer_load_dword	v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256
+	buffer_load_dword	v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2
+	buffer_load_dword	v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3
+	s_waitcnt	vmcnt(0)
+	v_movreld_b32	v0, v0							//v[0+m0] = v0
+	v_movreld_b32	v1, v1
+	v_movreld_b32	v2, v2
+	v_movreld_b32	v3, v3
+	s_add_u32	m0, m0, 4						//next vgpr index
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, 256*4	//every buffer_load_dword does 256 bytes
+	s_cmp_lt_u32	m0, s_restore_alloc_size				//scc = (m0 < s_restore_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_RESTORE_VGPR_WAVE64_LOOP				//VGPR restore (except v0) is complete?
+
+L_RESTORE_SHARED_VGPR:
+	//Below part will be the restore shared vgpr part (new for gfx10)
+	s_getreg_b32	s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)	//shared_vgpr_size
+	s_and_b32	s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF	//shared_vgpr_size is zero?
+	s_cbranch_scc0	L_RESTORE_V0						//no shared_vgpr used?
+	s_lshl_b32	s_restore_alloc_size, s_restore_alloc_size, 3		//Number of SHARED_VGPRs = shared_vgpr_size * 8    (non-zero value)
+	//m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count.
+	//restore shared_vgpr will start from the index of m0
+	s_add_u32	s_restore_alloc_size, s_restore_alloc_size, m0
+	s_mov_b32	exec_lo, 0xFFFFFFFF
+	s_mov_b32	exec_hi, 0x00000000
+L_RESTORE_SHARED_VGPR_WAVE64_LOOP:
+	buffer_load_dword	v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
+	s_waitcnt	vmcnt(0)
+	v_movreld_b32	v0, v0							//v[0+m0] = v0
+	s_add_u32	m0, m0, 1						//next vgpr index
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, 128
+	s_cmp_lt_u32	m0, s_restore_alloc_size				//scc = (m0 < s_restore_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_RESTORE_SHARED_VGPR_WAVE64_LOOP			//VGPR restore (except v0) is complete?
+
+	s_mov_b32	exec_hi, 0xFFFFFFFF					//restore back exec_hi before restoring V0!!
+
+	/* VGPR restore on v0 */
+L_RESTORE_V0:
+	buffer_load_dword	v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1
+	buffer_load_dword	v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256
+	buffer_load_dword	v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2
+	buffer_load_dword	v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3
+	s_waitcnt	vmcnt(0)
+
+	/* restore SGPRs */
+	//will be 2+8+16*6
+	// SGPR SR memory offset : size(VGPR)+size(SVGPR)
+L_RESTORE_SGPR:
+	get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
+	get_svgpr_size_bytes(s_restore_tmp)
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
+	s_sub_u32	s_restore_mem_offset, s_restore_mem_offset, 20*4	//s108~s127 is not saved
+
+	s_mov_b32	s_restore_buf_rsrc2, 0x1000000				//NUM_RECORDS in bytes
+
+	s_mov_b32	m0, s_sgpr_save_num
+
+	read_4sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
+	s_waitcnt	lgkmcnt(0)
+
+	s_sub_u32	m0, m0, 4						// Restore from S[0] to S[104]
+	s_nop		0							// hazard SALU M0=> S_MOVREL
+
+	s_movreld_b64	s0, s0							//s[0+m0] = s0
+	s_movreld_b64	s2, s2
+
+	read_8sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
+	s_waitcnt	lgkmcnt(0)
+
+	s_sub_u32	m0, m0, 8						// Restore from S[0] to S[96]
+	s_nop		0							// hazard SALU M0=> S_MOVREL
+
+	s_movreld_b64	s0, s0							//s[0+m0] = s0
+	s_movreld_b64	s2, s2
+	s_movreld_b64	s4, s4
+	s_movreld_b64	s6, s6
+
+ L_RESTORE_SGPR_LOOP:
+	read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
+	s_waitcnt	lgkmcnt(0)
+
+	s_sub_u32	m0, m0, 16						// Restore from S[n] to S[0]
+	s_nop		0							// hazard SALU M0=> S_MOVREL
+
+	s_movreld_b64	s0, s0							//s[0+m0] = s0
+	s_movreld_b64	s2, s2
+	s_movreld_b64	s4, s4
+	s_movreld_b64	s6, s6
+	s_movreld_b64	s8, s8
+	s_movreld_b64	s10, s10
+	s_movreld_b64	s12, s12
+	s_movreld_b64	s14, s14
+
+	s_cmp_eq_u32	m0, 0							//scc = (m0 < s_sgpr_save_num) ? 1 : 0
+	s_cbranch_scc0	L_RESTORE_SGPR_LOOP
+
+	// s_barrier with MODE.DEBUG_EN=1, STATUS.PRIV=1 incorrectly asserts debug exception.
+	// Clear DEBUG_EN before and restore MODE after the barrier.
+	s_setreg_imm32_b32	hwreg(HW_REG_MODE), 0
+	s_barrier								//barrier to ensure the readiness of LDS before access attemps from any other wave in the same TG
+
+	/* restore HW registers */
+L_RESTORE_HWREG:
+	// HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
+	get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
+	get_svgpr_size_bytes(s_restore_tmp)
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
+
+	s_mov_b32	s_restore_buf_rsrc2, 0x1000000				//NUM_RECORDS in bytes
+
+	read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset)
+	read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset)
+	read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+	read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset)
+	read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+	read_hwreg_from_mem(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset)
+	read_hwreg_from_mem(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset)
+	read_hwreg_from_mem(s_restore_xnack_mask, s_restore_buf_rsrc0, s_restore_mem_offset)
+	read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset)
+	read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset)
+	s_waitcnt	lgkmcnt(0)
+
+	s_setreg_b32	hwreg(HW_REG_SHADER_FLAT_SCRATCH_LO), s_restore_flat_scratch
+
+	read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset)
+	s_waitcnt	lgkmcnt(0)						//from now on, it is safe to restore STATUS and IB_STS
+
+	s_setreg_b32	hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI), s_restore_flat_scratch
+
+	s_mov_b32	m0, s_restore_m0
+	s_mov_b32	exec_lo, s_restore_exec_lo
+	s_mov_b32	exec_hi, s_restore_exec_hi
+
+	s_and_b32	s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
+	s_setreg_b32	hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0
+
+#if HAVE_XNACK
+	s_setreg_b32	hwreg(HW_REG_SHADER_XNACK_MASK), s_restore_xnack_mask
+#endif
+
+	s_and_b32	s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts
+	s_lshr_b32	s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT
+	s_setreg_b32	hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0
+	s_setreg_b32	hwreg(HW_REG_MODE), s_restore_mode
+
+	// Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
+	// ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
+	get_vgpr_size_bytes(s_restore_ttmps_lo, s_restore_size)
+	get_svgpr_size_bytes(s_restore_ttmps_hi)
+	s_add_u32	s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi
+	s_add_u32	s_restore_ttmps_lo, s_restore_ttmps_lo, get_sgpr_size_bytes()
+	s_add_u32	s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
+	s_addc_u32	s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
+	s_and_b32	s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
+	s_load_dwordx4	[ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 glc:1
+	s_load_dwordx4	[ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 glc:1
+	s_load_dword	ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
+	s_waitcnt	lgkmcnt(0)
+
+#if HAVE_XNACK
+	restore_ib_sts(s_restore_tmp, s_restore_m0)
+#endif
+
+	s_and_b32	s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff		//pc[47:32] //Do it here in order not to affect STATUS
+	s_and_b64	exec, exec, exec					// Restore STATUS.EXECZ, not writable by s_setreg_b32
+	s_and_b64	vcc, vcc, vcc						// Restore STATUS.VCCZ, not writable by s_setreg_b32
+
+#if SW_SA_TRAP
+	// If traps are enabled then return to the shader with PRIV=0.
+	// Otherwise retain PRIV=1 for subsequent context save requests.
+	s_getreg_b32	s_restore_tmp, hwreg(HW_REG_STATUS)
+	s_bitcmp1_b32	s_restore_tmp, SQ_WAVE_STATUS_TRAP_EN_SHIFT
+	s_cbranch_scc1	L_RETURN_WITHOUT_PRIV
+
+	s_setreg_b32	hwreg(HW_REG_STATUS), s_restore_status			// SCC is included, which is changed by previous salu
+	s_setpc_b64	[s_restore_pc_lo, s_restore_pc_hi]
+L_RETURN_WITHOUT_PRIV:
+#endif
+
+	s_setreg_b32	hwreg(HW_REG_STATUS), s_restore_status			// SCC is included, which is changed by previous salu
+	s_rfe_b64	s_restore_pc_lo						//Return to the main shader program and resume execution
+
+L_END_PGM:
+	s_endpgm
+end
+
+function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
+#if NO_SQC_STORE
+	// Copy into VGPR for later TCP store.
+	v_writelane_b32	v2, s, m0
+	s_add_u32	m0, m0, 0x1
+#else
+	s_mov_b32	exec_lo, m0
+	s_mov_b32	m0, s_mem_offset
+	s_buffer_store_dword	s, s_rsrc, m0 glc:1
+	s_add_u32	s_mem_offset, s_mem_offset, 4
+	s_mov_b32	m0, exec_lo
+#endif
+end
+
+
+function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset)
+#if NO_SQC_STORE
+	// Copy into VGPR for later TCP store.
+	for var sgpr_idx = 0; sgpr_idx < 16; sgpr_idx ++
+		v_writelane_b32	v2, s[sgpr_idx], ttmp13
+		s_add_u32	ttmp13, ttmp13, 0x1
+	end
+#else
+	s_buffer_store_dwordx4	s[0], s_rsrc, 0 glc:1
+	s_buffer_store_dwordx4	s[4], s_rsrc, 16 glc:1
+	s_buffer_store_dwordx4	s[8], s_rsrc, 32 glc:1
+	s_buffer_store_dwordx4	s[12], s_rsrc, 48 glc:1
+	s_add_u32	s_rsrc[0], s_rsrc[0], 4*16
+	s_addc_u32	s_rsrc[1], s_rsrc[1], 0x0
+#endif
+end
+
+function write_12sgpr_to_mem(s, s_rsrc, s_mem_offset)
+#if NO_SQC_STORE
+	// Copy into VGPR for later TCP store.
+	for var sgpr_idx = 0; sgpr_idx < 12; sgpr_idx ++
+		v_writelane_b32	v2, s[sgpr_idx], ttmp13
+		s_add_u32	ttmp13, ttmp13, 0x1
+	end
+#else
+	s_buffer_store_dwordx4	s[0], s_rsrc, 0 glc:1
+	s_buffer_store_dwordx4	s[4], s_rsrc, 16 glc:1
+	s_buffer_store_dwordx4	s[8], s_rsrc, 32 glc:1
+	s_add_u32	s_rsrc[0], s_rsrc[0], 4*12
+	s_addc_u32	s_rsrc[1], s_rsrc[1], 0x0
+#endif
+end
+
+function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
+	s_buffer_load_dword	s, s_rsrc, s_mem_offset glc:1
+	s_add_u32	s_mem_offset, s_mem_offset, 4
+end
+
+function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
+	s_sub_u32	s_mem_offset, s_mem_offset, 4*16
+	s_buffer_load_dwordx16	s, s_rsrc, s_mem_offset glc:1
+end
+
+function read_8sgpr_from_mem(s, s_rsrc, s_mem_offset)
+	s_sub_u32	s_mem_offset, s_mem_offset, 4*8
+	s_buffer_load_dwordx8	s, s_rsrc, s_mem_offset glc:1
+end
+
+function read_4sgpr_from_mem(s, s_rsrc, s_mem_offset)
+	s_sub_u32	s_mem_offset, s_mem_offset, 4*4
+	s_buffer_load_dwordx4	s, s_rsrc, s_mem_offset glc:1
+end
+
+
+function get_lds_size_bytes(s_lds_size_byte)
+	s_getreg_b32	s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
+	s_lshl_b32	s_lds_size_byte, s_lds_size_byte, 8			//LDS size in dwords = lds_size * 64 *4Bytes // granularity 64DW
+end
+
+function get_vgpr_size_bytes(s_vgpr_size_byte, s_size)
+	s_getreg_b32	s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+	s_add_u32	s_vgpr_size_byte, s_vgpr_size_byte, 1
+	s_bitcmp1_b32	s_size, S_WAVE_SIZE
+	s_cbranch_scc1	L_ENABLE_SHIFT_W64
+	s_lshl_b32	s_vgpr_size_byte, s_vgpr_size_byte, (2+7)		//Number of VGPRs = (vgpr_size + 1) * 4 * 32 * 4   (non-zero value)
+	s_branch	L_SHIFT_DONE
+L_ENABLE_SHIFT_W64:
+	s_lshl_b32	s_vgpr_size_byte, s_vgpr_size_byte, (2+8)		//Number of VGPRs = (vgpr_size + 1) * 4 * 64 * 4   (non-zero value)
+L_SHIFT_DONE:
+end
+
+function get_svgpr_size_bytes(s_svgpr_size_byte)
+	s_getreg_b32	s_svgpr_size_byte, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
+	s_lshl_b32	s_svgpr_size_byte, s_svgpr_size_byte, (3+7)
+end
+
+function get_sgpr_size_bytes
+	return 512
+end
+
+function get_hwreg_size_bytes
+	return 128
+end
+
+function get_wave_size(s_reg)
+	s_getreg_b32	s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
+	s_lshl_b32	s_reg, s_reg, S_WAVE_SIZE
+end
+
+function save_and_clear_ib_sts(tmp1, tmp2)
+	// Preserve and clear scalar XNACK state before issuing scalar loads.
+	// Save IB_STS.REPLAY_W64H[25], RCNT[21:16], FIRST_REPLAY[15] into
+	// unused space ttmp11[31:24].
+	s_andn2_b32	ttmp11, ttmp11, (TTMP11_SAVE_REPLAY_W64H_MASK | TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK)
+	s_getreg_b32	tmp1, hwreg(HW_REG_IB_STS)
+	s_and_b32	tmp2, tmp1, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
+	s_lshl_b32	tmp2, tmp2, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
+	s_or_b32	ttmp11, ttmp11, tmp2
+	s_and_b32	tmp2, tmp1, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+	s_lshl_b32	tmp2, tmp2, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+	s_or_b32	ttmp11, ttmp11, tmp2
+	s_andn2_b32	tmp1, tmp1, (SQ_WAVE_IB_STS_REPLAY_W64H_MASK | SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK)
+	s_setreg_b32	hwreg(HW_REG_IB_STS), tmp1
+end
+
+function restore_ib_sts(tmp1, tmp2)
+	s_lshr_b32	tmp1, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+	s_and_b32	tmp2, tmp1, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+	s_lshr_b32	tmp1, ttmp11, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
+	s_and_b32	tmp1, tmp1, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
+	s_or_b32	tmp1, tmp1, tmp2
+	s_setreg_b32	hwreg(HW_REG_IB_STS), tmp1
+end
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
new file mode 100644
index 000000000..ac8edef09
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
@@ -0,0 +1,747 @@
+/*
+ * Copyright 2015-2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* To compile this assembly code:
+ * PROJECT=vi ./sp3 cwsr_trap_handler_gfx8.asm -hex tmp.hex
+ */
+
+/**************************************************************************/
+/*                      variables                                         */
+/**************************************************************************/
+var SQ_WAVE_STATUS_INST_ATC_SHIFT  = 23
+var SQ_WAVE_STATUS_INST_ATC_MASK   = 0x00800000
+var SQ_WAVE_STATUS_SPI_PRIO_SHIFT  = 1
+var SQ_WAVE_STATUS_SPI_PRIO_MASK   = 0x00000006
+var SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT   = 0
+var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE    = 1
+var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT  = 3
+var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE   = 29
+
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT    = 12
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE     = 9
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT   = 8
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE    = 6
+var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT   = 24
+var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE    = 3                     //FIXME  sq.blk still has 4 bits at this time while SQ programming guide has 3 bits
+
+var SQ_WAVE_TRAPSTS_SAVECTX_MASK    =   0x400
+var SQ_WAVE_TRAPSTS_EXCE_MASK       =   0x1FF                   // Exception mask
+var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT   =   10
+var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK   =   0x100
+var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT  =   8
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK    =   0x3FF
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT   =   0x0
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE    =   10
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK   =   0xFFFFF800
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT  =   11
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE   =   21
+
+var SQ_WAVE_IB_STS_RCNT_SHIFT           =   16                  //FIXME
+var SQ_WAVE_IB_STS_RCNT_SIZE            =   4                   //FIXME
+var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT   =   15                  //FIXME
+var SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE    =   1                   //FIXME
+var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG   = 0x00007FFF    //FIXME
+
+var SQ_BUF_RSRC_WORD1_ATC_SHIFT     =   24
+var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT   =   27
+
+
+/*      Save        */
+var S_SAVE_BUF_RSRC_WORD1_STRIDE        =   0x00040000          //stride is 4 bytes
+var S_SAVE_BUF_RSRC_WORD3_MISC          =   0x00807FAC          //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
+
+var S_SAVE_SPI_INIT_ATC_MASK            =   0x08000000          //bit[27]: ATC bit
+var S_SAVE_SPI_INIT_ATC_SHIFT           =   27
+var S_SAVE_SPI_INIT_MTYPE_MASK          =   0x70000000          //bit[30:28]: Mtype
+var S_SAVE_SPI_INIT_MTYPE_SHIFT         =   28
+var S_SAVE_SPI_INIT_FIRST_WAVE_MASK     =   0x04000000          //bit[26]: FirstWaveInTG
+var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT    =   26
+
+var S_SAVE_PC_HI_RCNT_SHIFT             =   28                  //FIXME  check with Brian to ensure all fields other than PC[47:0] can be used
+var S_SAVE_PC_HI_RCNT_MASK              =   0xF0000000          //FIXME
+var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT     =   27                  //FIXME
+var S_SAVE_PC_HI_FIRST_REPLAY_MASK      =   0x08000000          //FIXME
+
+var s_save_spi_init_lo              =   exec_lo
+var s_save_spi_init_hi              =   exec_hi
+
+                                                //tba_lo and tba_hi need to be saved/restored
+var s_save_pc_lo            =   ttmp0           //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
+var s_save_pc_hi            =   ttmp1
+var s_save_exec_lo          =   ttmp2
+var s_save_exec_hi          =   ttmp3
+var s_save_status           =   ttmp4
+var s_save_trapsts          =   ttmp5           //not really used until the end of the SAVE routine
+var s_save_xnack_mask_lo    =   ttmp6
+var s_save_xnack_mask_hi    =   ttmp7
+var s_save_buf_rsrc0        =   ttmp8
+var s_save_buf_rsrc1        =   ttmp9
+var s_save_buf_rsrc2        =   ttmp10
+var s_save_buf_rsrc3        =   ttmp11
+
+var s_save_mem_offset       =   tma_lo
+var s_save_alloc_size       =   s_save_trapsts          //conflict
+var s_save_tmp              =   s_save_buf_rsrc2        //shared with s_save_buf_rsrc2  (conflict: should not use mem access with s_save_tmp at the same time)
+var s_save_m0               =   tma_hi
+
+/*      Restore     */
+var S_RESTORE_BUF_RSRC_WORD1_STRIDE         =   S_SAVE_BUF_RSRC_WORD1_STRIDE
+var S_RESTORE_BUF_RSRC_WORD3_MISC           =   S_SAVE_BUF_RSRC_WORD3_MISC
+
+var S_RESTORE_SPI_INIT_ATC_MASK             =   0x08000000          //bit[27]: ATC bit
+var S_RESTORE_SPI_INIT_ATC_SHIFT            =   27
+var S_RESTORE_SPI_INIT_MTYPE_MASK           =   0x70000000          //bit[30:28]: Mtype
+var S_RESTORE_SPI_INIT_MTYPE_SHIFT          =   28
+var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK      =   0x04000000          //bit[26]: FirstWaveInTG
+var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT     =   26
+
+var S_RESTORE_PC_HI_RCNT_SHIFT              =   S_SAVE_PC_HI_RCNT_SHIFT
+var S_RESTORE_PC_HI_RCNT_MASK               =   S_SAVE_PC_HI_RCNT_MASK
+var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT      =   S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+var S_RESTORE_PC_HI_FIRST_REPLAY_MASK       =   S_SAVE_PC_HI_FIRST_REPLAY_MASK
+
+var s_restore_spi_init_lo                   =   exec_lo
+var s_restore_spi_init_hi                   =   exec_hi
+
+var s_restore_mem_offset        =   ttmp2
+var s_restore_alloc_size        =   ttmp3
+var s_restore_tmp               =   ttmp6               //tba_lo/hi need to be restored
+var s_restore_mem_offset_save   =   s_restore_tmp       //no conflict
+
+var s_restore_m0            =   s_restore_alloc_size    //no conflict
+
+var s_restore_mode          =   ttmp7
+
+var s_restore_pc_lo         =   ttmp0
+var s_restore_pc_hi         =   ttmp1
+var s_restore_exec_lo       =   tma_lo                  //no conflict
+var s_restore_exec_hi       =   tma_hi                  //no conflict
+var s_restore_status        =   ttmp4
+var s_restore_trapsts       =   ttmp5
+var s_restore_xnack_mask_lo =   xnack_mask_lo
+var s_restore_xnack_mask_hi =   xnack_mask_hi
+var s_restore_buf_rsrc0     =   ttmp8
+var s_restore_buf_rsrc1     =   ttmp9
+var s_restore_buf_rsrc2     =   ttmp10
+var s_restore_buf_rsrc3     =   ttmp11
+
+/**************************************************************************/
+/*                      trap handler entry points                         */
+/**************************************************************************/
+/* Shader Main*/
+
+shader main
+  asic(VI)
+  type(CS)
+
+
+        s_branch L_SKIP_RESTORE                                     //NOT restore. might be a regular trap or save
+
+L_JUMP_TO_RESTORE:
+    s_branch L_RESTORE                                              //restore
+
+L_SKIP_RESTORE:
+
+    s_getreg_b32    s_save_status, hwreg(HW_REG_STATUS)                             //save STATUS since we will change SCC
+    s_andn2_b32     s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK      //check whether this is for save
+    s_getreg_b32    s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+    s_and_b32       s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK    //check whether this is for save
+    s_cbranch_scc1  L_SAVE                                      //this is the operation for save
+
+    // *********    Handle non-CWSR traps       *******************
+
+    /* read tba and tma for next level trap handler, ttmp4 is used as s_save_status */
+    s_load_dwordx4  [ttmp8,ttmp9,ttmp10, ttmp11], [tma_lo,tma_hi], 0
+    s_waitcnt lgkmcnt(0)
+    s_or_b32        ttmp7, ttmp8, ttmp9
+    s_cbranch_scc0  L_NO_NEXT_TRAP //next level trap handler not been set
+    set_status_without_spi_prio(s_save_status, ttmp2) //restore HW status(SCC)
+    s_setpc_b64     [ttmp8,ttmp9] //jump to next level trap handler
+
+L_NO_NEXT_TRAP:
+    s_getreg_b32    s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+    s_and_b32       s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception
+    s_cbranch_scc1  L_EXCP_CASE   // Exception, jump back to the shader program directly.
+    s_add_u32       ttmp0, ttmp0, 4   // S_TRAP case, add 4 to ttmp0
+    s_addc_u32  ttmp1, ttmp1, 0
+L_EXCP_CASE:
+    s_and_b32   ttmp1, ttmp1, 0xFFFF
+    set_status_without_spi_prio(s_save_status, ttmp2) //restore HW status(SCC)
+    s_rfe_b64       [ttmp0, ttmp1]
+
+    // *********        End handling of non-CWSR traps   *******************
+
+/**************************************************************************/
+/*                      save routine                                      */
+/**************************************************************************/
+
+L_SAVE:
+    s_mov_b32       s_save_tmp, 0                                                           //clear saveCtx bit
+    s_setreg_b32    hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp     //clear saveCtx bit
+
+    s_mov_b32       s_save_xnack_mask_lo,   xnack_mask_lo                                   //save XNACK_MASK
+    s_mov_b32       s_save_xnack_mask_hi,   xnack_mask_hi    //save XNACK must before any memory operation
+    s_getreg_b32    s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE)                   //save RCNT
+    s_lshl_b32      s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
+    s_or_b32        s_save_pc_hi, s_save_pc_hi, s_save_tmp
+    s_getreg_b32    s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE)   //save FIRST_REPLAY
+    s_lshl_b32      s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+    s_or_b32        s_save_pc_hi, s_save_pc_hi, s_save_tmp
+    s_getreg_b32    s_save_tmp, hwreg(HW_REG_IB_STS)                                        //clear RCNT and FIRST_REPLAY in IB_STS
+    s_and_b32       s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG
+
+    s_setreg_b32    hwreg(HW_REG_IB_STS), s_save_tmp
+
+    /*      inform SPI the readiness and wait for SPI's go signal */
+    s_mov_b32       s_save_exec_lo, exec_lo                                                 //save EXEC and use EXEC for the go signal from SPI
+    s_mov_b32       s_save_exec_hi, exec_hi
+    s_mov_b64       exec,   0x0                                                             //clear EXEC to get ready to receive
+
+        s_sendmsg   sendmsg(MSG_SAVEWAVE)  //send SPI a message and wait for SPI's write to EXEC
+
+    // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for.
+    s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT)
+    s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp
+
+  L_SLEEP:
+    s_sleep 0x2                // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0
+
+        s_cbranch_execz L_SLEEP
+
+    /*      setup Resource Contants    */
+    s_mov_b32       s_save_buf_rsrc0,   s_save_spi_init_lo                                                      //base_addr_lo
+    s_and_b32       s_save_buf_rsrc1,   s_save_spi_init_hi, 0x0000FFFF                                          //base_addr_hi
+    s_or_b32        s_save_buf_rsrc1,   s_save_buf_rsrc1,  S_SAVE_BUF_RSRC_WORD1_STRIDE
+    s_mov_b32       s_save_buf_rsrc2,   0                                                                       //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited
+    s_mov_b32       s_save_buf_rsrc3,   S_SAVE_BUF_RSRC_WORD3_MISC
+    s_and_b32       s_save_tmp,         s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
+    s_lshr_b32      s_save_tmp,         s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)         //get ATC bit into position
+    s_or_b32        s_save_buf_rsrc3,   s_save_buf_rsrc3,  s_save_tmp                                           //or ATC
+    s_and_b32       s_save_tmp,         s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK
+    s_lshr_b32      s_save_tmp,         s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)     //get MTYPE bits into position
+    s_or_b32        s_save_buf_rsrc3,   s_save_buf_rsrc3,  s_save_tmp                                           //or MTYPE
+
+    //FIXME  right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi  (might need to save them before using them?)
+    s_mov_b32       s_save_m0,          m0                                                                  //save M0
+
+    /*      global mem offset           */
+    s_mov_b32       s_save_mem_offset,  0x0                                                                     //mem offset initial value = 0
+
+
+
+
+    /*      save HW registers   */
+    //////////////////////////////
+
+  L_SAVE_HWREG:
+        // HWREG SR memory offset : size(VGPR)+size(SGPR)
+       get_vgpr_size_bytes(s_save_mem_offset)
+       get_sgpr_size_bytes(s_save_tmp)
+       s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+
+
+    s_mov_b32       s_save_buf_rsrc2, 0x4                               //NUM_RECORDS   in bytes
+        s_mov_b32       s_save_buf_rsrc2,  0x1000000                                //NUM_RECORDS in bytes
+
+
+    write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)                  //M0
+    write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset)                   //PC
+    write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
+    write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset)             //EXEC
+    write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
+    write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset)              //STATUS
+
+    //s_save_trapsts conflicts with s_save_alloc_size
+    s_getreg_b32    s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+    write_hwreg_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset)             //TRAPSTS
+
+    write_hwreg_to_mem(s_save_xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset)           //XNACK_MASK_LO
+    write_hwreg_to_mem(s_save_xnack_mask_hi, s_save_buf_rsrc0, s_save_mem_offset)           //XNACK_MASK_HI
+
+    //use s_save_tmp would introduce conflict here between s_save_tmp and s_save_buf_rsrc2
+    s_getreg_b32    s_save_m0, hwreg(HW_REG_MODE)                                                   //MODE
+    write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
+    write_hwreg_to_mem(tba_lo, s_save_buf_rsrc0, s_save_mem_offset)                     //TBA_LO
+    write_hwreg_to_mem(tba_hi, s_save_buf_rsrc0, s_save_mem_offset)                     //TBA_HI
+
+
+
+    /*      the first wave in the threadgroup    */
+        // save fist_wave bits in tba_hi unused bit.26
+    s_and_b32       s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK     // extract fisrt wave bit
+    //s_or_b32        tba_hi, s_save_tmp, tba_hi                                        // save first wave bit to tba_hi.bits[26]
+    s_mov_b32        s_save_exec_hi, 0x0
+    s_or_b32         s_save_exec_hi, s_save_tmp, s_save_exec_hi                          // save first wave bit to s_save_exec_hi.bits[26]
+
+
+    /*          save SGPRs      */
+        // Save SGPR before LDS save, then the s0 to s4 can be used during LDS save...
+    //////////////////////////////
+
+    // SGPR SR memory offset : size(VGPR)
+    get_vgpr_size_bytes(s_save_mem_offset)
+    // TODO, change RSRC word to rearrange memory layout for SGPRS
+
+    s_getreg_b32    s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE)               //spgr_size
+    s_add_u32       s_save_alloc_size, s_save_alloc_size, 1
+    s_lshl_b32      s_save_alloc_size, s_save_alloc_size, 4                         //Number of SGPRs = (sgpr_size + 1) * 16   (non-zero value)
+
+        s_lshl_b32      s_save_buf_rsrc2,   s_save_alloc_size, 2                    //NUM_RECORDS in bytes
+        s_mov_b32       s_save_buf_rsrc2,  0x1000000                                //NUM_RECORDS in bytes
+
+    // backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0
+    //s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0
+    s_mov_b64 s_save_xnack_mask_lo, s_save_buf_rsrc0
+    s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset
+    s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0
+
+    s_mov_b32       m0, 0x0                         //SGPR initial index value =0
+  L_SAVE_SGPR_LOOP:
+    // SGPR is allocated in 16 SGPR granularity
+    s_movrels_b64   s0, s0     //s0 = s[0+m0], s1 = s[1+m0]
+    s_movrels_b64   s2, s2     //s2 = s[2+m0], s3 = s[3+m0]
+    s_movrels_b64   s4, s4     //s4 = s[4+m0], s5 = s[5+m0]
+    s_movrels_b64   s6, s6     //s6 = s[6+m0], s7 = s[7+m0]
+    s_movrels_b64   s8, s8     //s8 = s[8+m0], s9 = s[9+m0]
+    s_movrels_b64   s10, s10   //s10 = s[10+m0], s11 = s[11+m0]
+    s_movrels_b64   s12, s12   //s12 = s[12+m0], s13 = s[13+m0]
+    s_movrels_b64   s14, s14   //s14 = s[14+m0], s15 = s[15+m0]
+
+    write_16sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset) //PV: the best performance should be using s_buffer_store_dwordx4
+    s_add_u32       m0, m0, 16                                                      //next sgpr index
+    s_cmp_lt_u32    m0, s_save_alloc_size                                           //scc = (m0 < s_save_alloc_size) ? 1 : 0
+    s_cbranch_scc1  L_SAVE_SGPR_LOOP                                    //SGPR save is complete?
+    // restore s_save_buf_rsrc0,1
+    //s_mov_b64 s_save_buf_rsrc0, s_save_pc_lo
+    s_mov_b64 s_save_buf_rsrc0, s_save_xnack_mask_lo
+
+
+
+
+    /*          save first 4 VGPR, then LDS save could use   */
+        // each wave will alloc 4 vgprs at least...
+    /////////////////////////////////////////////////////////////////////////////////////
+
+    s_mov_b32       s_save_mem_offset, 0
+    s_mov_b32       exec_lo, 0xFFFFFFFF                                             //need every thread from now on
+    s_mov_b32       exec_hi, 0xFFFFFFFF
+
+        s_mov_b32       s_save_buf_rsrc2,  0x1000000                                //NUM_RECORDS in bytes
+
+    // VGPR Allocated in 4-GPR granularity
+
+        buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+        buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1  offset:256
+        buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1  offset:256*2
+        buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1  offset:256*3
+
+
+
+    /*          save LDS        */
+    //////////////////////////////
+
+  L_SAVE_LDS:
+
+        // Change EXEC to all threads...
+    s_mov_b32       exec_lo, 0xFFFFFFFF   //need every thread from now on
+    s_mov_b32       exec_hi, 0xFFFFFFFF
+
+    s_getreg_b32    s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)             //lds_size
+    s_and_b32       s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF                //lds_size is zero?
+    s_cbranch_scc0  L_SAVE_LDS_DONE                                                                            //no lds used? jump to L_SAVE_DONE
+
+    s_barrier               //LDS is used? wait for other waves in the same TG
+    //s_and_b32     s_save_tmp, tba_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK                //exec is still used here
+    s_and_b32       s_save_tmp, s_save_exec_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK                //exec is still used here
+    s_cbranch_scc0  L_SAVE_LDS_DONE
+
+        // first wave do LDS save;
+
+    s_lshl_b32      s_save_alloc_size, s_save_alloc_size, 6                         //LDS size in dwords = lds_size * 64dw
+    s_lshl_b32      s_save_alloc_size, s_save_alloc_size, 2                         //LDS size in bytes
+    s_mov_b32       s_save_buf_rsrc2,  s_save_alloc_size                            //NUM_RECORDS in bytes
+
+    // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG)
+    //
+    get_vgpr_size_bytes(s_save_mem_offset)
+    get_sgpr_size_bytes(s_save_tmp)
+    s_add_u32  s_save_mem_offset, s_save_mem_offset, s_save_tmp
+    s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes()
+
+
+        s_mov_b32       s_save_buf_rsrc2,  0x1000000                  //NUM_RECORDS in bytes
+    s_mov_b32       m0, 0x0                                               //lds_offset initial value = 0
+
+
+      v_mbcnt_lo_u32_b32 v2, 0xffffffff, 0x0
+      v_mbcnt_hi_u32_b32 v3, 0xffffffff, v2     // tid
+      v_mul_i32_i24 v2, v3, 8   // tid*8
+      v_mov_b32 v3, 256*2
+      s_mov_b32 m0, 0x10000
+      s_mov_b32 s0, s_save_buf_rsrc3
+      s_and_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0xFF7FFFFF    // disable add_tid
+      s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0x58000   //DFMT
+
+L_SAVE_LDS_LOOP_VECTOR:
+      ds_read_b64 v[0:1], v2    //x =LDS[a], byte address
+      s_waitcnt lgkmcnt(0)
+      buffer_store_dwordx2  v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1  glc:1  slc:1
+//      s_waitcnt vmcnt(0)
+      v_add_u32 v2, vcc[0:1], v2, v3
+      v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size
+      s_cbranch_vccnz L_SAVE_LDS_LOOP_VECTOR
+
+      // restore rsrc3
+      s_mov_b32 s_save_buf_rsrc3, s0
+
+L_SAVE_LDS_DONE:
+
+
+    /*          save VGPRs  - set the Rest VGPRs        */
+    //////////////////////////////////////////////////////////////////////////////////////
+  L_SAVE_VGPR:
+    // VGPR SR memory offset: 0
+    // TODO rearrange the RSRC words to use swizzle for VGPR save...
+
+    s_mov_b32       s_save_mem_offset, (0+256*4)                                    // for the rest VGPRs
+    s_mov_b32       exec_lo, 0xFFFFFFFF                                             //need every thread from now on
+    s_mov_b32       exec_hi, 0xFFFFFFFF
+
+    s_getreg_b32    s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)                   //vpgr_size
+    s_add_u32       s_save_alloc_size, s_save_alloc_size, 1
+    s_lshl_b32      s_save_alloc_size, s_save_alloc_size, 2                         //Number of VGPRs = (vgpr_size + 1) * 4    (non-zero value)   //FIXME for GFX, zero is possible
+    s_lshl_b32      s_save_buf_rsrc2,  s_save_alloc_size, 8                         //NUM_RECORDS in bytes (64 threads*4)
+        s_mov_b32       s_save_buf_rsrc2,  0x1000000                                //NUM_RECORDS in bytes
+
+    // VGPR store using dw burst
+    s_mov_b32         m0, 0x4   //VGPR initial index value =0
+    s_cmp_lt_u32      m0, s_save_alloc_size
+    s_cbranch_scc0    L_SAVE_VGPR_END
+
+
+    s_set_gpr_idx_on    m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1
+    s_add_u32       s_save_alloc_size, s_save_alloc_size, 0x1000                    //add 0x1000 since we compare m0 against it later
+
+  L_SAVE_VGPR_LOOP:
+    v_mov_b32       v0, v0              //v0 = v[0+m0]
+    v_mov_b32       v1, v1              //v0 = v[0+m0]
+    v_mov_b32       v2, v2              //v0 = v[0+m0]
+    v_mov_b32       v3, v3              //v0 = v[0+m0]
+
+        buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+        buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1  offset:256
+        buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1  offset:256*2
+        buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1  offset:256*3
+
+    s_add_u32       m0, m0, 4                                                       //next vgpr index
+    s_add_u32       s_save_mem_offset, s_save_mem_offset, 256*4                     //every buffer_store_dword does 256 bytes
+    s_cmp_lt_u32    m0, s_save_alloc_size                                           //scc = (m0 < s_save_alloc_size) ? 1 : 0
+    s_cbranch_scc1  L_SAVE_VGPR_LOOP                                                //VGPR save is complete?
+    s_set_gpr_idx_off
+
+L_SAVE_VGPR_END:
+    s_branch    L_END_PGM
+
+
+
+/**************************************************************************/
+/*                      restore routine                                   */
+/**************************************************************************/
+
+L_RESTORE:
+    /*      Setup Resource Contants    */
+    s_mov_b32       s_restore_buf_rsrc0,    s_restore_spi_init_lo                                                           //base_addr_lo
+    s_and_b32       s_restore_buf_rsrc1,    s_restore_spi_init_hi, 0x0000FFFF                                               //base_addr_hi
+    s_or_b32        s_restore_buf_rsrc1,    s_restore_buf_rsrc1,  S_RESTORE_BUF_RSRC_WORD1_STRIDE
+    s_mov_b32       s_restore_buf_rsrc2,    0                                                                               //NUM_RECORDS initial value = 0 (in bytes)
+    s_mov_b32       s_restore_buf_rsrc3,    S_RESTORE_BUF_RSRC_WORD3_MISC
+    s_and_b32       s_restore_tmp,          s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK
+    s_lshr_b32      s_restore_tmp,          s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)       //get ATC bit into position
+    s_or_b32        s_restore_buf_rsrc3,    s_restore_buf_rsrc3,  s_restore_tmp                                             //or ATC
+    s_and_b32       s_restore_tmp,          s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
+    s_lshr_b32      s_restore_tmp,          s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)   //get MTYPE bits into position
+    s_or_b32        s_restore_buf_rsrc3,    s_restore_buf_rsrc3,  s_restore_tmp                                             //or MTYPE
+
+    /*      global mem offset           */
+//  s_mov_b32       s_restore_mem_offset, 0x0                               //mem offset initial value = 0
+
+    /*      the first wave in the threadgroup    */
+    s_and_b32       s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+    s_cbranch_scc0  L_RESTORE_VGPR
+
+    /*          restore LDS     */
+    //////////////////////////////
+  L_RESTORE_LDS:
+
+    s_mov_b32       exec_lo, 0xFFFFFFFF                                                     //need every thread from now on   //be consistent with SAVE although can be moved ahead
+    s_mov_b32       exec_hi, 0xFFFFFFFF
+
+    s_getreg_b32    s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)              //lds_size
+    s_and_b32       s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF                  //lds_size is zero?
+    s_cbranch_scc0  L_RESTORE_VGPR                                                          //no lds used? jump to L_RESTORE_VGPR
+    s_lshl_b32      s_restore_alloc_size, s_restore_alloc_size, 6                           //LDS size in dwords = lds_size * 64dw
+    s_lshl_b32      s_restore_alloc_size, s_restore_alloc_size, 2                           //LDS size in bytes
+    s_mov_b32       s_restore_buf_rsrc2,    s_restore_alloc_size                            //NUM_RECORDS in bytes
+
+    // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG)
+    //
+    get_vgpr_size_bytes(s_restore_mem_offset)
+    get_sgpr_size_bytes(s_restore_tmp)
+    s_add_u32  s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+    s_add_u32  s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes()            //FIXME, Check if offset overflow???
+
+
+        s_mov_b32       s_restore_buf_rsrc2,  0x1000000                                     //NUM_RECORDS in bytes
+    s_mov_b32       m0, 0x0                                                                 //lds_offset initial value = 0
+
+  L_RESTORE_LDS_LOOP:
+        buffer_load_dword   v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1                    // first 64DW
+        buffer_load_dword   v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256         // second 64DW
+    s_add_u32       m0, m0, 256*2                                               // 128 DW
+    s_add_u32       s_restore_mem_offset, s_restore_mem_offset, 256*2           //mem offset increased by 128DW
+    s_cmp_lt_u32    m0, s_restore_alloc_size                                    //scc=(m0 < s_restore_alloc_size) ? 1 : 0
+    s_cbranch_scc1  L_RESTORE_LDS_LOOP                                                      //LDS restore is complete?
+
+
+    /*          restore VGPRs       */
+    //////////////////////////////
+  L_RESTORE_VGPR:
+        // VGPR SR memory offset : 0
+    s_mov_b32       s_restore_mem_offset, 0x0
+    s_mov_b32       exec_lo, 0xFFFFFFFF                                                     //need every thread from now on   //be consistent with SAVE although can be moved ahead
+    s_mov_b32       exec_hi, 0xFFFFFFFF
+
+    s_getreg_b32    s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)    //vpgr_size
+    s_add_u32       s_restore_alloc_size, s_restore_alloc_size, 1
+    s_lshl_b32      s_restore_alloc_size, s_restore_alloc_size, 2                           //Number of VGPRs = (vgpr_size + 1) * 4    (non-zero value)
+    s_lshl_b32      s_restore_buf_rsrc2,  s_restore_alloc_size, 8                           //NUM_RECORDS in bytes (64 threads*4)
+        s_mov_b32       s_restore_buf_rsrc2,  0x1000000                                     //NUM_RECORDS in bytes
+
+    // VGPR load using dw burst
+    s_mov_b32       s_restore_mem_offset_save, s_restore_mem_offset     // restore start with v1, v0 will be the last
+    s_add_u32       s_restore_mem_offset, s_restore_mem_offset, 256*4
+    s_mov_b32       m0, 4                               //VGPR initial index value = 1
+    s_set_gpr_idx_on  m0, 0x8                       //M0[7:0] = M0[7:0] and M0[15:12] = 0x8
+    s_add_u32       s_restore_alloc_size, s_restore_alloc_size, 0x8000                      //add 0x8000 since we compare m0 against it later
+
+  L_RESTORE_VGPR_LOOP:
+        buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
+        buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256
+        buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2
+        buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3
+    s_waitcnt       vmcnt(0)                                                                //ensure data ready
+    v_mov_b32       v0, v0                                                                  //v[0+m0] = v0
+    v_mov_b32       v1, v1
+    v_mov_b32       v2, v2
+    v_mov_b32       v3, v3
+    s_add_u32       m0, m0, 4                                                               //next vgpr index
+    s_add_u32       s_restore_mem_offset, s_restore_mem_offset, 256*4                           //every buffer_load_dword does 256 bytes
+    s_cmp_lt_u32    m0, s_restore_alloc_size                                                //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+    s_cbranch_scc1  L_RESTORE_VGPR_LOOP                                                     //VGPR restore (except v0) is complete?
+    s_set_gpr_idx_off
+                                                                                            /* VGPR restore on v0 */
+        buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save    slc:1 glc:1
+        buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save    slc:1 glc:1 offset:256
+        buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save    slc:1 glc:1 offset:256*2
+        buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save    slc:1 glc:1 offset:256*3
+        s_waitcnt vmcnt(0)
+
+    /*          restore SGPRs       */
+    //////////////////////////////
+
+    // SGPR SR memory offset : size(VGPR)
+    get_vgpr_size_bytes(s_restore_mem_offset)
+    get_sgpr_size_bytes(s_restore_tmp)
+    s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+    s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 16*4     // restore SGPR from S[n] to S[0], by 16 sgprs group
+    // TODO, change RSRC word to rearrange memory layout for SGPRS
+
+    s_getreg_b32    s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE)                //spgr_size
+    s_add_u32       s_restore_alloc_size, s_restore_alloc_size, 1
+    s_lshl_b32      s_restore_alloc_size, s_restore_alloc_size, 4                           //Number of SGPRs = (sgpr_size + 1) * 16   (non-zero value)
+
+        s_lshl_b32      s_restore_buf_rsrc2,    s_restore_alloc_size, 2                     //NUM_RECORDS in bytes
+        s_mov_b32       s_restore_buf_rsrc2,  0x1000000                                     //NUM_RECORDS in bytes
+
+    /* If 112 SGPRs ar allocated, 4 sgprs are not used TBA(108,109),TMA(110,111),
+       However, we are safe to restore these 4 SGPRs anyway, since TBA,TMA will later be restored by HWREG
+    */
+    s_mov_b32 m0, s_restore_alloc_size
+
+ L_RESTORE_SGPR_LOOP:
+    read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)  //PV: further performance improvement can be made
+    s_waitcnt       lgkmcnt(0)                                                              //ensure data ready
+
+    s_sub_u32 m0, m0, 16    // Restore from S[n] to S[0]
+
+    s_movreld_b64   s0, s0      //s[0+m0] = s0
+    s_movreld_b64   s2, s2
+    s_movreld_b64   s4, s4
+    s_movreld_b64   s6, s6
+    s_movreld_b64   s8, s8
+    s_movreld_b64   s10, s10
+    s_movreld_b64   s12, s12
+    s_movreld_b64   s14, s14
+
+    s_cmp_eq_u32    m0, 0               //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+    s_cbranch_scc0  L_RESTORE_SGPR_LOOP             //SGPR restore (except s0) is complete?
+
+    /*      restore HW registers    */
+    //////////////////////////////
+  L_RESTORE_HWREG:
+
+    // HWREG SR memory offset : size(VGPR)+size(SGPR)
+    get_vgpr_size_bytes(s_restore_mem_offset)
+    get_sgpr_size_bytes(s_restore_tmp)
+    s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+
+
+    s_mov_b32       s_restore_buf_rsrc2, 0x4                                                //NUM_RECORDS   in bytes
+        s_mov_b32       s_restore_buf_rsrc2,  0x1000000                                     //NUM_RECORDS in bytes
+
+    read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset)                    //M0
+    read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset)             //PC
+    read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+    read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset)               //EXEC
+    read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+    read_hwreg_from_mem(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset)                //STATUS
+    read_hwreg_from_mem(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset)               //TRAPSTS
+    read_hwreg_from_mem(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset)                   //XNACK_MASK_LO
+    read_hwreg_from_mem(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset)                   //XNACK_MASK_HI
+    read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset)              //MODE
+    read_hwreg_from_mem(tba_lo, s_restore_buf_rsrc0, s_restore_mem_offset)                      //TBA_LO
+    read_hwreg_from_mem(tba_hi, s_restore_buf_rsrc0, s_restore_mem_offset)                      //TBA_HI
+
+    s_waitcnt       lgkmcnt(0)                                                                                      //from now on, it is safe to restore STATUS and IB_STS
+
+    s_mov_b32       m0,         s_restore_m0
+    s_mov_b32       exec_lo,    s_restore_exec_lo
+    s_mov_b32       exec_hi,    s_restore_exec_hi
+
+    s_and_b32       s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
+    s_setreg_b32    hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0
+    s_and_b32       s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts
+    s_lshr_b32      s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT
+    s_setreg_b32    hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0
+    //s_setreg_b32  hwreg(HW_REG_TRAPSTS),  s_restore_trapsts      //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore
+    s_setreg_b32    hwreg(HW_REG_MODE),     s_restore_mode
+    //reuse s_restore_m0 as a temp register
+    s_and_b32       s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK
+    s_lshr_b32      s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
+    s_lshl_b32      s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
+    s_mov_b32       s_restore_tmp, 0x0                                                                              //IB_STS is zero
+    s_or_b32        s_restore_tmp, s_restore_tmp, s_restore_m0
+    s_and_b32       s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK
+    s_lshr_b32      s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+    s_lshl_b32      s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
+    s_or_b32        s_restore_tmp, s_restore_tmp, s_restore_m0
+    s_and_b32       s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
+    s_lshr_b32      s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
+    s_setreg_b32    hwreg(HW_REG_IB_STS),   s_restore_tmp
+
+    s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff      //pc[47:32]        //Do it here in order not to affect STATUS
+    s_and_b64    exec, exec, exec  // Restore STATUS.EXECZ, not writable by s_setreg_b32
+    s_and_b64    vcc, vcc, vcc  // Restore STATUS.VCCZ, not writable by s_setreg_b32
+    set_status_without_spi_prio(s_restore_status, s_restore_tmp) // SCC is included, which is changed by previous salu
+
+    s_barrier                                                   //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
+
+//  s_rfe_b64 s_restore_pc_lo                                   //Return to the main shader program and resume execution
+    s_rfe_restore_b64  s_restore_pc_lo, s_restore_m0            // s_restore_m0[0] is used to set STATUS.inst_atc
+
+
+/**************************************************************************/
+/*                      the END                                           */
+/**************************************************************************/
+L_END_PGM:
+    s_endpgm
+
+end
+
+
+/**************************************************************************/
+/*                      the helper functions                              */
+/**************************************************************************/
+
+//Only for save hwreg to mem
+function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
+        s_mov_b32 exec_lo, m0                   //assuming exec_lo is not needed anymore from this point on
+        s_mov_b32 m0, s_mem_offset
+        s_buffer_store_dword s, s_rsrc, m0      glc:1
+        s_add_u32       s_mem_offset, s_mem_offset, 4
+        s_mov_b32   m0, exec_lo
+end
+
+
+// HWREG are saved before SGPRs, so all HWREG could be use.
+function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset)
+
+        s_buffer_store_dwordx4 s[0], s_rsrc, 0  glc:1
+        s_buffer_store_dwordx4 s[4], s_rsrc, 16  glc:1
+        s_buffer_store_dwordx4 s[8], s_rsrc, 32  glc:1
+        s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1
+        s_add_u32       s_rsrc[0], s_rsrc[0], 4*16
+        s_addc_u32      s_rsrc[1], s_rsrc[1], 0x0             // +scc
+end
+
+
+function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
+    s_buffer_load_dword s, s_rsrc, s_mem_offset     glc:1
+    s_add_u32       s_mem_offset, s_mem_offset, 4
+end
+
+function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
+    s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset      glc:1
+    s_sub_u32       s_mem_offset, s_mem_offset, 4*16
+end
+
+
+
+function get_lds_size_bytes(s_lds_size_byte)
+    // SQ LDS granularity is 64DW, while PGM_RSRC2.lds_size is in granularity 128DW
+    s_getreg_b32   s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)          // lds_size
+    s_lshl_b32     s_lds_size_byte, s_lds_size_byte, 8                      //LDS size in dwords = lds_size * 64 *4Bytes    // granularity 64DW
+end
+
+function get_vgpr_size_bytes(s_vgpr_size_byte)
+    s_getreg_b32   s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)  //vpgr_size
+    s_add_u32      s_vgpr_size_byte, s_vgpr_size_byte, 1
+    s_lshl_b32     s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //Number of VGPRs = (vgpr_size + 1) * 4 * 64 * 4   (non-zero value)   //FIXME for GFX, zero is possible
+end
+
+function get_sgpr_size_bytes(s_sgpr_size_byte)
+    s_getreg_b32   s_sgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE)  //spgr_size
+    s_add_u32      s_sgpr_size_byte, s_sgpr_size_byte, 1
+    s_lshl_b32     s_sgpr_size_byte, s_sgpr_size_byte, 6 //Number of SGPRs = (sgpr_size + 1) * 16 *4   (non-zero value)
+end
+
+function get_hwreg_size_bytes
+    return 128 //HWREG size 128 bytes
+end
+
+function set_status_without_spi_prio(status, tmp)
+    // Do not restore STATUS.SPI_PRIO since scheduler may have raised it.
+    s_lshr_b32      tmp, status, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT
+    s_setreg_b32    hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE), tmp
+    s_nop           0x2 // avoid S_SETREG => S_SETREG hazard
+    s_setreg_b32    hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status
+end
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
new file mode 100644
index 000000000..e506411ad
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -0,0 +1,1099 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* To compile this assembly code:
+ *
+ * gfx9:
+ *   cpp -DASIC_FAMILY=CHIP_VEGAM cwsr_trap_handler_gfx9.asm -P -o gfx9.sp3
+ *   sp3 gfx9.sp3 -hex gfx9.hex
+ *
+ * arcturus:
+ *   cpp -DASIC_FAMILY=CHIP_ARCTURUS cwsr_trap_handler_gfx9.asm -P -o arcturus.sp3
+ *   sp3 arcturus.sp3 -hex arcturus.hex
+ *
+ * aldebaran:
+ *   cpp -DASIC_FAMILY=CHIP_ALDEBARAN cwsr_trap_handler_gfx9.asm -P -o aldebaran.sp3
+ *   sp3 aldebaran.sp3 -hex aldebaran.hex
+ *
+ * gc_9_4_3:
+ *   cpp -DASIC_FAMILY=GC_9_4_3 cwsr_trap_handler_gfx9.asm -P -o gc_9_4_3.sp3
+ *   sp3 gc_9_4_3.sp3 -hex gc_9_4_3.hex
+ */
+
+#define CHIP_VEGAM 18
+#define CHIP_ARCTURUS 23
+#define CHIP_ALDEBARAN 25
+#define CHIP_GC_9_4_3 26
+
+var ACK_SQC_STORE		    =	1		    //workaround for suspected SQC store bug causing incorrect stores under concurrency
+var SAVE_AFTER_XNACK_ERROR	    =	1		    //workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger
+var SINGLE_STEP_MISSED_WORKAROUND   =	(ASIC_FAMILY <= CHIP_ALDEBARAN)	//workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
+
+/**************************************************************************/
+/*			variables					  */
+/**************************************************************************/
+var SQ_WAVE_STATUS_SPI_PRIO_SHIFT  = 1
+var SQ_WAVE_STATUS_SPI_PRIO_MASK   = 0x00000006
+var SQ_WAVE_STATUS_HALT_MASK       = 0x2000
+var SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT   = 0
+var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE    = 1
+var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT  = 3
+var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE   = 29
+var SQ_WAVE_STATUS_ALLOW_REPLAY_MASK    = 0x400000
+var SQ_WAVE_STATUS_ECC_ERR_MASK         = 0x20000
+
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT	= 12
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE	= 9
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE	= 6
+var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE	= 3			//FIXME	 sq.blk still has 4 bits at this time while SQ programming guide has 3 bits
+var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT	= 24
+
+#if ASIC_FAMILY >= CHIP_ALDEBARAN
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT	= 6
+var SQ_WAVE_GPR_ALLOC_ACCV_OFFSET_SHIFT	= 12
+var SQ_WAVE_GPR_ALLOC_ACCV_OFFSET_SIZE	= 6
+#else
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT	= 8
+#endif
+
+var SQ_WAVE_TRAPSTS_SAVECTX_MASK    =	0x400
+var SQ_WAVE_TRAPSTS_EXCP_MASK	    =	0x1FF
+var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT   =	10
+var SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK =	0x80
+var SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT =	7
+var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK   =	0x100
+var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT  =	8
+var SQ_WAVE_TRAPSTS_HOST_TRAP_MASK  =	0x400000
+var SQ_WAVE_TRAPSTS_WAVE_BEGIN_MASK =	0x800000
+var SQ_WAVE_TRAPSTS_WAVE_END_MASK   =	0x1000000
+var SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK =  0x2000000
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK	=   0x3FF
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT	=   0x0
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE	=   10
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK	=   0xFFFFF800
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT	=   11
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE	=   21
+var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK	=   0x800
+var SQ_WAVE_TRAPSTS_EXCP_HI_MASK	=   0x7000
+var SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK	=   0x10000000
+
+var SQ_WAVE_MODE_EXCP_EN_SHIFT		=   12
+var SQ_WAVE_MODE_EXCP_EN_ADDR_WATCH_SHIFT	= 19
+
+var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT	=   15			//FIXME
+var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK	= 0x1F8000
+
+var SQ_WAVE_MODE_DEBUG_EN_MASK		=   0x800
+
+var TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT	=   26			// bits [31:26] unused by SPI debug data
+var TTMP_SAVE_RCNT_FIRST_REPLAY_MASK	=   0xFC000000
+var TTMP_DEBUG_TRAP_ENABLED_SHIFT	=   23
+var TTMP_DEBUG_TRAP_ENABLED_MASK	=   0x800000
+
+/*	Save	    */
+var S_SAVE_BUF_RSRC_WORD1_STRIDE	=   0x00040000		//stride is 4 bytes
+var S_SAVE_BUF_RSRC_WORD3_MISC		=   0x00807FAC		//SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
+var S_SAVE_PC_HI_TRAP_ID_MASK		=   0x00FF0000
+var S_SAVE_PC_HI_HT_MASK		=   0x01000000
+var S_SAVE_SPI_INIT_FIRST_WAVE_MASK	=   0x04000000		//bit[26]: FirstWaveInTG
+var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT	=   26
+
+var s_save_spi_init_lo		    =	exec_lo
+var s_save_spi_init_hi		    =	exec_hi
+
+var s_save_pc_lo	    =	ttmp0		//{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
+var s_save_pc_hi	    =	ttmp1
+var s_save_exec_lo	    =	ttmp2
+var s_save_exec_hi	    =	ttmp3
+var s_save_tmp		    =	ttmp14
+var s_save_trapsts	    =	ttmp15		//not really used until the end of the SAVE routine
+var s_save_xnack_mask_lo    =	ttmp6
+var s_save_xnack_mask_hi    =	ttmp7
+var s_save_buf_rsrc0	    =	ttmp8
+var s_save_buf_rsrc1	    =	ttmp9
+var s_save_buf_rsrc2	    =	ttmp10
+var s_save_buf_rsrc3	    =	ttmp11
+var s_save_status	    =	ttmp12
+var s_save_mem_offset	    =	ttmp4
+var s_save_alloc_size	    =	s_save_trapsts		//conflict
+var s_save_m0		    =	ttmp5
+var s_save_ttmps_lo	    =	s_save_tmp		//no conflict
+var s_save_ttmps_hi	    =	s_save_trapsts		//no conflict
+#if ASIC_FAMILY >= CHIP_GC_9_4_3
+var s_save_ib_sts       =	ttmp13
+#else
+var s_save_ib_sts       =	ttmp11
+#endif
+
+/*	Restore	    */
+var S_RESTORE_BUF_RSRC_WORD1_STRIDE	    =	S_SAVE_BUF_RSRC_WORD1_STRIDE
+var S_RESTORE_BUF_RSRC_WORD3_MISC	    =	S_SAVE_BUF_RSRC_WORD3_MISC
+
+var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK	    =	0x04000000	    //bit[26]: FirstWaveInTG
+var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT	    =	26
+
+var s_restore_spi_init_lo		    =	exec_lo
+var s_restore_spi_init_hi		    =	exec_hi
+
+var s_restore_mem_offset	=   ttmp12
+var s_restore_tmp2		=   ttmp13
+var s_restore_alloc_size	=   ttmp3
+var s_restore_tmp		=   ttmp2
+var s_restore_mem_offset_save	=   s_restore_tmp	//no conflict
+var s_restore_accvgpr_offset_save = ttmp7
+
+var s_restore_m0	    =	s_restore_alloc_size	//no conflict
+
+var s_restore_mode	    =	s_restore_accvgpr_offset_save
+
+var s_restore_pc_lo	    =	ttmp0
+var s_restore_pc_hi	    =	ttmp1
+var s_restore_exec_lo	    =	ttmp4
+var s_restore_exec_hi	    = 	ttmp5
+var s_restore_status	    =	ttmp14
+var s_restore_trapsts	    =	ttmp15
+var s_restore_xnack_mask_lo =	xnack_mask_lo
+var s_restore_xnack_mask_hi =	xnack_mask_hi
+var s_restore_buf_rsrc0	    =	ttmp8
+var s_restore_buf_rsrc1	    =	ttmp9
+var s_restore_buf_rsrc2	    =	ttmp10
+var s_restore_buf_rsrc3	    =	ttmp11
+var s_restore_ttmps_lo	    =	s_restore_tmp		//no conflict
+var s_restore_ttmps_hi	    =	s_restore_alloc_size	//no conflict
+
+/**************************************************************************/
+/*			trap handler entry points			  */
+/**************************************************************************/
+/* Shader Main*/
+
+shader main
+  asic(DEFAULT)
+  type(CS)
+
+
+	s_branch L_SKIP_RESTORE					    //NOT restore. might be a regular trap or save
+
+L_JUMP_TO_RESTORE:
+    s_branch L_RESTORE						    //restore
+
+L_SKIP_RESTORE:
+
+    s_getreg_b32    s_save_status, hwreg(HW_REG_STATUS)				    //save STATUS since we will change SCC
+
+    // Clear SPI_PRIO: do not save with elevated priority.
+    // Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
+    s_andn2_b32     s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_ECC_ERR_MASK
+
+    s_getreg_b32    s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+
+    s_and_b32       ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+    s_cbranch_scc0  L_NOT_HALTED
+
+L_HALTED:
+    // Host trap may occur while wave is halted.
+    s_and_b32       ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+    s_cbranch_scc1  L_FETCH_2ND_TRAP
+
+L_CHECK_SAVE:
+    s_and_b32       ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK    //check whether this is for save
+    s_cbranch_scc1  L_SAVE					//this is the operation for save
+
+    // Wave is halted but neither host trap nor SAVECTX is raised.
+    // Caused by instruction fetch memory violation.
+    // Spin wait until context saved to prevent interrupt storm.
+    s_sleep         0x10
+    s_getreg_b32    s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+    s_branch        L_CHECK_SAVE
+
+L_NOT_HALTED:
+    // Let second-level handle non-SAVECTX exception or trap.
+    // Any concurrent SAVECTX will be handled upon re-entry once halted.
+
+    // Check non-maskable exceptions. memory_violation, illegal_instruction
+    // and debugger (host trap, wave start/end, trap after instruction)
+    // exceptions always cause the wave to enter the trap handler.
+    s_and_b32       ttmp2, s_save_trapsts,      \
+        SQ_WAVE_TRAPSTS_MEM_VIOL_MASK         | \
+        SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK     | \
+        SQ_WAVE_TRAPSTS_HOST_TRAP_MASK        | \
+        SQ_WAVE_TRAPSTS_WAVE_BEGIN_MASK       | \
+        SQ_WAVE_TRAPSTS_WAVE_END_MASK         | \
+        SQ_WAVE_TRAPSTS_TRAP_AFTER_INST_MASK
+    s_cbranch_scc1  L_FETCH_2ND_TRAP
+
+    // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
+    // Maskable exceptions only cause the wave to enter the trap handler if
+    // their respective bit in mode.excp_en is set.
+    s_and_b32       ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
+    s_cbranch_scc0  L_CHECK_TRAP_ID
+
+    s_and_b32       ttmp3, s_save_trapsts, SQ_WAVE_TRAPSTS_ADDR_WATCH_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
+    s_cbranch_scc0  L_NOT_ADDR_WATCH
+    s_bitset1_b32   ttmp2, SQ_WAVE_TRAPSTS_ADDR_WATCH_SHIFT // Check all addr_watch[123] exceptions against excp_en.addr_watch
+
+L_NOT_ADDR_WATCH:
+    s_getreg_b32    ttmp3, hwreg(HW_REG_MODE)
+    s_lshl_b32      ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT
+    s_and_b32       ttmp2, ttmp2, ttmp3
+    s_cbranch_scc1  L_FETCH_2ND_TRAP
+
+L_CHECK_TRAP_ID:
+    // Check trap_id != 0
+    s_and_b32       ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+    s_cbranch_scc1  L_FETCH_2ND_TRAP
+
+if SINGLE_STEP_MISSED_WORKAROUND
+    // Prioritize single step exception over context save.
+    // Second-level trap will halt wave and RFE, re-entering for SAVECTX.
+    s_getreg_b32    ttmp2, hwreg(HW_REG_MODE)
+    s_and_b32       ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
+    s_cbranch_scc1  L_FETCH_2ND_TRAP
+end
+
+    s_and_b32       ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK
+    s_cbranch_scc1  L_SAVE
+
+L_FETCH_2ND_TRAP:
+    // Preserve and clear scalar XNACK state before issuing scalar reads.
+    save_and_clear_ib_sts(ttmp14)
+
+    // Read second-level TBA/TMA from first-level TMA and jump if available.
+    // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
+    // ttmp12 holds SQ_WAVE_STATUS
+    s_getreg_b32    ttmp14, hwreg(HW_REG_SQ_SHADER_TMA_LO)
+    s_getreg_b32    ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI)
+    s_lshl_b64      [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
+
+    s_bitcmp1_b32   ttmp15, 0xF
+    s_cbranch_scc0  L_NO_SIGN_EXTEND_TMA
+    s_or_b32        ttmp15, ttmp15, 0xFFFF0000
+L_NO_SIGN_EXTEND_TMA:
+
+    s_load_dword    ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
+    s_waitcnt       lgkmcnt(0)
+    s_lshl_b32      ttmp2, ttmp2, TTMP_DEBUG_TRAP_ENABLED_SHIFT
+    s_andn2_b32     s_save_ib_sts, s_save_ib_sts, TTMP_DEBUG_TRAP_ENABLED_MASK
+    s_or_b32        s_save_ib_sts, s_save_ib_sts, ttmp2
+
+    s_load_dwordx2  [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
+    s_waitcnt       lgkmcnt(0)
+    s_load_dwordx2  [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA
+    s_waitcnt       lgkmcnt(0)
+
+    s_and_b64       [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
+    s_cbranch_scc0  L_NO_NEXT_TRAP // second-level trap handler not been set
+    s_setpc_b64     [ttmp2, ttmp3] // jump to second-level trap handler
+
+L_NO_NEXT_TRAP:
+    // If not caused by trap then halt wave to prevent re-entry.
+    s_and_b32       ttmp2, s_save_pc_hi, (S_SAVE_PC_HI_TRAP_ID_MASK|S_SAVE_PC_HI_HT_MASK)
+    s_cbranch_scc1  L_TRAP_CASE
+    s_or_b32        s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+
+    // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
+    // Rewind the PC to prevent this from occurring.
+    s_sub_u32       ttmp0, ttmp0, 0x8
+    s_subb_u32      ttmp1, ttmp1, 0x0
+
+    s_branch        L_EXIT_TRAP
+
+L_TRAP_CASE:
+    // Host trap will not cause trap re-entry.
+    s_and_b32       ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK
+    s_cbranch_scc1  L_EXIT_TRAP
+
+    // Advance past trap instruction to prevent re-entry.
+    s_add_u32       ttmp0, ttmp0, 0x4
+    s_addc_u32      ttmp1, ttmp1, 0x0
+
+L_EXIT_TRAP:
+    s_and_b32	ttmp1, ttmp1, 0xFFFF
+
+    restore_ib_sts(ttmp14)
+
+    // Restore SQ_WAVE_STATUS.
+    s_and_b64       exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
+    s_and_b64       vcc, vcc, vcc    // Restore STATUS.VCCZ, not writable by s_setreg_b32
+    set_status_without_spi_prio(s_save_status, ttmp2)
+
+    s_rfe_b64       [ttmp0, ttmp1]
+
+    // *********	End handling of non-CWSR traps	 *******************
+
+/**************************************************************************/
+/*			save routine					  */
+/**************************************************************************/
+
+L_SAVE:
+    s_and_b32	    s_save_pc_hi, s_save_pc_hi, 0x0000ffff    //pc[47:32]
+
+    s_mov_b32	    s_save_tmp, 0							    //clear saveCtx bit
+    s_setreg_b32    hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp	    //clear saveCtx bit
+
+    save_and_clear_ib_sts(s_save_tmp)
+
+    /*	    inform SPI the readiness and wait for SPI's go signal */
+    s_mov_b32	    s_save_exec_lo, exec_lo						    //save EXEC and use EXEC for the go signal from SPI
+    s_mov_b32	    s_save_exec_hi, exec_hi
+    s_mov_b64	    exec,   0x0								    //clear EXEC to get ready to receive
+
+	s_sendmsg   sendmsg(MSG_SAVEWAVE)  //send SPI a message and wait for SPI's write to EXEC
+
+    // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for.
+    s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT)
+    s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp
+
+  L_SLEEP:
+    s_sleep 0x2		       // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0
+
+	s_cbranch_execz L_SLEEP
+
+    // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
+    // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
+    get_vgpr_size_bytes(s_save_ttmps_lo)
+    get_sgpr_size_bytes(s_save_ttmps_hi)
+    s_add_u32	    s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi
+    s_add_u32	    s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
+    s_addc_u32	    s_save_ttmps_hi, s_save_spi_init_hi, 0x0
+    s_and_b32	    s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF
+    s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1
+    ack_sqc_store_workaround()
+    s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1
+    ack_sqc_store_workaround()
+    s_store_dword   ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1
+    ack_sqc_store_workaround()
+
+    /*	    setup Resource Contants    */
+    s_mov_b32	    s_save_buf_rsrc0,	s_save_spi_init_lo							//base_addr_lo
+    s_and_b32	    s_save_buf_rsrc1,	s_save_spi_init_hi, 0x0000FFFF						//base_addr_hi
+    s_or_b32	    s_save_buf_rsrc1,	s_save_buf_rsrc1,  S_SAVE_BUF_RSRC_WORD1_STRIDE
+    s_mov_b32	    s_save_buf_rsrc2,	0									//NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited
+    s_mov_b32	    s_save_buf_rsrc3,	S_SAVE_BUF_RSRC_WORD3_MISC
+
+    //FIXME  right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi  (might need to save them before using them?)
+    s_mov_b32	    s_save_m0,		m0								    //save M0
+
+    /*	    global mem offset		*/
+    s_mov_b32	    s_save_mem_offset,	0x0									//mem offset initial value = 0
+
+
+
+
+    /*	    save HW registers	*/
+    //////////////////////////////
+
+  L_SAVE_HWREG:
+	// HWREG SR memory offset : size(VGPR)+size(SGPR)
+       get_vgpr_size_bytes(s_save_mem_offset)
+       get_sgpr_size_bytes(s_save_tmp)
+       s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+
+
+    s_mov_b32	    s_save_buf_rsrc2, 0x4				//NUM_RECORDS	in bytes
+	s_mov_b32	s_save_buf_rsrc2,  0x1000000				    //NUM_RECORDS in bytes
+
+
+    write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)			//M0
+    write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset)		    //PC
+    write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
+    write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset)		//EXEC
+    write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
+    write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset)		//STATUS
+
+    //s_save_trapsts conflicts with s_save_alloc_size
+    s_getreg_b32    s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+    write_hwreg_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset)		//TRAPSTS
+
+    write_hwreg_to_mem(xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset)	    //XNACK_MASK_LO
+    write_hwreg_to_mem(xnack_mask_hi, s_save_buf_rsrc0, s_save_mem_offset)	    //XNACK_MASK_HI
+
+    //use s_save_tmp would introduce conflict here between s_save_tmp and s_save_buf_rsrc2
+    s_getreg_b32    s_save_m0, hwreg(HW_REG_MODE)						    //MODE
+    write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
+
+
+
+    /*	    the first wave in the threadgroup	 */
+    s_and_b32	    s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK	// extract fisrt wave bit
+    s_mov_b32	     s_save_exec_hi, 0x0
+    s_or_b32	     s_save_exec_hi, s_save_tmp, s_save_exec_hi				 // save first wave bit to s_save_exec_hi.bits[26]
+
+
+    /*		save SGPRs	*/
+	// Save SGPR before LDS save, then the s0 to s4 can be used during LDS save...
+    //////////////////////////////
+
+    // SGPR SR memory offset : size(VGPR)
+    get_vgpr_size_bytes(s_save_mem_offset)
+    // TODO, change RSRC word to rearrange memory layout for SGPRS
+
+    s_getreg_b32    s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE)		//spgr_size
+    s_add_u32	    s_save_alloc_size, s_save_alloc_size, 1
+    s_lshl_b32	    s_save_alloc_size, s_save_alloc_size, 4			    //Number of SGPRs = (sgpr_size + 1) * 16   (non-zero value)
+
+	s_lshl_b32	s_save_buf_rsrc2,   s_save_alloc_size, 2		    //NUM_RECORDS in bytes
+
+	s_mov_b32	s_save_buf_rsrc2,  0x1000000				    //NUM_RECORDS in bytes
+
+
+    // backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0
+    //s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0
+    s_mov_b64 s_save_xnack_mask_lo, s_save_buf_rsrc0
+    s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset
+    s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0
+
+    s_mov_b32	    m0, 0x0			    //SGPR initial index value =0
+    s_nop	    0x0				    //Manually inserted wait states
+  L_SAVE_SGPR_LOOP:
+    // SGPR is allocated in 16 SGPR granularity
+    s_movrels_b64   s0, s0     //s0 = s[0+m0], s1 = s[1+m0]
+    s_movrels_b64   s2, s2     //s2 = s[2+m0], s3 = s[3+m0]
+    s_movrels_b64   s4, s4     //s4 = s[4+m0], s5 = s[5+m0]
+    s_movrels_b64   s6, s6     //s6 = s[6+m0], s7 = s[7+m0]
+    s_movrels_b64   s8, s8     //s8 = s[8+m0], s9 = s[9+m0]
+    s_movrels_b64   s10, s10   //s10 = s[10+m0], s11 = s[11+m0]
+    s_movrels_b64   s12, s12   //s12 = s[12+m0], s13 = s[13+m0]
+    s_movrels_b64   s14, s14   //s14 = s[14+m0], s15 = s[15+m0]
+
+    write_16sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset) //PV: the best performance should be using s_buffer_store_dwordx4
+    s_add_u32	    m0, m0, 16							    //next sgpr index
+    s_cmp_lt_u32    m0, s_save_alloc_size					    //scc = (m0 < s_save_alloc_size) ? 1 : 0
+    s_cbranch_scc1  L_SAVE_SGPR_LOOP					//SGPR save is complete?
+    // restore s_save_buf_rsrc0,1
+    //s_mov_b64 s_save_buf_rsrc0, s_save_pc_lo
+    s_mov_b64 s_save_buf_rsrc0, s_save_xnack_mask_lo
+
+
+
+
+    /*		save first 4 VGPR, then LDS save could use   */
+	// each wave will alloc 4 vgprs at least...
+    /////////////////////////////////////////////////////////////////////////////////////
+
+    s_mov_b32	    s_save_mem_offset, 0
+    s_mov_b32	    exec_lo, 0xFFFFFFFF						    //need every thread from now on
+    s_mov_b32	    exec_hi, 0xFFFFFFFF
+    s_mov_b32	    xnack_mask_lo, 0x0
+    s_mov_b32	    xnack_mask_hi, 0x0
+
+	s_mov_b32	s_save_buf_rsrc2,  0x1000000				    //NUM_RECORDS in bytes
+
+
+    // VGPR Allocated in 4-GPR granularity
+
+if SAVE_AFTER_XNACK_ERROR
+	check_if_tcp_store_ok()
+	s_cbranch_scc1 L_SAVE_FIRST_VGPRS_WITH_TCP
+
+	write_vgprs_to_mem_with_sqc(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
+	s_branch L_SAVE_LDS
+
+L_SAVE_FIRST_VGPRS_WITH_TCP:
+end
+
+    write_4vgprs_to_mem(s_save_buf_rsrc0, s_save_mem_offset)
+
+    /*		save LDS	*/
+    //////////////////////////////
+
+  L_SAVE_LDS:
+
+	// Change EXEC to all threads...
+    s_mov_b32	    exec_lo, 0xFFFFFFFF	  //need every thread from now on
+    s_mov_b32	    exec_hi, 0xFFFFFFFF
+
+    s_getreg_b32    s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)		    //lds_size
+    s_and_b32	    s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF		    //lds_size is zero?
+    s_cbranch_scc0  L_SAVE_LDS_DONE									       //no lds used? jump to L_SAVE_DONE
+
+    s_barrier		    //LDS is used? wait for other waves in the same TG
+    s_and_b32	    s_save_tmp, s_save_exec_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK		       //exec is still used here
+    s_cbranch_scc0  L_SAVE_LDS_DONE
+
+	// first wave do LDS save;
+
+    s_lshl_b32	    s_save_alloc_size, s_save_alloc_size, 6			    //LDS size in dwords = lds_size * 64dw
+    s_lshl_b32	    s_save_alloc_size, s_save_alloc_size, 2			    //LDS size in bytes
+    s_mov_b32	    s_save_buf_rsrc2,  s_save_alloc_size			    //NUM_RECORDS in bytes
+
+    // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG)
+    //
+    get_vgpr_size_bytes(s_save_mem_offset)
+    get_sgpr_size_bytes(s_save_tmp)
+    s_add_u32  s_save_mem_offset, s_save_mem_offset, s_save_tmp
+    s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes()
+
+
+	s_mov_b32	s_save_buf_rsrc2,  0x1000000		      //NUM_RECORDS in bytes
+
+    s_mov_b32	    m0, 0x0						  //lds_offset initial value = 0
+
+
+      v_mbcnt_lo_u32_b32 v2, 0xffffffff, 0x0
+      v_mbcnt_hi_u32_b32 v3, 0xffffffff, v2	// tid
+
+if SAVE_AFTER_XNACK_ERROR
+	check_if_tcp_store_ok()
+	s_cbranch_scc1 L_SAVE_LDS_WITH_TCP
+
+	v_lshlrev_b32 v2, 2, v3
+L_SAVE_LDS_LOOP_SQC:
+	ds_read2_b32 v[0:1], v2 offset0:0 offset1:0x40
+	s_waitcnt lgkmcnt(0)
+
+	write_vgprs_to_mem_with_sqc(v0, 2, s_save_buf_rsrc0, s_save_mem_offset)
+
+	v_add_u32 v2, 0x200, v2
+	v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size
+	s_cbranch_vccnz L_SAVE_LDS_LOOP_SQC
+
+	s_branch L_SAVE_LDS_DONE
+
+L_SAVE_LDS_WITH_TCP:
+end
+
+      v_mul_i32_i24 v2, v3, 8	// tid*8
+      v_mov_b32 v3, 256*2
+      s_mov_b32 m0, 0x10000
+      s_mov_b32 s0, s_save_buf_rsrc3
+      s_and_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0xFF7FFFFF	  // disable add_tid
+      s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0x58000   //DFMT
+
+L_SAVE_LDS_LOOP_VECTOR:
+      ds_read_b64 v[0:1], v2	//x =LDS[a], byte address
+      s_waitcnt lgkmcnt(0)
+      buffer_store_dwordx2  v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1  glc:1  slc:1
+//	s_waitcnt vmcnt(0)
+//	v_add_u32 v2, vcc[0:1], v2, v3
+      v_add_u32 v2, v2, v3
+      v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size
+      s_cbranch_vccnz L_SAVE_LDS_LOOP_VECTOR
+
+      // restore rsrc3
+      s_mov_b32 s_save_buf_rsrc3, s0
+
+L_SAVE_LDS_DONE:
+
+
+    /*		save VGPRs  - set the Rest VGPRs	*/
+    //////////////////////////////////////////////////////////////////////////////////////
+  L_SAVE_VGPR:
+    // VGPR SR memory offset: 0
+    // TODO rearrange the RSRC words to use swizzle for VGPR save...
+
+    s_mov_b32	    s_save_mem_offset, (0+256*4)				    // for the rest VGPRs
+    s_mov_b32	    exec_lo, 0xFFFFFFFF						    //need every thread from now on
+    s_mov_b32	    exec_hi, 0xFFFFFFFF
+
+    get_num_arch_vgprs(s_save_alloc_size)
+    s_mov_b32	    s_save_buf_rsrc2,  0x1000000				    //NUM_RECORDS in bytes
+
+
+    // VGPR store using dw burst
+    s_mov_b32	      m0, 0x4	//VGPR initial index value =0
+    s_cmp_lt_u32      m0, s_save_alloc_size
+    s_cbranch_scc0    L_SAVE_VGPR_END
+
+
+    s_set_gpr_idx_on	m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1
+    s_add_u32	    s_save_alloc_size, s_save_alloc_size, 0x1000		    //add 0x1000 since we compare m0 against it later
+
+if SAVE_AFTER_XNACK_ERROR
+	check_if_tcp_store_ok()
+	s_cbranch_scc1 L_SAVE_VGPR_LOOP
+
+L_SAVE_VGPR_LOOP_SQC:
+	write_vgprs_to_mem_with_sqc(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
+
+	s_add_u32 m0, m0, 4
+	s_cmp_lt_u32 m0, s_save_alloc_size
+	s_cbranch_scc1 L_SAVE_VGPR_LOOP_SQC
+
+	s_set_gpr_idx_off
+	s_branch L_SAVE_VGPR_END
+end
+
+  L_SAVE_VGPR_LOOP:
+    v_mov_b32	    v0, v0		//v0 = v[0+m0]
+    v_mov_b32	    v1, v1		//v0 = v[0+m0]
+    v_mov_b32	    v2, v2		//v0 = v[0+m0]
+    v_mov_b32	    v3, v3		//v0 = v[0+m0]
+
+    write_4vgprs_to_mem(s_save_buf_rsrc0, s_save_mem_offset)
+
+    s_add_u32	    m0, m0, 4							    //next vgpr index
+    s_add_u32	    s_save_mem_offset, s_save_mem_offset, 256*4			    //every buffer_store_dword does 256 bytes
+    s_cmp_lt_u32    m0, s_save_alloc_size					    //scc = (m0 < s_save_alloc_size) ? 1 : 0
+    s_cbranch_scc1  L_SAVE_VGPR_LOOP						    //VGPR save is complete?
+    s_set_gpr_idx_off
+
+L_SAVE_VGPR_END:
+
+#if ASIC_FAMILY >= CHIP_ARCTURUS
+    // Save ACC VGPRs
+
+#if ASIC_FAMILY >= CHIP_ALDEBARAN
+    // ACC VGPR count may differ from ARCH VGPR count.
+    get_num_acc_vgprs(s_save_alloc_size, s_save_tmp)
+    s_and_b32       s_save_alloc_size, s_save_alloc_size, s_save_alloc_size
+    s_cbranch_scc0  L_SAVE_ACCVGPR_END
+    s_add_u32	    s_save_alloc_size, s_save_alloc_size, 0x1000		    //add 0x1000 since we compare m0 against it later
+#endif
+
+    s_mov_b32 m0, 0x0 //VGPR initial index value =0
+    s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1
+
+if SAVE_AFTER_XNACK_ERROR
+    check_if_tcp_store_ok()
+    s_cbranch_scc1 L_SAVE_ACCVGPR_LOOP
+
+L_SAVE_ACCVGPR_LOOP_SQC:
+    for var vgpr = 0; vgpr < 4; ++ vgpr
+        v_accvgpr_read v[vgpr], acc[vgpr]  // v[N] = acc[N+m0]
+    end
+
+    write_vgprs_to_mem_with_sqc(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
+
+    s_add_u32 m0, m0, 4
+    s_cmp_lt_u32 m0, s_save_alloc_size
+    s_cbranch_scc1 L_SAVE_ACCVGPR_LOOP_SQC
+
+    s_set_gpr_idx_off
+    s_branch L_SAVE_ACCVGPR_END
+end
+
+L_SAVE_ACCVGPR_LOOP:
+    for var vgpr = 0; vgpr < 4; ++ vgpr
+        v_accvgpr_read v[vgpr], acc[vgpr]  // v[N] = acc[N+m0]
+    end
+
+    write_4vgprs_to_mem(s_save_buf_rsrc0, s_save_mem_offset)
+
+    s_add_u32 m0, m0, 4
+    s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4
+    s_cmp_lt_u32 m0, s_save_alloc_size
+    s_cbranch_scc1 L_SAVE_ACCVGPR_LOOP
+    s_set_gpr_idx_off
+
+L_SAVE_ACCVGPR_END:
+#endif
+
+    s_branch	L_END_PGM
+
+
+
+/**************************************************************************/
+/*			restore routine					  */
+/**************************************************************************/
+
+L_RESTORE:
+    /*	    Setup Resource Contants    */
+    s_mov_b32	    s_restore_buf_rsrc0,    s_restore_spi_init_lo							    //base_addr_lo
+    s_and_b32	    s_restore_buf_rsrc1,    s_restore_spi_init_hi, 0x0000FFFF						    //base_addr_hi
+    s_or_b32	    s_restore_buf_rsrc1,    s_restore_buf_rsrc1,  S_RESTORE_BUF_RSRC_WORD1_STRIDE
+    s_mov_b32	    s_restore_buf_rsrc2,    0										    //NUM_RECORDS initial value = 0 (in bytes)
+    s_mov_b32	    s_restore_buf_rsrc3,    S_RESTORE_BUF_RSRC_WORD3_MISC
+
+    /*	    global mem offset		*/
+//  s_mov_b32	    s_restore_mem_offset, 0x0				    //mem offset initial value = 0
+
+    /*	    the first wave in the threadgroup	 */
+    s_and_b32	    s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+    s_cbranch_scc0  L_RESTORE_VGPR
+
+    /*		restore LDS	*/
+    //////////////////////////////
+  L_RESTORE_LDS:
+
+    s_mov_b32	    exec_lo, 0xFFFFFFFF							    //need every thread from now on   //be consistent with SAVE although can be moved ahead
+    s_mov_b32	    exec_hi, 0xFFFFFFFF
+
+    s_getreg_b32    s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)		//lds_size
+    s_and_b32	    s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF		    //lds_size is zero?
+    s_cbranch_scc0  L_RESTORE_VGPR							    //no lds used? jump to L_RESTORE_VGPR
+    s_lshl_b32	    s_restore_alloc_size, s_restore_alloc_size, 6			    //LDS size in dwords = lds_size * 64dw
+    s_lshl_b32	    s_restore_alloc_size, s_restore_alloc_size, 2			    //LDS size in bytes
+    s_mov_b32	    s_restore_buf_rsrc2,    s_restore_alloc_size			    //NUM_RECORDS in bytes
+
+    // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG)
+    //
+    get_vgpr_size_bytes(s_restore_mem_offset)
+    get_sgpr_size_bytes(s_restore_tmp)
+    s_add_u32  s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+    s_add_u32  s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes()	     //FIXME, Check if offset overflow???
+
+
+	s_mov_b32	s_restore_buf_rsrc2,  0x1000000					    //NUM_RECORDS in bytes
+    s_mov_b32	    m0, 0x0								    //lds_offset initial value = 0
+
+  L_RESTORE_LDS_LOOP:
+	buffer_load_dword   v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1		       // first 64DW
+	buffer_load_dword   v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256	       // second 64DW
+    s_add_u32	    m0, m0, 256*2						// 128 DW
+    s_add_u32	    s_restore_mem_offset, s_restore_mem_offset, 256*2		//mem offset increased by 128DW
+    s_cmp_lt_u32    m0, s_restore_alloc_size					//scc=(m0 < s_restore_alloc_size) ? 1 : 0
+    s_cbranch_scc1  L_RESTORE_LDS_LOOP							    //LDS restore is complete?
+
+
+    /*		restore VGPRs	    */
+    //////////////////////////////
+  L_RESTORE_VGPR:
+    s_mov_b32	    exec_lo, 0xFFFFFFFF							    //need every thread from now on   //be consistent with SAVE although can be moved ahead
+    s_mov_b32	    exec_hi, 0xFFFFFFFF
+    s_mov_b32	    s_restore_buf_rsrc2,  0x1000000					    //NUM_RECORDS in bytes
+
+    // Save ARCH VGPRs 4-N, then all ACC VGPRs, then ARCH VGPRs 0-3.
+    get_num_arch_vgprs(s_restore_alloc_size)
+    s_add_u32	    s_restore_alloc_size, s_restore_alloc_size, 0x8000			    //add 0x8000 since we compare m0 against it later
+
+    // ARCH VGPRs at offset: 0
+    s_mov_b32	    s_restore_mem_offset, 0x0
+    s_mov_b32	    s_restore_mem_offset_save, s_restore_mem_offset	// restore start with v1, v0 will be the last
+    s_add_u32	    s_restore_mem_offset, s_restore_mem_offset, 256*4
+    s_mov_b32	    m0, 4				//VGPR initial index value = 1
+    s_set_gpr_idx_on	m0, 0x8								    //M0[7:0] = M0[7:0] and M0[15:12] = 0x8
+
+  L_RESTORE_VGPR_LOOP:
+    read_4vgprs_from_mem(s_restore_buf_rsrc0, s_restore_mem_offset)
+    v_mov_b32	    v0, v0								    //v[0+m0] = v0
+    v_mov_b32	    v1, v1
+    v_mov_b32	    v2, v2
+    v_mov_b32	    v3, v3
+    s_add_u32	    m0, m0, 4								    //next vgpr index
+    s_add_u32	    s_restore_mem_offset, s_restore_mem_offset, 256*4				//every buffer_load_dword does 256 bytes
+    s_cmp_lt_u32    m0, s_restore_alloc_size						    //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+    s_cbranch_scc1  L_RESTORE_VGPR_LOOP							    //VGPR restore (except v0) is complete?
+
+#if ASIC_FAMILY >= CHIP_ALDEBARAN
+    // ACC VGPR count may differ from ARCH VGPR count.
+    get_num_acc_vgprs(s_restore_alloc_size, s_restore_tmp2)
+    s_and_b32       s_restore_alloc_size, s_restore_alloc_size, s_restore_alloc_size
+    s_cbranch_scc0  L_RESTORE_ACCVGPR_END
+    s_add_u32	    s_restore_alloc_size, s_restore_alloc_size, 0x8000			    //add 0x8000 since we compare m0 against it later
+#endif
+
+#if ASIC_FAMILY >= CHIP_ARCTURUS
+    // ACC VGPRs at offset: size(ARCH VGPRs)
+    s_mov_b32	    m0, 0
+    s_set_gpr_idx_on	m0, 0x8								    //M0[7:0] = M0[7:0] and M0[15:12] = 0x8
+
+  L_RESTORE_ACCVGPR_LOOP:
+    read_4vgprs_from_mem(s_restore_buf_rsrc0, s_restore_mem_offset)
+
+    for var vgpr = 0; vgpr < 4; ++ vgpr
+        v_accvgpr_write acc[vgpr], v[vgpr]
+    end
+
+    s_add_u32	    m0, m0, 4								    //next vgpr index
+    s_add_u32	    s_restore_mem_offset, s_restore_mem_offset, 256*4			    //every buffer_load_dword does 256 bytes
+    s_cmp_lt_u32    m0, s_restore_alloc_size						    //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+    s_cbranch_scc1  L_RESTORE_ACCVGPR_LOOP						    //VGPR restore (except v0) is complete?
+  L_RESTORE_ACCVGPR_END:
+#endif
+
+    s_set_gpr_idx_off
+
+    // Restore VGPRs 0-3 last, no longer needed.
+    read_4vgprs_from_mem(s_restore_buf_rsrc0, s_restore_mem_offset_save)
+
+    /*		restore SGPRs	    */
+    //////////////////////////////
+
+    // SGPR SR memory offset : size(VGPR)
+    get_vgpr_size_bytes(s_restore_mem_offset)
+    get_sgpr_size_bytes(s_restore_tmp)
+    s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+    s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 16*4	   // restore SGPR from S[n] to S[0], by 16 sgprs group
+    // TODO, change RSRC word to rearrange memory layout for SGPRS
+
+    s_getreg_b32    s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE)		    //spgr_size
+    s_add_u32	    s_restore_alloc_size, s_restore_alloc_size, 1
+    s_lshl_b32	    s_restore_alloc_size, s_restore_alloc_size, 4			    //Number of SGPRs = (sgpr_size + 1) * 16   (non-zero value)
+
+	s_lshl_b32	s_restore_buf_rsrc2,	s_restore_alloc_size, 2			    //NUM_RECORDS in bytes
+	s_mov_b32	s_restore_buf_rsrc2,  0x1000000					    //NUM_RECORDS in bytes
+
+    s_mov_b32 m0, s_restore_alloc_size
+
+ L_RESTORE_SGPR_LOOP:
+    read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)	 //PV: further performance improvement can be made
+    s_waitcnt	    lgkmcnt(0)								    //ensure data ready
+
+    s_sub_u32 m0, m0, 16    // Restore from S[n] to S[0]
+    s_nop 0 // hazard SALU M0=> S_MOVREL
+
+    s_movreld_b64   s0, s0	//s[0+m0] = s0
+    s_movreld_b64   s2, s2
+    s_movreld_b64   s4, s4
+    s_movreld_b64   s6, s6
+    s_movreld_b64   s8, s8
+    s_movreld_b64   s10, s10
+    s_movreld_b64   s12, s12
+    s_movreld_b64   s14, s14
+
+    s_cmp_eq_u32    m0, 0		//scc = (m0 < s_restore_alloc_size) ? 1 : 0
+    s_cbranch_scc0  L_RESTORE_SGPR_LOOP		    //SGPR restore (except s0) is complete?
+
+    /*	    restore HW registers    */
+    //////////////////////////////
+  L_RESTORE_HWREG:
+
+
+    // HWREG SR memory offset : size(VGPR)+size(SGPR)
+    get_vgpr_size_bytes(s_restore_mem_offset)
+    get_sgpr_size_bytes(s_restore_tmp)
+    s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+
+
+    s_mov_b32	    s_restore_buf_rsrc2, 0x4						    //NUM_RECORDS   in bytes
+	s_mov_b32	s_restore_buf_rsrc2,  0x1000000					    //NUM_RECORDS in bytes
+
+    read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset)		    //M0
+    read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset)		//PC
+    read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+    read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset)		    //EXEC
+    read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+    read_hwreg_from_mem(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset)		    //STATUS
+    read_hwreg_from_mem(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset)		    //TRAPSTS
+    read_hwreg_from_mem(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset)		    //XNACK_MASK_LO
+    read_hwreg_from_mem(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset)		    //XNACK_MASK_HI
+    read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset)		//MODE
+
+    s_waitcnt	    lgkmcnt(0)											    //from now on, it is safe to restore STATUS and IB_STS
+
+    s_mov_b32	    m0,		s_restore_m0
+    s_mov_b32	    exec_lo,	s_restore_exec_lo
+    s_mov_b32	    exec_hi,	s_restore_exec_hi
+
+    s_and_b32	    s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
+    s_setreg_b32    hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0
+    s_and_b32	    s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts
+    s_lshr_b32	    s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT
+    s_setreg_b32    hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0
+    //s_setreg_b32  hwreg(HW_REG_TRAPSTS),  s_restore_trapsts	   //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore
+    s_setreg_b32    hwreg(HW_REG_MODE),	    s_restore_mode
+
+    // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
+    // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
+    get_vgpr_size_bytes(s_restore_ttmps_lo)
+    get_sgpr_size_bytes(s_restore_ttmps_hi)
+    s_add_u32	    s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi
+    s_add_u32	    s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
+    s_addc_u32	    s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
+    s_and_b32	    s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
+    s_load_dwordx4  [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 glc:1
+    s_load_dwordx4  [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 glc:1
+    s_load_dword    ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
+    s_waitcnt	    lgkmcnt(0)
+
+    restore_ib_sts(s_restore_tmp)
+
+    s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff	//pc[47:32]	   //Do it here in order not to affect STATUS
+    s_and_b64	 exec, exec, exec  // Restore STATUS.EXECZ, not writable by s_setreg_b32
+    s_and_b64	 vcc, vcc, vcc	// Restore STATUS.VCCZ, not writable by s_setreg_b32
+    set_status_without_spi_prio(s_restore_status, s_restore_tmp) // SCC is included, which is changed by previous salu
+
+    s_barrier							//barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
+
+    s_rfe_b64 s_restore_pc_lo					//Return to the main shader program and resume execution
+
+
+/**************************************************************************/
+/*			the END						  */
+/**************************************************************************/
+L_END_PGM:
+    s_endpgm
+
+end
+
+
+/**************************************************************************/
+/*			the helper functions				  */
+/**************************************************************************/
+
+//Only for save hwreg to mem
+function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
+	s_mov_b32 exec_lo, m0			//assuming exec_lo is not needed anymore from this point on
+	s_mov_b32 m0, s_mem_offset
+	s_buffer_store_dword s, s_rsrc, m0	glc:1
+	ack_sqc_store_workaround()
+	s_add_u32	s_mem_offset, s_mem_offset, 4
+	s_mov_b32   m0, exec_lo
+end
+
+
+// HWREG are saved before SGPRs, so all HWREG could be use.
+function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset)
+
+	s_buffer_store_dwordx4 s[0], s_rsrc, 0	glc:1
+	ack_sqc_store_workaround()
+	s_buffer_store_dwordx4 s[4], s_rsrc, 16	 glc:1
+	ack_sqc_store_workaround()
+	s_buffer_store_dwordx4 s[8], s_rsrc, 32	 glc:1
+	ack_sqc_store_workaround()
+	s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1
+	ack_sqc_store_workaround()
+	s_add_u32	s_rsrc[0], s_rsrc[0], 4*16
+	s_addc_u32	s_rsrc[1], s_rsrc[1], 0x0	      // +scc
+end
+
+
+function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
+    s_buffer_load_dword s, s_rsrc, s_mem_offset	    glc:1
+    s_add_u32	    s_mem_offset, s_mem_offset, 4
+end
+
+function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
+    s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset	glc:1
+    s_sub_u32	    s_mem_offset, s_mem_offset, 4*16
+end
+
+function check_if_tcp_store_ok
+	// If STATUS.ALLOW_REPLAY=0 and TRAPSTS.XNACK_ERROR=1 then TCP stores will fail.
+	s_and_b32 s_save_tmp, s_save_status, SQ_WAVE_STATUS_ALLOW_REPLAY_MASK
+	s_cbranch_scc1 L_TCP_STORE_CHECK_DONE
+
+	s_getreg_b32 s_save_tmp, hwreg(HW_REG_TRAPSTS)
+	s_andn2_b32 s_save_tmp, SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK, s_save_tmp
+
+L_TCP_STORE_CHECK_DONE:
+end
+
+function write_4vgprs_to_mem(s_rsrc, s_mem_offset)
+	buffer_store_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
+	buffer_store_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1  offset:256
+	buffer_store_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1  offset:256*2
+	buffer_store_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1  offset:256*3
+end
+
+function read_4vgprs_from_mem(s_rsrc, s_mem_offset)
+	buffer_load_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1
+	buffer_load_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256
+	buffer_load_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*2
+	buffer_load_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*3
+	s_waitcnt vmcnt(0)
+end
+
+function write_vgpr_to_mem_with_sqc(v, s_rsrc, s_mem_offset)
+	s_mov_b32 s4, 0
+
+L_WRITE_VGPR_LANE_LOOP:
+	for var lane = 0; lane < 4; ++ lane
+		v_readlane_b32 s[lane], v, s4
+		s_add_u32 s4, s4, 1
+	end
+
+	s_buffer_store_dwordx4 s[0:3], s_rsrc, s_mem_offset glc:1
+	ack_sqc_store_workaround()
+
+	s_add_u32 s_mem_offset, s_mem_offset, 0x10
+	s_cmp_eq_u32 s4, 0x40
+	s_cbranch_scc0 L_WRITE_VGPR_LANE_LOOP
+end
+
+function write_vgprs_to_mem_with_sqc(v, n_vgprs, s_rsrc, s_mem_offset)
+	for var vgpr = 0; vgpr < n_vgprs; ++ vgpr
+		write_vgpr_to_mem_with_sqc(v[vgpr], s_rsrc, s_mem_offset)
+	end
+end
+
+function get_lds_size_bytes(s_lds_size_byte)
+    // SQ LDS granularity is 64DW, while PGM_RSRC2.lds_size is in granularity 128DW
+    s_getreg_b32   s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)		// lds_size
+    s_lshl_b32	   s_lds_size_byte, s_lds_size_byte, 8			    //LDS size in dwords = lds_size * 64 *4Bytes    // granularity 64DW
+end
+
+function get_vgpr_size_bytes(s_vgpr_size_byte)
+    s_getreg_b32   s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)	 //vpgr_size
+    s_add_u32	   s_vgpr_size_byte, s_vgpr_size_byte, 1
+    s_lshl_b32	   s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //Number of VGPRs = (vgpr_size + 1) * 4 * 64 * 4	(non-zero value)   //FIXME for GFX, zero is possible
+
+#if ASIC_FAMILY >= CHIP_ARCTURUS
+    s_lshl_b32     s_vgpr_size_byte, s_vgpr_size_byte, 1  // Double size for ACC VGPRs
+#endif
+end
+
+function get_sgpr_size_bytes(s_sgpr_size_byte)
+    s_getreg_b32   s_sgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE)	 //spgr_size
+    s_add_u32	   s_sgpr_size_byte, s_sgpr_size_byte, 1
+    s_lshl_b32	   s_sgpr_size_byte, s_sgpr_size_byte, 6 //Number of SGPRs = (sgpr_size + 1) * 16 *4   (non-zero value)
+end
+
+function get_hwreg_size_bytes
+    return 128 //HWREG size 128 bytes
+end
+
+function get_num_arch_vgprs(s_num_arch_vgprs)
+#if ASIC_FAMILY >= CHIP_ALDEBARAN
+    // VGPR count includes ACC VGPRs, use ACC VGPR offset for ARCH VGPR count.
+    s_getreg_b32    s_num_arch_vgprs, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_ACCV_OFFSET_SHIFT,SQ_WAVE_GPR_ALLOC_ACCV_OFFSET_SIZE)
+#else
+    s_getreg_b32    s_num_arch_vgprs, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+#endif
+
+    // Number of VGPRs = (vgpr_size + 1) * 4
+    s_add_u32	    s_num_arch_vgprs, s_num_arch_vgprs, 1
+    s_lshl_b32	    s_num_arch_vgprs, s_num_arch_vgprs, 2
+end
+
+#if ASIC_FAMILY >= CHIP_ALDEBARAN
+function get_num_acc_vgprs(s_num_acc_vgprs, s_tmp)
+    // VGPR count = (GPR_ALLOC.VGPR_SIZE + 1) * 8
+    s_getreg_b32    s_num_acc_vgprs, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+    s_add_u32	    s_num_acc_vgprs, s_num_acc_vgprs, 1
+    s_lshl_b32	    s_num_acc_vgprs, s_num_acc_vgprs, 3
+
+    // ACC VGPR count = VGPR count - ARCH VGPR count.
+    get_num_arch_vgprs(s_tmp)
+    s_sub_u32	    s_num_acc_vgprs, s_num_acc_vgprs, s_tmp
+end
+#endif
+
+function ack_sqc_store_workaround
+    if ACK_SQC_STORE
+        s_waitcnt lgkmcnt(0)
+    end
+end
+
+function set_status_without_spi_prio(status, tmp)
+    // Do not restore STATUS.SPI_PRIO since scheduler may have raised it.
+    s_lshr_b32      tmp, status, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT
+    s_setreg_b32    hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE), tmp
+    s_nop           0x2 // avoid S_SETREG => S_SETREG hazard
+    s_setreg_b32    hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status
+end
+
+function save_and_clear_ib_sts(tmp)
+    // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space s_save_ib_sts[31:26].
+    s_getreg_b32    tmp, hwreg(HW_REG_IB_STS)
+    s_and_b32       tmp, tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+    s_lshl_b32      tmp, tmp, (TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+    s_andn2_b32     s_save_ib_sts, s_save_ib_sts, TTMP_SAVE_RCNT_FIRST_REPLAY_MASK
+    s_or_b32        s_save_ib_sts, s_save_ib_sts, tmp
+    s_setreg_imm32_b32 hwreg(HW_REG_IB_STS), 0x0
+end
+
+function restore_ib_sts(tmp)
+    s_lshr_b32      tmp, s_save_ib_sts, (TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+    s_and_b32       tmp, tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+    s_setreg_b32    hwreg(HW_REG_IB_STS), tmp
+end
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
new file mode 100644
index 000000000..c37f1fcd2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -0,0 +1,3410 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/compat.h>
+#include <uapi/linux/kfd_ioctl.h>
+#include <linux/time.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/ptrace.h>
+#include <linux/dma-buf.h>
+#include <linux/fdtable.h>
+#include <linux/processor.h>
+#include "kfd_priv.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_svm.h"
+#include "amdgpu_amdkfd.h"
+#include "kfd_smi_events.h"
+#include "amdgpu_dma_buf.h"
+#include "kfd_debug.h"
+
+static long kfd_ioctl(struct file *, unsigned int, unsigned long);
+static int kfd_open(struct inode *, struct file *);
+static int kfd_release(struct inode *, struct file *);
+static int kfd_mmap(struct file *, struct vm_area_struct *);
+
+static const char kfd_dev_name[] = "kfd";
+
+static const struct file_operations kfd_fops = {
+	.owner = THIS_MODULE,
+	.unlocked_ioctl = kfd_ioctl,
+	.compat_ioctl = compat_ptr_ioctl,
+	.open = kfd_open,
+	.release = kfd_release,
+	.mmap = kfd_mmap,
+};
+
+static int kfd_char_dev_major = -1;
+static struct class *kfd_class;
+struct device *kfd_device;
+
+static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id)
+{
+	struct kfd_process_device *pdd;
+
+	mutex_lock(&p->mutex);
+	pdd = kfd_process_device_data_by_id(p, gpu_id);
+
+	if (pdd)
+		return pdd;
+
+	mutex_unlock(&p->mutex);
+	return NULL;
+}
+
+static inline void kfd_unlock_pdd(struct kfd_process_device *pdd)
+{
+	mutex_unlock(&pdd->process->mutex);
+}
+
+int kfd_chardev_init(void)
+{
+	int err = 0;
+
+	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
+	err = kfd_char_dev_major;
+	if (err < 0)
+		goto err_register_chrdev;
+
+	kfd_class = class_create(kfd_dev_name);
+	err = PTR_ERR(kfd_class);
+	if (IS_ERR(kfd_class))
+		goto err_class_create;
+
+	kfd_device = device_create(kfd_class, NULL,
+					MKDEV(kfd_char_dev_major, 0),
+					NULL, kfd_dev_name);
+	err = PTR_ERR(kfd_device);
+	if (IS_ERR(kfd_device))
+		goto err_device_create;
+
+	return 0;
+
+err_device_create:
+	class_destroy(kfd_class);
+err_class_create:
+	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
+err_register_chrdev:
+	return err;
+}
+
+void kfd_chardev_exit(void)
+{
+	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
+	class_destroy(kfd_class);
+	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
+	kfd_device = NULL;
+}
+
+
+static int kfd_open(struct inode *inode, struct file *filep)
+{
+	struct kfd_process *process;
+	bool is_32bit_user_mode;
+
+	if (iminor(inode) != 0)
+		return -ENODEV;
+
+	is_32bit_user_mode = in_compat_syscall();
+
+	if (is_32bit_user_mode) {
+		dev_warn(kfd_device,
+			"Process %d (32-bit) failed to open /dev/kfd\n"
+			"32-bit processes are not supported by amdkfd\n",
+			current->pid);
+		return -EPERM;
+	}
+
+	process = kfd_create_process(current);
+	if (IS_ERR(process))
+		return PTR_ERR(process);
+
+	if (kfd_process_init_cwsr_apu(process, filep)) {
+		kfd_unref_process(process);
+		return -EFAULT;
+	}
+
+	/* filep now owns the reference returned by kfd_create_process */
+	filep->private_data = process;
+
+	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
+		process->pasid, process->is_32bit_user_mode);
+
+	return 0;
+}
+
+static int kfd_release(struct inode *inode, struct file *filep)
+{
+	struct kfd_process *process = filep->private_data;
+
+	if (process)
+		kfd_unref_process(process);
+
+	return 0;
+}
+
+static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
+					void *data)
+{
+	struct kfd_ioctl_get_version_args *args = data;
+
+	args->major_version = KFD_IOCTL_MAJOR_VERSION;
+	args->minor_version = KFD_IOCTL_MINOR_VERSION;
+
+	return 0;
+}
+
+static int set_queue_properties_from_user(struct queue_properties *q_properties,
+				struct kfd_ioctl_create_queue_args *args)
+{
+	/*
+	 * Repurpose queue percentage to accommodate new features:
+	 * bit 0-7: queue percentage
+	 * bit 8-15: pm4_target_xcc
+	 */
+	if ((args->queue_percentage & 0xFF) > KFD_MAX_QUEUE_PERCENTAGE) {
+		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
+		return -EINVAL;
+	}
+
+	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
+		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
+		return -EINVAL;
+	}
+
+	if ((args->ring_base_address) &&
+		(!access_ok((const void __user *) args->ring_base_address,
+			sizeof(uint64_t)))) {
+		pr_err("Can't access ring base address\n");
+		return -EFAULT;
+	}
+
+	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
+		pr_err("Ring size must be a power of 2 or 0\n");
+		return -EINVAL;
+	}
+
+	if (!access_ok((const void __user *) args->read_pointer_address,
+			sizeof(uint32_t))) {
+		pr_err("Can't access read pointer\n");
+		return -EFAULT;
+	}
+
+	if (!access_ok((const void __user *) args->write_pointer_address,
+			sizeof(uint32_t))) {
+		pr_err("Can't access write pointer\n");
+		return -EFAULT;
+	}
+
+	if (args->eop_buffer_address &&
+		!access_ok((const void __user *) args->eop_buffer_address,
+			sizeof(uint32_t))) {
+		pr_debug("Can't access eop buffer");
+		return -EFAULT;
+	}
+
+	if (args->ctx_save_restore_address &&
+		!access_ok((const void __user *) args->ctx_save_restore_address,
+			sizeof(uint32_t))) {
+		pr_debug("Can't access ctx save restore buffer");
+		return -EFAULT;
+	}
+
+	q_properties->is_interop = false;
+	q_properties->is_gws = false;
+	q_properties->queue_percent = args->queue_percentage & 0xFF;
+	/* bit 8-15 are repurposed to be PM4 target XCC */
+	q_properties->pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
+	q_properties->priority = args->queue_priority;
+	q_properties->queue_address = args->ring_base_address;
+	q_properties->queue_size = args->ring_size;
+	q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
+	q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
+	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
+	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
+	q_properties->ctx_save_restore_area_address =
+			args->ctx_save_restore_address;
+	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
+	q_properties->ctl_stack_size = args->ctl_stack_size;
+	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
+		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
+		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
+	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
+		q_properties->type = KFD_QUEUE_TYPE_SDMA;
+	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
+		q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
+	else
+		return -ENOTSUPP;
+
+	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
+		q_properties->format = KFD_QUEUE_FORMAT_AQL;
+	else
+		q_properties->format = KFD_QUEUE_FORMAT_PM4;
+
+	pr_debug("Queue Percentage: %d, %d\n",
+			q_properties->queue_percent, args->queue_percentage);
+
+	pr_debug("Queue Priority: %d, %d\n",
+			q_properties->priority, args->queue_priority);
+
+	pr_debug("Queue Address: 0x%llX, 0x%llX\n",
+			q_properties->queue_address, args->ring_base_address);
+
+	pr_debug("Queue Size: 0x%llX, %u\n",
+			q_properties->queue_size, args->ring_size);
+
+	pr_debug("Queue r/w Pointers: %px, %px\n",
+			q_properties->read_ptr,
+			q_properties->write_ptr);
+
+	pr_debug("Queue Format: %d\n", q_properties->format);
+
+	pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);
+
+	pr_debug("Queue CTX save area: 0x%llX\n",
+			q_properties->ctx_save_restore_area_address);
+
+	return 0;
+}
+
+static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
+					void *data)
+{
+	struct kfd_ioctl_create_queue_args *args = data;
+	struct kfd_node *dev;
+	int err = 0;
+	unsigned int queue_id;
+	struct kfd_process_device *pdd;
+	struct queue_properties q_properties;
+	uint32_t doorbell_offset_in_process = 0;
+	struct amdgpu_bo *wptr_bo = NULL;
+
+	memset(&q_properties, 0, sizeof(struct queue_properties));
+
+	pr_debug("Creating queue ioctl\n");
+
+	err = set_queue_properties_from_user(&q_properties, args);
+	if (err)
+		return err;
+
+	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+	if (!pdd) {
+		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
+		err = -EINVAL;
+		goto err_pdd;
+	}
+	dev = pdd->dev;
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		err = -ESRCH;
+		goto err_bind_process;
+	}
+
+	if (!pdd->qpd.proc_doorbells) {
+		err = kfd_alloc_process_doorbells(dev->kfd, pdd);
+		if (err) {
+			pr_debug("failed to allocate process doorbells\n");
+			goto err_bind_process;
+		}
+	}
+
+	/* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
+	 * on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
+	 */
+	if (dev->kfd->shared_resources.enable_mes &&
+			((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK)
+			>> AMDGPU_MES_API_VERSION_SHIFT) >= 2) {
+		struct amdgpu_bo_va_mapping *wptr_mapping;
+		struct amdgpu_vm *wptr_vm;
+
+		wptr_vm = drm_priv_to_vm(pdd->drm_priv);
+		err = amdgpu_bo_reserve(wptr_vm->root.bo, false);
+		if (err)
+			goto err_wptr_map_gart;
+
+		wptr_mapping = amdgpu_vm_bo_lookup_mapping(
+				wptr_vm, args->write_pointer_address >> PAGE_SHIFT);
+		amdgpu_bo_unreserve(wptr_vm->root.bo);
+		if (!wptr_mapping) {
+			pr_err("Failed to lookup wptr bo\n");
+			err = -EINVAL;
+			goto err_wptr_map_gart;
+		}
+
+		wptr_bo = wptr_mapping->bo_va->base.bo;
+		if (wptr_bo->tbo.base.size > PAGE_SIZE) {
+			pr_err("Requested GART mapping for wptr bo larger than one page\n");
+			err = -EINVAL;
+			goto err_wptr_map_gart;
+		}
+
+		err = amdgpu_amdkfd_map_gtt_bo_to_gart(dev->adev, wptr_bo);
+		if (err) {
+			pr_err("Failed to map wptr bo to GART\n");
+			goto err_wptr_map_gart;
+		}
+	}
+
+	pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
+			p->pasid,
+			dev->id);
+
+	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, wptr_bo,
+			NULL, NULL, NULL, &doorbell_offset_in_process);
+	if (err != 0)
+		goto err_create_queue;
+
+	args->queue_id = queue_id;
+
+
+	/* Return gpu_id as doorbell offset for mmap usage */
+	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
+	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
+	if (KFD_IS_SOC15(dev))
+		/* On SOC15 ASICs, include the doorbell offset within the
+		 * process doorbell frame, which is 2 pages.
+		 */
+		args->doorbell_offset |= doorbell_offset_in_process;
+
+	mutex_unlock(&p->mutex);
+
+	pr_debug("Queue id %d was created successfully\n", args->queue_id);
+
+	pr_debug("Ring buffer address == 0x%016llX\n",
+			args->ring_base_address);
+
+	pr_debug("Read ptr address    == 0x%016llX\n",
+			args->read_pointer_address);
+
+	pr_debug("Write ptr address   == 0x%016llX\n",
+			args->write_pointer_address);
+
+	kfd_dbg_ev_raise(KFD_EC_MASK(EC_QUEUE_NEW), p, dev, queue_id, false, NULL, 0);
+	return 0;
+
+err_create_queue:
+	if (wptr_bo)
+		amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo);
+err_wptr_map_gart:
+err_bind_process:
+err_pdd:
+	mutex_unlock(&p->mutex);
+	return err;
+}
+
+static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
+					void *data)
+{
+	int retval;
+	struct kfd_ioctl_destroy_queue_args *args = data;
+
+	pr_debug("Destroying queue id %d for pasid 0x%x\n",
+				args->queue_id,
+				p->pasid);
+
+	mutex_lock(&p->mutex);
+
+	retval = pqm_destroy_queue(&p->pqm, args->queue_id);
+
+	mutex_unlock(&p->mutex);
+	return retval;
+}
+
+static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
+					void *data)
+{
+	int retval;
+	struct kfd_ioctl_update_queue_args *args = data;
+	struct queue_properties properties;
+
+	/*
+	 * Repurpose queue percentage to accommodate new features:
+	 * bit 0-7: queue percentage
+	 * bit 8-15: pm4_target_xcc
+	 */
+	if ((args->queue_percentage & 0xFF) > KFD_MAX_QUEUE_PERCENTAGE) {
+		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
+		return -EINVAL;
+	}
+
+	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
+		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
+		return -EINVAL;
+	}
+
+	if ((args->ring_base_address) &&
+		(!access_ok((const void __user *) args->ring_base_address,
+			sizeof(uint64_t)))) {
+		pr_err("Can't access ring base address\n");
+		return -EFAULT;
+	}
+
+	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
+		pr_err("Ring size must be a power of 2 or 0\n");
+		return -EINVAL;
+	}
+
+	properties.queue_address = args->ring_base_address;
+	properties.queue_size = args->ring_size;
+	properties.queue_percent = args->queue_percentage & 0xFF;
+	/* bit 8-15 are repurposed to be PM4 target XCC */
+	properties.pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
+	properties.priority = args->queue_priority;
+
+	pr_debug("Updating queue id %d for pasid 0x%x\n",
+			args->queue_id, p->pasid);
+
+	mutex_lock(&p->mutex);
+
+	retval = pqm_update_queue_properties(&p->pqm, args->queue_id, &properties);
+
+	mutex_unlock(&p->mutex);
+
+	return retval;
+}
+
+static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
+					void *data)
+{
+	int retval;
+	const int max_num_cus = 1024;
+	struct kfd_ioctl_set_cu_mask_args *args = data;
+	struct mqd_update_info minfo = {0};
+	uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
+	size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);
+
+	if ((args->num_cu_mask % 32) != 0) {
+		pr_debug("num_cu_mask 0x%x must be a multiple of 32",
+				args->num_cu_mask);
+		return -EINVAL;
+	}
+
+	minfo.cu_mask.count = args->num_cu_mask;
+	if (minfo.cu_mask.count == 0) {
+		pr_debug("CU mask cannot be 0");
+		return -EINVAL;
+	}
+
+	/* To prevent an unreasonably large CU mask size, set an arbitrary
+	 * limit of max_num_cus bits.  We can then just drop any CU mask bits
+	 * past max_num_cus bits and just use the first max_num_cus bits.
+	 */
+	if (minfo.cu_mask.count > max_num_cus) {
+		pr_debug("CU mask cannot be greater than 1024 bits");
+		minfo.cu_mask.count = max_num_cus;
+		cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
+	}
+
+	minfo.cu_mask.ptr = kzalloc(cu_mask_size, GFP_KERNEL);
+	if (!minfo.cu_mask.ptr)
+		return -ENOMEM;
+
+	retval = copy_from_user(minfo.cu_mask.ptr, cu_mask_ptr, cu_mask_size);
+	if (retval) {
+		pr_debug("Could not copy CU mask from userspace");
+		retval = -EFAULT;
+		goto out;
+	}
+
+	mutex_lock(&p->mutex);
+
+	retval = pqm_update_mqd(&p->pqm, args->queue_id, &minfo);
+
+	mutex_unlock(&p->mutex);
+
+out:
+	kfree(minfo.cu_mask.ptr);
+	return retval;
+}
+
+static int kfd_ioctl_get_queue_wave_state(struct file *filep,
+					  struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_queue_wave_state_args *args = data;
+	int r;
+
+	mutex_lock(&p->mutex);
+
+	r = pqm_get_wave_state(&p->pqm, args->queue_id,
+			       (void __user *)args->ctl_stack_address,
+			       &args->ctl_stack_used_size,
+			       &args->save_area_used_size);
+
+	mutex_unlock(&p->mutex);
+
+	return r;
+}
+
+static int kfd_ioctl_set_memory_policy(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_set_memory_policy_args *args = data;
+	int err = 0;
+	struct kfd_process_device *pdd;
+	enum cache_policy default_policy, alternate_policy;
+
+	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
+	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
+		return -EINVAL;
+	}
+
+	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
+	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
+		return -EINVAL;
+	}
+
+	mutex_lock(&p->mutex);
+	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+	if (!pdd) {
+		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
+		err = -EINVAL;
+		goto err_pdd;
+	}
+
+	pdd = kfd_bind_process_to_device(pdd->dev, p);
+	if (IS_ERR(pdd)) {
+		err = -ESRCH;
+		goto out;
+	}
+
+	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
+			 ? cache_policy_coherent : cache_policy_noncoherent;
+
+	alternate_policy =
+		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
+		   ? cache_policy_coherent : cache_policy_noncoherent;
+
+	if (!pdd->dev->dqm->ops.set_cache_memory_policy(pdd->dev->dqm,
+				&pdd->qpd,
+				default_policy,
+				alternate_policy,
+				(void __user *)args->alternate_aperture_base,
+				args->alternate_aperture_size))
+		err = -EINVAL;
+
+out:
+err_pdd:
+	mutex_unlock(&p->mutex);
+
+	return err;
+}
+
+static int kfd_ioctl_set_trap_handler(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_set_trap_handler_args *args = data;
+	int err = 0;
+	struct kfd_process_device *pdd;
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+	if (!pdd) {
+		err = -EINVAL;
+		goto err_pdd;
+	}
+
+	pdd = kfd_bind_process_to_device(pdd->dev, p);
+	if (IS_ERR(pdd)) {
+		err = -ESRCH;
+		goto out;
+	}
+
+	kfd_process_set_trap_handler(&pdd->qpd, args->tba_addr, args->tma_addr);
+
+out:
+err_pdd:
+	mutex_unlock(&p->mutex);
+
+	return err;
+}
+
+static int kfd_ioctl_dbg_register(struct file *filep,
+				struct kfd_process *p, void *data)
+{
+	return -EPERM;
+}
+
+static int kfd_ioctl_dbg_unregister(struct file *filep,
+				struct kfd_process *p, void *data)
+{
+	return -EPERM;
+}
+
+static int kfd_ioctl_dbg_address_watch(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	return -EPERM;
+}
+
+/* Parse and generate fixed size data structure for wave control */
+static int kfd_ioctl_dbg_wave_control(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	return -EPERM;
+}
+
+static int kfd_ioctl_get_clock_counters(struct file *filep,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_clock_counters_args *args = data;
+	struct kfd_process_device *pdd;
+
+	mutex_lock(&p->mutex);
+	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+	mutex_unlock(&p->mutex);
+	if (pdd)
+		/* Reading GPU clock counter from KGD */
+		args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(pdd->dev->adev);
+	else
+		/* Node without GPU resource */
+		args->gpu_clock_counter = 0;
+
+	/* No access to rdtsc. Using raw monotonic time */
+	args->cpu_clock_counter = ktime_get_raw_ns();
+	args->system_clock_counter = ktime_get_boottime_ns();
+
+	/* Since the counter is in nano-seconds we use 1GHz frequency */
+	args->system_clock_freq = 1000000000;
+
+	return 0;
+}
+
+
+static int kfd_ioctl_get_process_apertures(struct file *filp,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_process_apertures_args *args = data;
+	struct kfd_process_device_apertures *pAperture;
+	int i;
+
+	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
+
+	args->num_of_nodes = 0;
+
+	mutex_lock(&p->mutex);
+	/* Run over all pdd of the process */
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+
+		pAperture =
+			&args->process_apertures[args->num_of_nodes];
+		pAperture->gpu_id = pdd->dev->id;
+		pAperture->lds_base = pdd->lds_base;
+		pAperture->lds_limit = pdd->lds_limit;
+		pAperture->gpuvm_base = pdd->gpuvm_base;
+		pAperture->gpuvm_limit = pdd->gpuvm_limit;
+		pAperture->scratch_base = pdd->scratch_base;
+		pAperture->scratch_limit = pdd->scratch_limit;
+
+		dev_dbg(kfd_device,
+			"node id %u\n", args->num_of_nodes);
+		dev_dbg(kfd_device,
+			"gpu id %u\n", pdd->dev->id);
+		dev_dbg(kfd_device,
+			"lds_base %llX\n", pdd->lds_base);
+		dev_dbg(kfd_device,
+			"lds_limit %llX\n", pdd->lds_limit);
+		dev_dbg(kfd_device,
+			"gpuvm_base %llX\n", pdd->gpuvm_base);
+		dev_dbg(kfd_device,
+			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
+		dev_dbg(kfd_device,
+			"scratch_base %llX\n", pdd->scratch_base);
+		dev_dbg(kfd_device,
+			"scratch_limit %llX\n", pdd->scratch_limit);
+
+		if (++args->num_of_nodes >= NUM_OF_SUPPORTED_GPUS)
+			break;
+	}
+	mutex_unlock(&p->mutex);
+
+	return 0;
+}
+
+static int kfd_ioctl_get_process_apertures_new(struct file *filp,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_process_apertures_new_args *args = data;
+	struct kfd_process_device_apertures *pa;
+	int ret;
+	int i;
+
+	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
+
+	if (args->num_of_nodes == 0) {
+		/* Return number of nodes, so that user space can alloacate
+		 * sufficient memory
+		 */
+		mutex_lock(&p->mutex);
+		args->num_of_nodes = p->n_pdds;
+		goto out_unlock;
+	}
+
+	/* Fill in process-aperture information for all available
+	 * nodes, but not more than args->num_of_nodes as that is
+	 * the amount of memory allocated by user
+	 */
+	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
+				args->num_of_nodes), GFP_KERNEL);
+	if (!pa)
+		return -ENOMEM;
+
+	mutex_lock(&p->mutex);
+
+	if (!p->n_pdds) {
+		args->num_of_nodes = 0;
+		kfree(pa);
+		goto out_unlock;
+	}
+
+	/* Run over all pdd of the process */
+	for (i = 0; i < min(p->n_pdds, args->num_of_nodes); i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+
+		pa[i].gpu_id = pdd->dev->id;
+		pa[i].lds_base = pdd->lds_base;
+		pa[i].lds_limit = pdd->lds_limit;
+		pa[i].gpuvm_base = pdd->gpuvm_base;
+		pa[i].gpuvm_limit = pdd->gpuvm_limit;
+		pa[i].scratch_base = pdd->scratch_base;
+		pa[i].scratch_limit = pdd->scratch_limit;
+
+		dev_dbg(kfd_device,
+			"gpu id %u\n", pdd->dev->id);
+		dev_dbg(kfd_device,
+			"lds_base %llX\n", pdd->lds_base);
+		dev_dbg(kfd_device,
+			"lds_limit %llX\n", pdd->lds_limit);
+		dev_dbg(kfd_device,
+			"gpuvm_base %llX\n", pdd->gpuvm_base);
+		dev_dbg(kfd_device,
+			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
+		dev_dbg(kfd_device,
+			"scratch_base %llX\n", pdd->scratch_base);
+		dev_dbg(kfd_device,
+			"scratch_limit %llX\n", pdd->scratch_limit);
+	}
+	mutex_unlock(&p->mutex);
+
+	args->num_of_nodes = i;
+	ret = copy_to_user(
+			(void __user *)args->kfd_process_device_apertures_ptr,
+			pa,
+			(i * sizeof(struct kfd_process_device_apertures)));
+	kfree(pa);
+	return ret ? -EFAULT : 0;
+
+out_unlock:
+	mutex_unlock(&p->mutex);
+	return 0;
+}
+
+static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
+					void *data)
+{
+	struct kfd_ioctl_create_event_args *args = data;
+	int err;
+
+	/* For dGPUs the event page is allocated in user mode. The
+	 * handle is passed to KFD with the first call to this IOCTL
+	 * through the event_page_offset field.
+	 */
+	if (args->event_page_offset) {
+		mutex_lock(&p->mutex);
+		err = kfd_kmap_event_page(p, args->event_page_offset);
+		mutex_unlock(&p->mutex);
+		if (err)
+			return err;
+	}
+
+	err = kfd_event_create(filp, p, args->event_type,
+				args->auto_reset != 0, args->node_id,
+				&args->event_id, &args->event_trigger_data,
+				&args->event_page_offset,
+				&args->event_slot_index);
+
+	pr_debug("Created event (id:0x%08x) (%s)\n", args->event_id, __func__);
+	return err;
+}
+
+static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
+					void *data)
+{
+	struct kfd_ioctl_destroy_event_args *args = data;
+
+	return kfd_event_destroy(p, args->event_id);
+}
+
+static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
+				void *data)
+{
+	struct kfd_ioctl_set_event_args *args = data;
+
+	return kfd_set_event(p, args->event_id);
+}
+
+static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
+				void *data)
+{
+	struct kfd_ioctl_reset_event_args *args = data;
+
+	return kfd_reset_event(p, args->event_id);
+}
+
+static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
+				void *data)
+{
+	struct kfd_ioctl_wait_events_args *args = data;
+
+	return kfd_wait_on_events(p, args->num_events,
+			(void __user *)args->events_ptr,
+			(args->wait_for_all != 0),
+			&args->timeout, &args->wait_result);
+}
+static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
+	struct kfd_process_device *pdd;
+	struct kfd_node *dev;
+	long err;
+
+	mutex_lock(&p->mutex);
+	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+	if (!pdd) {
+		err = -EINVAL;
+		goto err_pdd;
+	}
+	dev = pdd->dev;
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		err = PTR_ERR(pdd);
+		goto bind_process_to_device_fail;
+	}
+
+	pdd->qpd.sh_hidden_private_base = args->va_addr;
+
+	mutex_unlock(&p->mutex);
+
+	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
+	    pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)
+		dev->kfd2kgd->set_scratch_backing_va(
+			dev->adev, args->va_addr, pdd->qpd.vmid);
+
+	return 0;
+
+bind_process_to_device_fail:
+err_pdd:
+	mutex_unlock(&p->mutex);
+	return err;
+}
+
+static int kfd_ioctl_get_tile_config(struct file *filep,
+		struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_tile_config_args *args = data;
+	struct kfd_process_device *pdd;
+	struct tile_config config;
+	int err = 0;
+
+	mutex_lock(&p->mutex);
+	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+	mutex_unlock(&p->mutex);
+	if (!pdd)
+		return -EINVAL;
+
+	amdgpu_amdkfd_get_tile_config(pdd->dev->adev, &config);
+
+	args->gb_addr_config = config.gb_addr_config;
+	args->num_banks = config.num_banks;
+	args->num_ranks = config.num_ranks;
+
+	if (args->num_tile_configs > config.num_tile_configs)
+		args->num_tile_configs = config.num_tile_configs;
+	err = copy_to_user((void __user *)args->tile_config_ptr,
+			config.tile_config_ptr,
+			args->num_tile_configs * sizeof(uint32_t));
+	if (err) {
+		args->num_tile_configs = 0;
+		return -EFAULT;
+	}
+
+	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
+		args->num_macro_tile_configs =
+				config.num_macro_tile_configs;
+	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
+			config.macro_tile_config_ptr,
+			args->num_macro_tile_configs * sizeof(uint32_t));
+	if (err) {
+		args->num_macro_tile_configs = 0;
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
+				void *data)
+{
+	struct kfd_ioctl_acquire_vm_args *args = data;
+	struct kfd_process_device *pdd;
+	struct file *drm_file;
+	int ret;
+
+	drm_file = fget(args->drm_fd);
+	if (!drm_file)
+		return -EINVAL;
+
+	mutex_lock(&p->mutex);
+	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+	if (!pdd) {
+		ret = -EINVAL;
+		goto err_pdd;
+	}
+
+	if (pdd->drm_file) {
+		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
+		goto err_drm_file;
+	}
+
+	ret = kfd_process_device_init_vm(pdd, drm_file);
+	if (ret)
+		goto err_unlock;
+
+	/* On success, the PDD keeps the drm_file reference */
+	mutex_unlock(&p->mutex);
+
+	return 0;
+
+err_unlock:
+err_pdd:
+err_drm_file:
+	mutex_unlock(&p->mutex);
+	fput(drm_file);
+	return ret;
+}
+
+bool kfd_dev_is_large_bar(struct kfd_node *dev)
+{
+	if (debug_largebar) {
+		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
+		return true;
+	}
+
+	if (dev->local_mem_info.local_mem_size_private == 0 &&
+	    dev->local_mem_info.local_mem_size_public > 0)
+		return true;
+
+	if (dev->local_mem_info.local_mem_size_public == 0 &&
+	    dev->kfd->adev->gmc.is_app_apu) {
+		pr_debug("APP APU, Consider like a large bar system\n");
+		return true;
+	}
+
+	return false;
+}
+
+static int kfd_ioctl_get_available_memory(struct file *filep,
+					  struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_available_memory_args *args = data;
+	struct kfd_process_device *pdd = kfd_lock_pdd_by_id(p, args->gpu_id);
+
+	if (!pdd)
+		return -EINVAL;
+	args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev,
+							pdd->dev->node_id);
+	kfd_unlock_pdd(pdd);
+	return 0;
+}
+
+static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
+	struct kfd_process_device *pdd;
+	void *mem;
+	struct kfd_node *dev;
+	int idr_handle;
+	long err;
+	uint64_t offset = args->mmap_offset;
+	uint32_t flags = args->flags;
+
+	if (args->size == 0)
+		return -EINVAL;
+
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+	/* Flush pending deferred work to avoid racing with deferred actions
+	 * from previous memory map changes (e.g. munmap).
+	 */
+	svm_range_list_lock_and_flush_work(&p->svms, current->mm);
+	mutex_lock(&p->svms.lock);
+	mmap_write_unlock(current->mm);
+	if (interval_tree_iter_first(&p->svms.objects,
+				     args->va_addr >> PAGE_SHIFT,
+				     (args->va_addr + args->size - 1) >> PAGE_SHIFT)) {
+		pr_err("Address: 0x%llx already allocated by SVM\n",
+			args->va_addr);
+		mutex_unlock(&p->svms.lock);
+		return -EADDRINUSE;
+	}
+
+	/* When register user buffer check if it has been registered by svm by
+	 * buffer cpu virtual address.
+	 */
+	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) &&
+	    interval_tree_iter_first(&p->svms.objects,
+				     args->mmap_offset >> PAGE_SHIFT,
+				     (args->mmap_offset  + args->size - 1) >> PAGE_SHIFT)) {
+		pr_err("User Buffer Address: 0x%llx already allocated by SVM\n",
+			args->mmap_offset);
+		mutex_unlock(&p->svms.lock);
+		return -EADDRINUSE;
+	}
+
+	mutex_unlock(&p->svms.lock);
+#endif
+	mutex_lock(&p->mutex);
+	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+	if (!pdd) {
+		err = -EINVAL;
+		goto err_pdd;
+	}
+
+	dev = pdd->dev;
+
+	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
+		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
+		!kfd_dev_is_large_bar(dev)) {
+		pr_err("Alloc host visible vram on small bar is not allowed\n");
+		err = -EINVAL;
+		goto err_large_bar;
+	}
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		err = PTR_ERR(pdd);
+		goto err_unlock;
+	}
+
+	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
+		if (args->size != kfd_doorbell_process_slice(dev->kfd)) {
+			err = -EINVAL;
+			goto err_unlock;
+		}
+		offset = kfd_get_process_doorbells(pdd);
+		if (!offset) {
+			err = -ENOMEM;
+			goto err_unlock;
+		}
+	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
+		if (args->size != PAGE_SIZE) {
+			err = -EINVAL;
+			goto err_unlock;
+		}
+		offset = dev->adev->rmmio_remap.bus_addr;
+		if (!offset) {
+			err = -ENOMEM;
+			goto err_unlock;
+		}
+	}
+
+	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
+		dev->adev, args->va_addr, args->size,
+		pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
+		flags, false);
+
+	if (err)
+		goto err_unlock;
+
+	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
+	if (idr_handle < 0) {
+		err = -EFAULT;
+		goto err_free;
+	}
+
+	/* Update the VRAM usage count */
+	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+		uint64_t size = args->size;
+
+		if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
+			size >>= 1;
+		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
+	}
+
+	mutex_unlock(&p->mutex);
+
+	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
+	args->mmap_offset = offset;
+
+	/* MMIO is mapped through kfd device
+	 * Generate a kfd mmap offset
+	 */
+	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
+		args->mmap_offset = KFD_MMAP_TYPE_MMIO
+					| KFD_MMAP_GPU_ID(args->gpu_id);
+
+	return 0;
+
+err_free:
+	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem,
+					       pdd->drm_priv, NULL);
+err_unlock:
+err_pdd:
+err_large_bar:
+	mutex_unlock(&p->mutex);
+	return err;
+}
+
+static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
+	struct kfd_process_device *pdd;
+	void *mem;
+	int ret;
+	uint64_t size = 0;
+
+	mutex_lock(&p->mutex);
+	/*
+	 * Safeguard to prevent user space from freeing signal BO.
+	 * It will be freed at process termination.
+	 */
+	if (p->signal_handle && (p->signal_handle == args->handle)) {
+		pr_err("Free signal BO is not allowed\n");
+		ret = -EPERM;
+		goto err_unlock;
+	}
+
+	pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
+	if (!pdd) {
+		pr_err("Process device data doesn't exist\n");
+		ret = -EINVAL;
+		goto err_pdd;
+	}
+
+	mem = kfd_process_device_translate_handle(
+		pdd, GET_IDR_HANDLE(args->handle));
+	if (!mem) {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev,
+				(struct kgd_mem *)mem, pdd->drm_priv, &size);
+
+	/* If freeing the buffer failed, leave the handle in place for
+	 * clean-up during process tear-down.
+	 */
+	if (!ret)
+		kfd_process_device_remove_obj_handle(
+			pdd, GET_IDR_HANDLE(args->handle));
+
+	WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
+
+err_unlock:
+err_pdd:
+	mutex_unlock(&p->mutex);
+	return ret;
+}
+
+static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
+	struct kfd_process_device *pdd, *peer_pdd;
+	void *mem;
+	struct kfd_node *dev;
+	long err = 0;
+	int i;
+	uint32_t *devices_arr = NULL;
+
+	if (!args->n_devices) {
+		pr_debug("Device IDs array empty\n");
+		return -EINVAL;
+	}
+	if (args->n_success > args->n_devices) {
+		pr_debug("n_success exceeds n_devices\n");
+		return -EINVAL;
+	}
+
+	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
+				    GFP_KERNEL);
+	if (!devices_arr)
+		return -ENOMEM;
+
+	err = copy_from_user(devices_arr,
+			     (void __user *)args->device_ids_array_ptr,
+			     args->n_devices * sizeof(*devices_arr));
+	if (err != 0) {
+		err = -EFAULT;
+		goto copy_from_user_failed;
+	}
+
+	mutex_lock(&p->mutex);
+	pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
+	if (!pdd) {
+		err = -EINVAL;
+		goto get_process_device_data_failed;
+	}
+	dev = pdd->dev;
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		err = PTR_ERR(pdd);
+		goto bind_process_to_device_failed;
+	}
+
+	mem = kfd_process_device_translate_handle(pdd,
+						GET_IDR_HANDLE(args->handle));
+	if (!mem) {
+		err = -ENOMEM;
+		goto get_mem_obj_from_handle_failed;
+	}
+
+	for (i = args->n_success; i < args->n_devices; i++) {
+		peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
+		if (!peer_pdd) {
+			pr_debug("Getting device by id failed for 0x%x\n",
+				 devices_arr[i]);
+			err = -EINVAL;
+			goto get_mem_obj_from_handle_failed;
+		}
+
+		peer_pdd = kfd_bind_process_to_device(peer_pdd->dev, p);
+		if (IS_ERR(peer_pdd)) {
+			err = PTR_ERR(peer_pdd);
+			goto get_mem_obj_from_handle_failed;
+		}
+
+		err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
+			peer_pdd->dev->adev, (struct kgd_mem *)mem,
+			peer_pdd->drm_priv);
+		if (err) {
+			struct pci_dev *pdev = peer_pdd->dev->adev->pdev;
+
+			dev_err(dev->adev->dev,
+			       "Failed to map peer:%04x:%02x:%02x.%d mem_domain:%d\n",
+			       pci_domain_nr(pdev->bus),
+			       pdev->bus->number,
+			       PCI_SLOT(pdev->devfn),
+			       PCI_FUNC(pdev->devfn),
+			       ((struct kgd_mem *)mem)->domain);
+			goto map_memory_to_gpu_failed;
+		}
+		args->n_success = i+1;
+	}
+
+	err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true);
+	if (err) {
+		pr_debug("Sync memory failed, wait interrupted by user signal\n");
+		goto sync_memory_failed;
+	}
+
+	mutex_unlock(&p->mutex);
+
+	/* Flush TLBs after waiting for the page table updates to complete */
+	for (i = 0; i < args->n_devices; i++) {
+		peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
+		if (WARN_ON_ONCE(!peer_pdd))
+			continue;
+		kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
+	}
+	kfree(devices_arr);
+
+	return err;
+
+get_process_device_data_failed:
+bind_process_to_device_failed:
+get_mem_obj_from_handle_failed:
+map_memory_to_gpu_failed:
+sync_memory_failed:
+	mutex_unlock(&p->mutex);
+copy_from_user_failed:
+	kfree(devices_arr);
+
+	return err;
+}
+
+static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
+	struct kfd_process_device *pdd, *peer_pdd;
+	void *mem;
+	long err = 0;
+	uint32_t *devices_arr = NULL, i;
+	bool flush_tlb;
+
+	if (!args->n_devices) {
+		pr_debug("Device IDs array empty\n");
+		return -EINVAL;
+	}
+	if (args->n_success > args->n_devices) {
+		pr_debug("n_success exceeds n_devices\n");
+		return -EINVAL;
+	}
+
+	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
+				    GFP_KERNEL);
+	if (!devices_arr)
+		return -ENOMEM;
+
+	err = copy_from_user(devices_arr,
+			     (void __user *)args->device_ids_array_ptr,
+			     args->n_devices * sizeof(*devices_arr));
+	if (err != 0) {
+		err = -EFAULT;
+		goto copy_from_user_failed;
+	}
+
+	mutex_lock(&p->mutex);
+	pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
+	if (!pdd) {
+		err = -EINVAL;
+		goto bind_process_to_device_failed;
+	}
+
+	mem = kfd_process_device_translate_handle(pdd,
+						GET_IDR_HANDLE(args->handle));
+	if (!mem) {
+		err = -ENOMEM;
+		goto get_mem_obj_from_handle_failed;
+	}
+
+	for (i = args->n_success; i < args->n_devices; i++) {
+		peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
+		if (!peer_pdd) {
+			err = -EINVAL;
+			goto get_mem_obj_from_handle_failed;
+		}
+		err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
+			peer_pdd->dev->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv);
+		if (err) {
+			pr_err("Failed to unmap from gpu %d/%d\n",
+			       i, args->n_devices);
+			goto unmap_memory_from_gpu_failed;
+		}
+		args->n_success = i+1;
+	}
+
+	flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev->kfd);
+	if (flush_tlb) {
+		err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,
+				(struct kgd_mem *) mem, true);
+		if (err) {
+			pr_debug("Sync memory failed, wait interrupted by user signal\n");
+			goto sync_memory_failed;
+		}
+	}
+	mutex_unlock(&p->mutex);
+
+	if (flush_tlb) {
+		/* Flush TLBs after waiting for the page table updates to complete */
+		for (i = 0; i < args->n_devices; i++) {
+			peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
+			if (WARN_ON_ONCE(!peer_pdd))
+				continue;
+			kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
+		}
+	}
+	kfree(devices_arr);
+
+	return 0;
+
+bind_process_to_device_failed:
+get_mem_obj_from_handle_failed:
+unmap_memory_from_gpu_failed:
+sync_memory_failed:
+	mutex_unlock(&p->mutex);
+copy_from_user_failed:
+	kfree(devices_arr);
+	return err;
+}
+
+static int kfd_ioctl_alloc_queue_gws(struct file *filep,
+		struct kfd_process *p, void *data)
+{
+	int retval;
+	struct kfd_ioctl_alloc_queue_gws_args *args = data;
+	struct queue *q;
+	struct kfd_node *dev;
+
+	mutex_lock(&p->mutex);
+	q = pqm_get_user_queue(&p->pqm, args->queue_id);
+
+	if (q) {
+		dev = q->device;
+	} else {
+		retval = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (!dev->gws) {
+		retval = -ENODEV;
+		goto out_unlock;
+	}
+
+	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+		retval = -ENODEV;
+		goto out_unlock;
+	}
+
+	if (p->debug_trap_enabled && (!kfd_dbg_has_gws_support(dev) ||
+				      kfd_dbg_has_cwsr_workaround(dev))) {
+		retval = -EBUSY;
+		goto out_unlock;
+	}
+
+	retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
+	mutex_unlock(&p->mutex);
+
+	args->first_gws = 0;
+	return retval;
+
+out_unlock:
+	mutex_unlock(&p->mutex);
+	return retval;
+}
+
+static int kfd_ioctl_get_dmabuf_info(struct file *filep,
+		struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_dmabuf_info_args *args = data;
+	struct kfd_node *dev = NULL;
+	struct amdgpu_device *dmabuf_adev;
+	void *metadata_buffer = NULL;
+	uint32_t flags;
+	int8_t xcp_id;
+	unsigned int i;
+	int r;
+
+	/* Find a KFD GPU device that supports the get_dmabuf_info query */
+	for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
+		if (dev)
+			break;
+	if (!dev)
+		return -EINVAL;
+
+	if (args->metadata_ptr) {
+		metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
+		if (!metadata_buffer)
+			return -ENOMEM;
+	}
+
+	/* Get dmabuf info from KGD */
+	r = amdgpu_amdkfd_get_dmabuf_info(dev->adev, args->dmabuf_fd,
+					  &dmabuf_adev, &args->size,
+					  metadata_buffer, args->metadata_size,
+					  &args->metadata_size, &flags, &xcp_id);
+	if (r)
+		goto exit;
+
+	if (xcp_id >= 0)
+		args->gpu_id = dmabuf_adev->kfd.dev->nodes[xcp_id]->id;
+	else
+		args->gpu_id = dmabuf_adev->kfd.dev->nodes[0]->id;
+	args->flags = flags;
+
+	/* Copy metadata buffer to user mode */
+	if (metadata_buffer) {
+		r = copy_to_user((void __user *)args->metadata_ptr,
+				 metadata_buffer, args->metadata_size);
+		if (r != 0)
+			r = -EFAULT;
+	}
+
+exit:
+	kfree(metadata_buffer);
+
+	return r;
+}
+
+static int kfd_ioctl_import_dmabuf(struct file *filep,
+				   struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_import_dmabuf_args *args = data;
+	struct kfd_process_device *pdd;
+	struct dma_buf *dmabuf;
+	int idr_handle;
+	uint64_t size;
+	void *mem;
+	int r;
+
+	dmabuf = dma_buf_get(args->dmabuf_fd);
+	if (IS_ERR(dmabuf))
+		return PTR_ERR(dmabuf);
+
+	mutex_lock(&p->mutex);
+	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
+	if (!pdd) {
+		r = -EINVAL;
+		goto err_unlock;
+	}
+
+	pdd = kfd_bind_process_to_device(pdd->dev, p);
+	if (IS_ERR(pdd)) {
+		r = PTR_ERR(pdd);
+		goto err_unlock;
+	}
+
+	r = amdgpu_amdkfd_gpuvm_import_dmabuf(pdd->dev->adev, dmabuf,
+					      args->va_addr, pdd->drm_priv,
+					      (struct kgd_mem **)&mem, &size,
+					      NULL);
+	if (r)
+		goto err_unlock;
+
+	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
+	if (idr_handle < 0) {
+		r = -EFAULT;
+		goto err_free;
+	}
+
+	mutex_unlock(&p->mutex);
+	dma_buf_put(dmabuf);
+
+	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
+
+	return 0;
+
+err_free:
+	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, (struct kgd_mem *)mem,
+					       pdd->drm_priv, NULL);
+err_unlock:
+	mutex_unlock(&p->mutex);
+	dma_buf_put(dmabuf);
+	return r;
+}
+
+static int kfd_ioctl_export_dmabuf(struct file *filep,
+				   struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_export_dmabuf_args *args = data;
+	struct kfd_process_device *pdd;
+	struct dma_buf *dmabuf;
+	struct kfd_node *dev;
+	void *mem;
+	int ret = 0;
+
+	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+	if (!dev)
+		return -EINVAL;
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd) {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	mem = kfd_process_device_translate_handle(pdd,
+						GET_IDR_HANDLE(args->handle));
+	if (!mem) {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf);
+	mutex_unlock(&p->mutex);
+	if (ret)
+		goto err_out;
+
+	ret = dma_buf_fd(dmabuf, args->flags);
+	if (ret < 0) {
+		dma_buf_put(dmabuf);
+		goto err_out;
+	}
+	/* dma_buf_fd assigns the reference count to the fd, no need to
+	 * put the reference here.
+	 */
+	args->dmabuf_fd = ret;
+
+	return 0;
+
+err_unlock:
+	mutex_unlock(&p->mutex);
+err_out:
+	return ret;
+}
+
+/* Handle requests for watching SMI events */
+static int kfd_ioctl_smi_events(struct file *filep,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_smi_events_args *args = data;
+	struct kfd_process_device *pdd;
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_process_device_data_by_id(p, args->gpuid);
+	mutex_unlock(&p->mutex);
+	if (!pdd)
+		return -EINVAL;
+
+	return kfd_smi_event_open(pdd->dev, &args->anon_fd);
+}
+
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+
+static int kfd_ioctl_set_xnack_mode(struct file *filep,
+				    struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_set_xnack_mode_args *args = data;
+	int r = 0;
+
+	mutex_lock(&p->mutex);
+	if (args->xnack_enabled >= 0) {
+		if (!list_empty(&p->pqm.queues)) {
+			pr_debug("Process has user queues running\n");
+			r = -EBUSY;
+			goto out_unlock;
+		}
+
+		if (p->xnack_enabled == args->xnack_enabled)
+			goto out_unlock;
+
+		if (args->xnack_enabled && !kfd_process_xnack_mode(p, true)) {
+			r = -EPERM;
+			goto out_unlock;
+		}
+
+		r = svm_range_switch_xnack_reserve_mem(p, args->xnack_enabled);
+	} else {
+		args->xnack_enabled = p->xnack_enabled;
+	}
+
+out_unlock:
+	mutex_unlock(&p->mutex);
+
+	return r;
+}
+
+static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_svm_args *args = data;
+	int r = 0;
+
+	pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n",
+		 args->start_addr, args->size, args->op, args->nattr);
+
+	if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK))
+		return -EINVAL;
+	if (!args->start_addr || !args->size)
+		return -EINVAL;
+
+	r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr,
+		      args->attrs);
+
+	return r;
+}
+#else
+static int kfd_ioctl_set_xnack_mode(struct file *filep,
+				    struct kfd_process *p, void *data)
+{
+	return -EPERM;
+}
+static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
+{
+	return -EPERM;
+}
+#endif
+
+static int criu_checkpoint_process(struct kfd_process *p,
+			     uint8_t __user *user_priv_data,
+			     uint64_t *priv_offset)
+{
+	struct kfd_criu_process_priv_data process_priv;
+	int ret;
+
+	memset(&process_priv, 0, sizeof(process_priv));
+
+	process_priv.version = KFD_CRIU_PRIV_VERSION;
+	/* For CR, we don't consider negative xnack mode which is used for
+	 * querying without changing it, here 0 simply means disabled and 1
+	 * means enabled so retry for finding a valid PTE.
+	 */
+	process_priv.xnack_mode = p->xnack_enabled ? 1 : 0;
+
+	ret = copy_to_user(user_priv_data + *priv_offset,
+				&process_priv, sizeof(process_priv));
+
+	if (ret) {
+		pr_err("Failed to copy process information to user\n");
+		ret = -EFAULT;
+	}
+
+	*priv_offset += sizeof(process_priv);
+	return ret;
+}
+
+static int criu_checkpoint_devices(struct kfd_process *p,
+			     uint32_t num_devices,
+			     uint8_t __user *user_addr,
+			     uint8_t __user *user_priv_data,
+			     uint64_t *priv_offset)
+{
+	struct kfd_criu_device_priv_data *device_priv = NULL;
+	struct kfd_criu_device_bucket *device_buckets = NULL;
+	int ret = 0, i;
+
+	device_buckets = kvzalloc(num_devices * sizeof(*device_buckets), GFP_KERNEL);
+	if (!device_buckets) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	device_priv = kvzalloc(num_devices * sizeof(*device_priv), GFP_KERNEL);
+	if (!device_priv) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	for (i = 0; i < num_devices; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+
+		device_buckets[i].user_gpu_id = pdd->user_gpu_id;
+		device_buckets[i].actual_gpu_id = pdd->dev->id;
+
+		/*
+		 * priv_data does not contain useful information for now and is reserved for
+		 * future use, so we do not set its contents.
+		 */
+	}
+
+	ret = copy_to_user(user_addr, device_buckets, num_devices * sizeof(*device_buckets));
+	if (ret) {
+		pr_err("Failed to copy device information to user\n");
+		ret = -EFAULT;
+		goto exit;
+	}
+
+	ret = copy_to_user(user_priv_data + *priv_offset,
+			   device_priv,
+			   num_devices * sizeof(*device_priv));
+	if (ret) {
+		pr_err("Failed to copy device information to user\n");
+		ret = -EFAULT;
+	}
+	*priv_offset += num_devices * sizeof(*device_priv);
+
+exit:
+	kvfree(device_buckets);
+	kvfree(device_priv);
+	return ret;
+}
+
+static uint32_t get_process_num_bos(struct kfd_process *p)
+{
+	uint32_t num_of_bos = 0;
+	int i;
+
+	/* Run over all PDDs of the process */
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+		void *mem;
+		int id;
+
+		idr_for_each_entry(&pdd->alloc_idr, mem, id) {
+			struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
+
+			if (!kgd_mem->va || kgd_mem->va > pdd->gpuvm_base)
+				num_of_bos++;
+		}
+	}
+	return num_of_bos;
+}
+
+static int criu_get_prime_handle(struct kgd_mem *mem, int flags,
+				      u32 *shared_fd)
+{
+	struct dma_buf *dmabuf;
+	int ret;
+
+	ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf);
+	if (ret) {
+		pr_err("dmabuf export failed for the BO\n");
+		return ret;
+	}
+
+	ret = dma_buf_fd(dmabuf, flags);
+	if (ret < 0) {
+		pr_err("dmabuf create fd failed, ret:%d\n", ret);
+		goto out_free_dmabuf;
+	}
+
+	*shared_fd = ret;
+	return 0;
+
+out_free_dmabuf:
+	dma_buf_put(dmabuf);
+	return ret;
+}
+
+static int criu_checkpoint_bos(struct kfd_process *p,
+			       uint32_t num_bos,
+			       uint8_t __user *user_bos,
+			       uint8_t __user *user_priv_data,
+			       uint64_t *priv_offset)
+{
+	struct kfd_criu_bo_bucket *bo_buckets;
+	struct kfd_criu_bo_priv_data *bo_privs;
+	int ret = 0, pdd_index, bo_index = 0, id;
+	void *mem;
+
+	bo_buckets = kvzalloc(num_bos * sizeof(*bo_buckets), GFP_KERNEL);
+	if (!bo_buckets)
+		return -ENOMEM;
+
+	bo_privs = kvzalloc(num_bos * sizeof(*bo_privs), GFP_KERNEL);
+	if (!bo_privs) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
+		struct kfd_process_device *pdd = p->pdds[pdd_index];
+		struct amdgpu_bo *dumper_bo;
+		struct kgd_mem *kgd_mem;
+
+		idr_for_each_entry(&pdd->alloc_idr, mem, id) {
+			struct kfd_criu_bo_bucket *bo_bucket;
+			struct kfd_criu_bo_priv_data *bo_priv;
+			int i, dev_idx = 0;
+
+			if (!mem) {
+				ret = -ENOMEM;
+				goto exit;
+			}
+
+			kgd_mem = (struct kgd_mem *)mem;
+			dumper_bo = kgd_mem->bo;
+
+			/* Skip checkpointing BOs that are used for Trap handler
+			 * code and state. Currently, these BOs have a VA that
+			 * is less GPUVM Base
+			 */
+			if (kgd_mem->va && kgd_mem->va <= pdd->gpuvm_base)
+				continue;
+
+			bo_bucket = &bo_buckets[bo_index];
+			bo_priv = &bo_privs[bo_index];
+
+			bo_bucket->gpu_id = pdd->user_gpu_id;
+			bo_bucket->addr = (uint64_t)kgd_mem->va;
+			bo_bucket->size = amdgpu_bo_size(dumper_bo);
+			bo_bucket->alloc_flags = (uint32_t)kgd_mem->alloc_flags;
+			bo_priv->idr_handle = id;
+
+			if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+				ret = amdgpu_ttm_tt_get_userptr(&dumper_bo->tbo,
+								&bo_priv->user_addr);
+				if (ret) {
+					pr_err("Failed to obtain user address for user-pointer bo\n");
+					goto exit;
+				}
+			}
+			if (bo_bucket->alloc_flags
+			    & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
+				ret = criu_get_prime_handle(kgd_mem,
+						bo_bucket->alloc_flags &
+						KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
+						&bo_bucket->dmabuf_fd);
+				if (ret)
+					goto exit;
+			} else {
+				bo_bucket->dmabuf_fd = KFD_INVALID_FD;
+			}
+
+			if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
+				bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL |
+					KFD_MMAP_GPU_ID(pdd->dev->id);
+			else if (bo_bucket->alloc_flags &
+				KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
+				bo_bucket->offset = KFD_MMAP_TYPE_MMIO |
+					KFD_MMAP_GPU_ID(pdd->dev->id);
+			else
+				bo_bucket->offset = amdgpu_bo_mmap_offset(dumper_bo);
+
+			for (i = 0; i < p->n_pdds; i++) {
+				if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->dev->adev, kgd_mem))
+					bo_priv->mapped_gpuids[dev_idx++] = p->pdds[i]->user_gpu_id;
+			}
+
+			pr_debug("bo_size = 0x%llx, bo_addr = 0x%llx bo_offset = 0x%llx\n"
+					"gpu_id = 0x%x alloc_flags = 0x%x idr_handle = 0x%x",
+					bo_bucket->size,
+					bo_bucket->addr,
+					bo_bucket->offset,
+					bo_bucket->gpu_id,
+					bo_bucket->alloc_flags,
+					bo_priv->idr_handle);
+			bo_index++;
+		}
+	}
+
+	ret = copy_to_user(user_bos, bo_buckets, num_bos * sizeof(*bo_buckets));
+	if (ret) {
+		pr_err("Failed to copy BO information to user\n");
+		ret = -EFAULT;
+		goto exit;
+	}
+
+	ret = copy_to_user(user_priv_data + *priv_offset, bo_privs, num_bos * sizeof(*bo_privs));
+	if (ret) {
+		pr_err("Failed to copy BO priv information to user\n");
+		ret = -EFAULT;
+		goto exit;
+	}
+
+	*priv_offset += num_bos * sizeof(*bo_privs);
+
+exit:
+	while (ret && bo_index--) {
+		if (bo_buckets[bo_index].alloc_flags
+		    & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
+			close_fd(bo_buckets[bo_index].dmabuf_fd);
+	}
+
+	kvfree(bo_buckets);
+	kvfree(bo_privs);
+	return ret;
+}
+
+static int criu_get_process_object_info(struct kfd_process *p,
+					uint32_t *num_devices,
+					uint32_t *num_bos,
+					uint32_t *num_objects,
+					uint64_t *objs_priv_size)
+{
+	uint64_t queues_priv_data_size, svm_priv_data_size, priv_size;
+	uint32_t num_queues, num_events, num_svm_ranges;
+	int ret;
+
+	*num_devices = p->n_pdds;
+	*num_bos = get_process_num_bos(p);
+
+	ret = kfd_process_get_queue_info(p, &num_queues, &queues_priv_data_size);
+	if (ret)
+		return ret;
+
+	num_events = kfd_get_num_events(p);
+
+	ret = svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size);
+	if (ret)
+		return ret;
+
+	*num_objects = num_queues + num_events + num_svm_ranges;
+
+	if (objs_priv_size) {
+		priv_size = sizeof(struct kfd_criu_process_priv_data);
+		priv_size += *num_devices * sizeof(struct kfd_criu_device_priv_data);
+		priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data);
+		priv_size += queues_priv_data_size;
+		priv_size += num_events * sizeof(struct kfd_criu_event_priv_data);
+		priv_size += svm_priv_data_size;
+		*objs_priv_size = priv_size;
+	}
+	return 0;
+}
+
+static int criu_checkpoint(struct file *filep,
+			   struct kfd_process *p,
+			   struct kfd_ioctl_criu_args *args)
+{
+	int ret;
+	uint32_t num_devices, num_bos, num_objects;
+	uint64_t priv_size, priv_offset = 0, bo_priv_offset;
+
+	if (!args->devices || !args->bos || !args->priv_data)
+		return -EINVAL;
+
+	mutex_lock(&p->mutex);
+
+	if (!p->n_pdds) {
+		pr_err("No pdd for given process\n");
+		ret = -ENODEV;
+		goto exit_unlock;
+	}
+
+	/* Confirm all process queues are evicted */
+	if (!p->queues_paused) {
+		pr_err("Cannot dump process when queues are not in evicted state\n");
+		/* CRIU plugin did not call op PROCESS_INFO before checkpointing */
+		ret = -EINVAL;
+		goto exit_unlock;
+	}
+
+	ret = criu_get_process_object_info(p, &num_devices, &num_bos, &num_objects, &priv_size);
+	if (ret)
+		goto exit_unlock;
+
+	if (num_devices != args->num_devices ||
+	    num_bos != args->num_bos ||
+	    num_objects != args->num_objects ||
+	    priv_size != args->priv_data_size) {
+
+		ret = -EINVAL;
+		goto exit_unlock;
+	}
+
+	/* each function will store private data inside priv_data and adjust priv_offset */
+	ret = criu_checkpoint_process(p, (uint8_t __user *)args->priv_data, &priv_offset);
+	if (ret)
+		goto exit_unlock;
+
+	ret = criu_checkpoint_devices(p, num_devices, (uint8_t __user *)args->devices,
+				(uint8_t __user *)args->priv_data, &priv_offset);
+	if (ret)
+		goto exit_unlock;
+
+	/* Leave room for BOs in the private data. They need to be restored
+	 * before events, but we checkpoint them last to simplify the error
+	 * handling.
+	 */
+	bo_priv_offset = priv_offset;
+	priv_offset += num_bos * sizeof(struct kfd_criu_bo_priv_data);
+
+	if (num_objects) {
+		ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data,
+						 &priv_offset);
+		if (ret)
+			goto exit_unlock;
+
+		ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data,
+						 &priv_offset);
+		if (ret)
+			goto exit_unlock;
+
+		ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data, &priv_offset);
+		if (ret)
+			goto exit_unlock;
+	}
+
+	/* This must be the last thing in this function that can fail.
+	 * Otherwise we leak dmabuf file descriptors.
+	 */
+	ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,
+			   (uint8_t __user *)args->priv_data, &bo_priv_offset);
+
+exit_unlock:
+	mutex_unlock(&p->mutex);
+	if (ret)
+		pr_err("Failed to dump CRIU ret:%d\n", ret);
+	else
+		pr_debug("CRIU dump ret:%d\n", ret);
+
+	return ret;
+}
+
+static int criu_restore_process(struct kfd_process *p,
+				struct kfd_ioctl_criu_args *args,
+				uint64_t *priv_offset,
+				uint64_t max_priv_data_size)
+{
+	int ret = 0;
+	struct kfd_criu_process_priv_data process_priv;
+
+	if (*priv_offset + sizeof(process_priv) > max_priv_data_size)
+		return -EINVAL;
+
+	ret = copy_from_user(&process_priv,
+				(void __user *)(args->priv_data + *priv_offset),
+				sizeof(process_priv));
+	if (ret) {
+		pr_err("Failed to copy process private information from user\n");
+		ret = -EFAULT;
+		goto exit;
+	}
+	*priv_offset += sizeof(process_priv);
+
+	if (process_priv.version != KFD_CRIU_PRIV_VERSION) {
+		pr_err("Invalid CRIU API version (checkpointed:%d current:%d)\n",
+			process_priv.version, KFD_CRIU_PRIV_VERSION);
+		return -EINVAL;
+	}
+
+	pr_debug("Setting XNACK mode\n");
+	if (process_priv.xnack_mode && !kfd_process_xnack_mode(p, true)) {
+		pr_err("xnack mode cannot be set\n");
+		ret = -EPERM;
+		goto exit;
+	} else {
+		pr_debug("set xnack mode: %d\n", process_priv.xnack_mode);
+		p->xnack_enabled = process_priv.xnack_mode;
+	}
+
+exit:
+	return ret;
+}
+
+static int criu_restore_devices(struct kfd_process *p,
+				struct kfd_ioctl_criu_args *args,
+				uint64_t *priv_offset,
+				uint64_t max_priv_data_size)
+{
+	struct kfd_criu_device_bucket *device_buckets;
+	struct kfd_criu_device_priv_data *device_privs;
+	int ret = 0;
+	uint32_t i;
+
+	if (args->num_devices != p->n_pdds)
+		return -EINVAL;
+
+	if (*priv_offset + (args->num_devices * sizeof(*device_privs)) > max_priv_data_size)
+		return -EINVAL;
+
+	device_buckets = kmalloc_array(args->num_devices, sizeof(*device_buckets), GFP_KERNEL);
+	if (!device_buckets)
+		return -ENOMEM;
+
+	ret = copy_from_user(device_buckets, (void __user *)args->devices,
+				args->num_devices * sizeof(*device_buckets));
+	if (ret) {
+		pr_err("Failed to copy devices buckets from user\n");
+		ret = -EFAULT;
+		goto exit;
+	}
+
+	for (i = 0; i < args->num_devices; i++) {
+		struct kfd_node *dev;
+		struct kfd_process_device *pdd;
+		struct file *drm_file;
+
+		/* device private data is not currently used */
+
+		if (!device_buckets[i].user_gpu_id) {
+			pr_err("Invalid user gpu_id\n");
+			ret = -EINVAL;
+			goto exit;
+		}
+
+		dev = kfd_device_by_id(device_buckets[i].actual_gpu_id);
+		if (!dev) {
+			pr_err("Failed to find device with gpu_id = %x\n",
+				device_buckets[i].actual_gpu_id);
+			ret = -EINVAL;
+			goto exit;
+		}
+
+		pdd = kfd_get_process_device_data(dev, p);
+		if (!pdd) {
+			pr_err("Failed to get pdd for gpu_id = %x\n",
+					device_buckets[i].actual_gpu_id);
+			ret = -EINVAL;
+			goto exit;
+		}
+		pdd->user_gpu_id = device_buckets[i].user_gpu_id;
+
+		drm_file = fget(device_buckets[i].drm_fd);
+		if (!drm_file) {
+			pr_err("Invalid render node file descriptor sent from plugin (%d)\n",
+				device_buckets[i].drm_fd);
+			ret = -EINVAL;
+			goto exit;
+		}
+
+		if (pdd->drm_file) {
+			ret = -EINVAL;
+			goto exit;
+		}
+
+		/* create the vm using render nodes for kfd pdd */
+		if (kfd_process_device_init_vm(pdd, drm_file)) {
+			pr_err("could not init vm for given pdd\n");
+			/* On success, the PDD keeps the drm_file reference */
+			fput(drm_file);
+			ret = -EINVAL;
+			goto exit;
+		}
+		/*
+		 * pdd now already has the vm bound to render node so below api won't create a new
+		 * exclusive kfd mapping but use existing one with renderDXXX but is still needed
+		 * for iommu v2 binding  and runtime pm.
+		 */
+		pdd = kfd_bind_process_to_device(dev, p);
+		if (IS_ERR(pdd)) {
+			ret = PTR_ERR(pdd);
+			goto exit;
+		}
+
+		if (!pdd->qpd.proc_doorbells) {
+			ret = kfd_alloc_process_doorbells(dev->kfd, pdd);
+			if (ret)
+				goto exit;
+		}
+	}
+
+	/*
+	 * We are not copying device private data from user as we are not using the data for now,
+	 * but we still adjust for its private data.
+	 */
+	*priv_offset += args->num_devices * sizeof(*device_privs);
+
+exit:
+	kfree(device_buckets);
+	return ret;
+}
+
+static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
+				      struct kfd_criu_bo_bucket *bo_bucket,
+				      struct kfd_criu_bo_priv_data *bo_priv,
+				      struct kgd_mem **kgd_mem)
+{
+	int idr_handle;
+	int ret;
+	const bool criu_resume = true;
+	u64 offset;
+
+	if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
+		if (bo_bucket->size !=
+				kfd_doorbell_process_slice(pdd->dev->kfd))
+			return -EINVAL;
+
+		offset = kfd_get_process_doorbells(pdd);
+		if (!offset)
+			return -ENOMEM;
+	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
+		/* MMIO BOs need remapped bus address */
+		if (bo_bucket->size != PAGE_SIZE) {
+			pr_err("Invalid page size\n");
+			return -EINVAL;
+		}
+		offset = pdd->dev->adev->rmmio_remap.bus_addr;
+		if (!offset) {
+			pr_err("amdgpu_amdkfd_get_mmio_remap_phys_addr failed\n");
+			return -ENOMEM;
+		}
+	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+		offset = bo_priv->user_addr;
+	}
+	/* Create the BO */
+	ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(pdd->dev->adev, bo_bucket->addr,
+						      bo_bucket->size, pdd->drm_priv, kgd_mem,
+						      &offset, bo_bucket->alloc_flags, criu_resume);
+	if (ret) {
+		pr_err("Could not create the BO\n");
+		return ret;
+	}
+	pr_debug("New BO created: size:0x%llx addr:0x%llx offset:0x%llx\n",
+		 bo_bucket->size, bo_bucket->addr, offset);
+
+	/* Restore previous IDR handle */
+	pr_debug("Restoring old IDR handle for the BO");
+	idr_handle = idr_alloc(&pdd->alloc_idr, *kgd_mem, bo_priv->idr_handle,
+			       bo_priv->idr_handle + 1, GFP_KERNEL);
+
+	if (idr_handle < 0) {
+		pr_err("Could not allocate idr\n");
+		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, *kgd_mem, pdd->drm_priv,
+						       NULL);
+		return -ENOMEM;
+	}
+
+	if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
+		bo_bucket->restored_offset = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(pdd->dev->id);
+	if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
+		bo_bucket->restored_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(pdd->dev->id);
+	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
+		bo_bucket->restored_offset = offset;
+	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+		bo_bucket->restored_offset = offset;
+		/* Update the VRAM usage count */
+		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
+	}
+	return 0;
+}
+
+static int criu_restore_bo(struct kfd_process *p,
+			   struct kfd_criu_bo_bucket *bo_bucket,
+			   struct kfd_criu_bo_priv_data *bo_priv)
+{
+	struct kfd_process_device *pdd;
+	struct kgd_mem *kgd_mem;
+	int ret;
+	int j;
+
+	pr_debug("Restoring BO size:0x%llx addr:0x%llx gpu_id:0x%x flags:0x%x idr_handle:0x%x\n",
+		 bo_bucket->size, bo_bucket->addr, bo_bucket->gpu_id, bo_bucket->alloc_flags,
+		 bo_priv->idr_handle);
+
+	pdd = kfd_process_device_data_by_id(p, bo_bucket->gpu_id);
+	if (!pdd) {
+		pr_err("Failed to get pdd\n");
+		return -ENODEV;
+	}
+
+	ret = criu_restore_memory_of_gpu(pdd, bo_bucket, bo_priv, &kgd_mem);
+	if (ret)
+		return ret;
+
+	/* now map these BOs to GPU/s */
+	for (j = 0; j < p->n_pdds; j++) {
+		struct kfd_node *peer;
+		struct kfd_process_device *peer_pdd;
+
+		if (!bo_priv->mapped_gpuids[j])
+			break;
+
+		peer_pdd = kfd_process_device_data_by_id(p, bo_priv->mapped_gpuids[j]);
+		if (!peer_pdd)
+			return -EINVAL;
+
+		peer = peer_pdd->dev;
+
+		peer_pdd = kfd_bind_process_to_device(peer, p);
+		if (IS_ERR(peer_pdd))
+			return PTR_ERR(peer_pdd);
+
+		ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(peer->adev, kgd_mem,
+							    peer_pdd->drm_priv);
+		if (ret) {
+			pr_err("Failed to map to gpu %d/%d\n", j, p->n_pdds);
+			return ret;
+		}
+	}
+
+	pr_debug("map memory was successful for the BO\n");
+	/* create the dmabuf object and export the bo */
+	if (bo_bucket->alloc_flags
+	    & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
+		ret = criu_get_prime_handle(kgd_mem, DRM_RDWR,
+					    &bo_bucket->dmabuf_fd);
+		if (ret)
+			return ret;
+	} else {
+		bo_bucket->dmabuf_fd = KFD_INVALID_FD;
+	}
+
+	return 0;
+}
+
+static int criu_restore_bos(struct kfd_process *p,
+			    struct kfd_ioctl_criu_args *args,
+			    uint64_t *priv_offset,
+			    uint64_t max_priv_data_size)
+{
+	struct kfd_criu_bo_bucket *bo_buckets = NULL;
+	struct kfd_criu_bo_priv_data *bo_privs = NULL;
+	int ret = 0;
+	uint32_t i = 0;
+
+	if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size)
+		return -EINVAL;
+
+	/* Prevent MMU notifications until stage-4 IOCTL (CRIU_RESUME) is received */
+	amdgpu_amdkfd_block_mmu_notifications(p->kgd_process_info);
+
+	bo_buckets = kvmalloc_array(args->num_bos, sizeof(*bo_buckets), GFP_KERNEL);
+	if (!bo_buckets)
+		return -ENOMEM;
+
+	ret = copy_from_user(bo_buckets, (void __user *)args->bos,
+			     args->num_bos * sizeof(*bo_buckets));
+	if (ret) {
+		pr_err("Failed to copy BOs information from user\n");
+		ret = -EFAULT;
+		goto exit;
+	}
+
+	bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), GFP_KERNEL);
+	if (!bo_privs) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	ret = copy_from_user(bo_privs, (void __user *)args->priv_data + *priv_offset,
+			     args->num_bos * sizeof(*bo_privs));
+	if (ret) {
+		pr_err("Failed to copy BOs information from user\n");
+		ret = -EFAULT;
+		goto exit;
+	}
+	*priv_offset += args->num_bos * sizeof(*bo_privs);
+
+	/* Create and map new BOs */
+	for (; i < args->num_bos; i++) {
+		ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i]);
+		if (ret) {
+			pr_debug("Failed to restore BO[%d] ret%d\n", i, ret);
+			goto exit;
+		}
+	} /* done */
+
+	/* Copy only the buckets back so user can read bo_buckets[N].restored_offset */
+	ret = copy_to_user((void __user *)args->bos,
+				bo_buckets,
+				(args->num_bos * sizeof(*bo_buckets)));
+	if (ret)
+		ret = -EFAULT;
+
+exit:
+	while (ret && i--) {
+		if (bo_buckets[i].alloc_flags
+		   & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
+			close_fd(bo_buckets[i].dmabuf_fd);
+	}
+	kvfree(bo_buckets);
+	kvfree(bo_privs);
+	return ret;
+}
+
+static int criu_restore_objects(struct file *filep,
+				struct kfd_process *p,
+				struct kfd_ioctl_criu_args *args,
+				uint64_t *priv_offset,
+				uint64_t max_priv_data_size)
+{
+	int ret = 0;
+	uint32_t i;
+
+	BUILD_BUG_ON(offsetof(struct kfd_criu_queue_priv_data, object_type));
+	BUILD_BUG_ON(offsetof(struct kfd_criu_event_priv_data, object_type));
+	BUILD_BUG_ON(offsetof(struct kfd_criu_svm_range_priv_data, object_type));
+
+	for (i = 0; i < args->num_objects; i++) {
+		uint32_t object_type;
+
+		if (*priv_offset + sizeof(object_type) > max_priv_data_size) {
+			pr_err("Invalid private data size\n");
+			return -EINVAL;
+		}
+
+		ret = get_user(object_type, (uint32_t __user *)(args->priv_data + *priv_offset));
+		if (ret) {
+			pr_err("Failed to copy private information from user\n");
+			goto exit;
+		}
+
+		switch (object_type) {
+		case KFD_CRIU_OBJECT_TYPE_QUEUE:
+			ret = kfd_criu_restore_queue(p, (uint8_t __user *)args->priv_data,
+						     priv_offset, max_priv_data_size);
+			if (ret)
+				goto exit;
+			break;
+		case KFD_CRIU_OBJECT_TYPE_EVENT:
+			ret = kfd_criu_restore_event(filep, p, (uint8_t __user *)args->priv_data,
+						     priv_offset, max_priv_data_size);
+			if (ret)
+				goto exit;
+			break;
+		case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
+			ret = kfd_criu_restore_svm(p, (uint8_t __user *)args->priv_data,
+						     priv_offset, max_priv_data_size);
+			if (ret)
+				goto exit;
+			break;
+		default:
+			pr_err("Invalid object type:%u at index:%d\n", object_type, i);
+			ret = -EINVAL;
+			goto exit;
+		}
+	}
+exit:
+	return ret;
+}
+
+static int criu_restore(struct file *filep,
+			struct kfd_process *p,
+			struct kfd_ioctl_criu_args *args)
+{
+	uint64_t priv_offset = 0;
+	int ret = 0;
+
+	pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n",
+		 args->num_devices, args->num_bos, args->num_objects, args->priv_data_size);
+
+	if (!args->bos || !args->devices || !args->priv_data || !args->priv_data_size ||
+	    !args->num_devices || !args->num_bos)
+		return -EINVAL;
+
+	mutex_lock(&p->mutex);
+
+	/*
+	 * Set the process to evicted state to avoid running any new queues before all the memory
+	 * mappings are ready.
+	 */
+	ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_RESTORE);
+	if (ret)
+		goto exit_unlock;
+
+	/* Each function will adjust priv_offset based on how many bytes they consumed */
+	ret = criu_restore_process(p, args, &priv_offset, args->priv_data_size);
+	if (ret)
+		goto exit_unlock;
+
+	ret = criu_restore_devices(p, args, &priv_offset, args->priv_data_size);
+	if (ret)
+		goto exit_unlock;
+
+	ret = criu_restore_bos(p, args, &priv_offset, args->priv_data_size);
+	if (ret)
+		goto exit_unlock;
+
+	ret = criu_restore_objects(filep, p, args, &priv_offset, args->priv_data_size);
+	if (ret)
+		goto exit_unlock;
+
+	if (priv_offset != args->priv_data_size) {
+		pr_err("Invalid private data size\n");
+		ret = -EINVAL;
+	}
+
+exit_unlock:
+	mutex_unlock(&p->mutex);
+	if (ret)
+		pr_err("Failed to restore CRIU ret:%d\n", ret);
+	else
+		pr_debug("CRIU restore successful\n");
+
+	return ret;
+}
+
+static int criu_unpause(struct file *filep,
+			struct kfd_process *p,
+			struct kfd_ioctl_criu_args *args)
+{
+	int ret;
+
+	mutex_lock(&p->mutex);
+
+	if (!p->queues_paused) {
+		mutex_unlock(&p->mutex);
+		return -EINVAL;
+	}
+
+	ret = kfd_process_restore_queues(p);
+	if (ret)
+		pr_err("Failed to unpause queues ret:%d\n", ret);
+	else
+		p->queues_paused = false;
+
+	mutex_unlock(&p->mutex);
+
+	return ret;
+}
+
+static int criu_resume(struct file *filep,
+			struct kfd_process *p,
+			struct kfd_ioctl_criu_args *args)
+{
+	struct kfd_process *target = NULL;
+	struct pid *pid = NULL;
+	int ret = 0;
+
+	pr_debug("Inside %s, target pid for criu restore: %d\n", __func__,
+		 args->pid);
+
+	pid = find_get_pid(args->pid);
+	if (!pid) {
+		pr_err("Cannot find pid info for %i\n", args->pid);
+		return -ESRCH;
+	}
+
+	pr_debug("calling kfd_lookup_process_by_pid\n");
+	target = kfd_lookup_process_by_pid(pid);
+
+	put_pid(pid);
+
+	if (!target) {
+		pr_debug("Cannot find process info for %i\n", args->pid);
+		return -ESRCH;
+	}
+
+	mutex_lock(&target->mutex);
+	ret = kfd_criu_resume_svm(target);
+	if (ret) {
+		pr_err("kfd_criu_resume_svm failed for %i\n", args->pid);
+		goto exit;
+	}
+
+	ret =  amdgpu_amdkfd_criu_resume(target->kgd_process_info);
+	if (ret)
+		pr_err("amdgpu_amdkfd_criu_resume failed for %i\n", args->pid);
+
+exit:
+	mutex_unlock(&target->mutex);
+
+	kfd_unref_process(target);
+	return ret;
+}
+
+static int criu_process_info(struct file *filep,
+				struct kfd_process *p,
+				struct kfd_ioctl_criu_args *args)
+{
+	int ret = 0;
+
+	mutex_lock(&p->mutex);
+
+	if (!p->n_pdds) {
+		pr_err("No pdd for given process\n");
+		ret = -ENODEV;
+		goto err_unlock;
+	}
+
+	ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT);
+	if (ret)
+		goto err_unlock;
+
+	p->queues_paused = true;
+
+	args->pid = task_pid_nr_ns(p->lead_thread,
+					task_active_pid_ns(p->lead_thread));
+
+	ret = criu_get_process_object_info(p, &args->num_devices, &args->num_bos,
+					   &args->num_objects, &args->priv_data_size);
+	if (ret)
+		goto err_unlock;
+
+	dev_dbg(kfd_device, "Num of devices:%u bos:%u objects:%u priv_data_size:%lld\n",
+				args->num_devices, args->num_bos, args->num_objects,
+				args->priv_data_size);
+
+err_unlock:
+	if (ret) {
+		kfd_process_restore_queues(p);
+		p->queues_paused = false;
+	}
+	mutex_unlock(&p->mutex);
+	return ret;
+}
+
+static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_criu_args *args = data;
+	int ret;
+
+	dev_dbg(kfd_device, "CRIU operation: %d\n", args->op);
+	switch (args->op) {
+	case KFD_CRIU_OP_PROCESS_INFO:
+		ret = criu_process_info(filep, p, args);
+		break;
+	case KFD_CRIU_OP_CHECKPOINT:
+		ret = criu_checkpoint(filep, p, args);
+		break;
+	case KFD_CRIU_OP_UNPAUSE:
+		ret = criu_unpause(filep, p, args);
+		break;
+	case KFD_CRIU_OP_RESTORE:
+		ret = criu_restore(filep, p, args);
+		break;
+	case KFD_CRIU_OP_RESUME:
+		ret = criu_resume(filep, p, args);
+		break;
+	default:
+		dev_dbg(kfd_device, "Unsupported CRIU operation:%d\n", args->op);
+		ret = -EINVAL;
+		break;
+	}
+
+	if (ret)
+		dev_dbg(kfd_device, "CRIU operation:%d err:%d\n", args->op, ret);
+
+	return ret;
+}
+
+static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
+			bool enable_ttmp_setup)
+{
+	int i = 0, ret = 0;
+
+	if (p->is_runtime_retry)
+		goto retry;
+
+	if (p->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_DISABLED)
+		return -EBUSY;
+
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+
+		if (pdd->qpd.queue_count)
+			return -EEXIST;
+
+		/*
+		 * Setup TTMPs by default.
+		 * Note that this call must remain here for MES ADD QUEUE to
+		 * skip_process_ctx_clear unconditionally as the first call to
+		 * SET_SHADER_DEBUGGER clears any stale process context data
+		 * saved in MES.
+		 */
+		if (pdd->dev->kfd->shared_resources.enable_mes)
+			kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
+	}
+
+	p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
+	p->runtime_info.r_debug = r_debug;
+	p->runtime_info.ttmp_setup = enable_ttmp_setup;
+
+	if (p->runtime_info.ttmp_setup) {
+		for (i = 0; i < p->n_pdds; i++) {
+			struct kfd_process_device *pdd = p->pdds[i];
+
+			if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) {
+				amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+				pdd->dev->kfd2kgd->enable_debug_trap(
+						pdd->dev->adev,
+						true,
+						pdd->dev->vm_info.last_vmid_kfd);
+			} else if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
+				pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap(
+						pdd->dev->adev,
+						false,
+						0);
+			}
+		}
+	}
+
+retry:
+	if (p->debug_trap_enabled) {
+		if (!p->is_runtime_retry) {
+			kfd_dbg_trap_activate(p);
+			kfd_dbg_ev_raise(KFD_EC_MASK(EC_PROCESS_RUNTIME),
+					p, NULL, 0, false, NULL, 0);
+		}
+
+		mutex_unlock(&p->mutex);
+		ret = down_interruptible(&p->runtime_enable_sema);
+		mutex_lock(&p->mutex);
+
+		p->is_runtime_retry = !!ret;
+	}
+
+	return ret;
+}
+
+static int runtime_disable(struct kfd_process *p)
+{
+	int i = 0, ret;
+	bool was_enabled = p->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED;
+
+	p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_DISABLED;
+	p->runtime_info.r_debug = 0;
+
+	if (p->debug_trap_enabled) {
+		if (was_enabled)
+			kfd_dbg_trap_deactivate(p, false, 0);
+
+		if (!p->is_runtime_retry)
+			kfd_dbg_ev_raise(KFD_EC_MASK(EC_PROCESS_RUNTIME),
+					p, NULL, 0, false, NULL, 0);
+
+		mutex_unlock(&p->mutex);
+		ret = down_interruptible(&p->runtime_enable_sema);
+		mutex_lock(&p->mutex);
+
+		p->is_runtime_retry = !!ret;
+		if (ret)
+			return ret;
+	}
+
+	if (was_enabled && p->runtime_info.ttmp_setup) {
+		for (i = 0; i < p->n_pdds; i++) {
+			struct kfd_process_device *pdd = p->pdds[i];
+
+			if (!kfd_dbg_is_rlc_restore_supported(pdd->dev))
+				amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+		}
+	}
+
+	p->runtime_info.ttmp_setup = false;
+
+	/* disable ttmp setup */
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+
+		if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
+			pdd->spi_dbg_override =
+					pdd->dev->kfd2kgd->disable_debug_trap(
+					pdd->dev->adev,
+					false,
+					pdd->dev->vm_info.last_vmid_kfd);
+
+			if (!pdd->dev->kfd->shared_resources.enable_mes)
+				debug_refresh_runlist(pdd->dev->dqm);
+			else
+				kfd_dbg_set_mes_debug_mode(pdd,
+							   !kfd_dbg_has_cwsr_workaround(pdd->dev));
+		}
+	}
+
+	return 0;
+}
+
+static int kfd_ioctl_runtime_enable(struct file *filep, struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_runtime_enable_args *args = data;
+	int r;
+
+	mutex_lock(&p->mutex);
+
+	if (args->mode_mask & KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK)
+		r = runtime_enable(p, args->r_debug,
+				!!(args->mode_mask & KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK));
+	else
+		r = runtime_disable(p);
+
+	mutex_unlock(&p->mutex);
+
+	return r;
+}
+
+static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_dbg_trap_args *args = data;
+	struct task_struct *thread = NULL;
+	struct mm_struct *mm = NULL;
+	struct pid *pid = NULL;
+	struct kfd_process *target = NULL;
+	struct kfd_process_device *pdd = NULL;
+	int r = 0;
+
+	if (sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+		pr_err("Debugging does not support sched_policy %i", sched_policy);
+		return -EINVAL;
+	}
+
+	pid = find_get_pid(args->pid);
+	if (!pid) {
+		pr_debug("Cannot find pid info for %i\n", args->pid);
+		r = -ESRCH;
+		goto out;
+	}
+
+	thread = get_pid_task(pid, PIDTYPE_PID);
+	if (!thread) {
+		r = -ESRCH;
+		goto out;
+	}
+
+	mm = get_task_mm(thread);
+	if (!mm) {
+		r = -ESRCH;
+		goto out;
+	}
+
+	if (args->op == KFD_IOC_DBG_TRAP_ENABLE) {
+		bool create_process;
+
+		rcu_read_lock();
+		create_process = thread && thread != current && ptrace_parent(thread) == current;
+		rcu_read_unlock();
+
+		target = create_process ? kfd_create_process(thread) :
+					kfd_lookup_process_by_pid(pid);
+	} else {
+		target = kfd_lookup_process_by_pid(pid);
+	}
+
+	if (IS_ERR_OR_NULL(target)) {
+		pr_debug("Cannot find process PID %i to debug\n", args->pid);
+		r = target ? PTR_ERR(target) : -ESRCH;
+		goto out;
+	}
+
+	/* Check if target is still PTRACED. */
+	rcu_read_lock();
+	if (target != p && args->op != KFD_IOC_DBG_TRAP_DISABLE
+				&& ptrace_parent(target->lead_thread) != current) {
+		pr_err("PID %i is not PTRACED and cannot be debugged\n", args->pid);
+		r = -EPERM;
+	}
+	rcu_read_unlock();
+
+	if (r)
+		goto out;
+
+	mutex_lock(&target->mutex);
+
+	if (args->op != KFD_IOC_DBG_TRAP_ENABLE && !target->debug_trap_enabled) {
+		pr_err("PID %i not debug enabled for op %i\n", args->pid, args->op);
+		r = -EINVAL;
+		goto unlock_out;
+	}
+
+	if (target->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_ENABLED &&
+			(args->op == KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE ||
+			 args->op == KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE ||
+			 args->op == KFD_IOC_DBG_TRAP_SUSPEND_QUEUES ||
+			 args->op == KFD_IOC_DBG_TRAP_RESUME_QUEUES ||
+			 args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ||
+			 args->op == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH ||
+			 args->op == KFD_IOC_DBG_TRAP_SET_FLAGS)) {
+		r = -EPERM;
+		goto unlock_out;
+	}
+
+	if (args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ||
+	    args->op == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH) {
+		int user_gpu_id = kfd_process_get_user_gpu_id(target,
+				args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ?
+					args->set_node_address_watch.gpu_id :
+					args->clear_node_address_watch.gpu_id);
+
+		pdd = kfd_process_device_data_by_id(target, user_gpu_id);
+		if (user_gpu_id == -EINVAL || !pdd) {
+			r = -ENODEV;
+			goto unlock_out;
+		}
+	}
+
+	switch (args->op) {
+	case KFD_IOC_DBG_TRAP_ENABLE:
+		if (target != p)
+			target->debugger_process = p;
+
+		r = kfd_dbg_trap_enable(target,
+					args->enable.dbg_fd,
+					(void __user *)args->enable.rinfo_ptr,
+					&args->enable.rinfo_size);
+		if (!r)
+			target->exception_enable_mask = args->enable.exception_mask;
+
+		break;
+	case KFD_IOC_DBG_TRAP_DISABLE:
+		r = kfd_dbg_trap_disable(target);
+		break;
+	case KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT:
+		r = kfd_dbg_send_exception_to_runtime(target,
+				args->send_runtime_event.gpu_id,
+				args->send_runtime_event.queue_id,
+				args->send_runtime_event.exception_mask);
+		break;
+	case KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED:
+		kfd_dbg_set_enabled_debug_exception_mask(target,
+				args->set_exceptions_enabled.exception_mask);
+		break;
+	case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE:
+		r = kfd_dbg_trap_set_wave_launch_override(target,
+				args->launch_override.override_mode,
+				args->launch_override.enable_mask,
+				args->launch_override.support_request_mask,
+				&args->launch_override.enable_mask,
+				&args->launch_override.support_request_mask);
+		break;
+	case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE:
+		r = kfd_dbg_trap_set_wave_launch_mode(target,
+				args->launch_mode.launch_mode);
+		break;
+	case KFD_IOC_DBG_TRAP_SUSPEND_QUEUES:
+		r = suspend_queues(target,
+				args->suspend_queues.num_queues,
+				args->suspend_queues.grace_period,
+				args->suspend_queues.exception_mask,
+				(uint32_t *)args->suspend_queues.queue_array_ptr);
+
+		break;
+	case KFD_IOC_DBG_TRAP_RESUME_QUEUES:
+		r = resume_queues(target, args->resume_queues.num_queues,
+				(uint32_t *)args->resume_queues.queue_array_ptr);
+		break;
+	case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH:
+		r = kfd_dbg_trap_set_dev_address_watch(pdd,
+				args->set_node_address_watch.address,
+				args->set_node_address_watch.mask,
+				&args->set_node_address_watch.id,
+				args->set_node_address_watch.mode);
+		break;
+	case KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH:
+		r = kfd_dbg_trap_clear_dev_address_watch(pdd,
+				args->clear_node_address_watch.id);
+		break;
+	case KFD_IOC_DBG_TRAP_SET_FLAGS:
+		r = kfd_dbg_trap_set_flags(target, &args->set_flags.flags);
+		break;
+	case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT:
+		r = kfd_dbg_ev_query_debug_event(target,
+				&args->query_debug_event.queue_id,
+				&args->query_debug_event.gpu_id,
+				args->query_debug_event.exception_mask,
+				&args->query_debug_event.exception_mask);
+		break;
+	case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO:
+		r = kfd_dbg_trap_query_exception_info(target,
+				args->query_exception_info.source_id,
+				args->query_exception_info.exception_code,
+				args->query_exception_info.clear_exception,
+				(void __user *)args->query_exception_info.info_ptr,
+				&args->query_exception_info.info_size);
+		break;
+	case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT:
+		r = pqm_get_queue_snapshot(&target->pqm,
+				args->queue_snapshot.exception_mask,
+				(void __user *)args->queue_snapshot.snapshot_buf_ptr,
+				&args->queue_snapshot.num_queues,
+				&args->queue_snapshot.entry_size);
+		break;
+	case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT:
+		r = kfd_dbg_trap_device_snapshot(target,
+				args->device_snapshot.exception_mask,
+				(void __user *)args->device_snapshot.snapshot_buf_ptr,
+				&args->device_snapshot.num_devices,
+				&args->device_snapshot.entry_size);
+		break;
+	default:
+		pr_err("Invalid option: %i\n", args->op);
+		r = -EINVAL;
+	}
+
+unlock_out:
+	mutex_unlock(&target->mutex);
+
+out:
+	if (thread)
+		put_task_struct(thread);
+
+	if (mm)
+		mmput(mm);
+
+	if (pid)
+		put_pid(pid);
+
+	if (target)
+		kfd_unref_process(target);
+
+	return r;
+}
+
+#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
+	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
+			    .cmd_drv = 0, .name = #ioctl}
+
+/** Ioctl table */
+static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
+			kfd_ioctl_get_version, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
+			kfd_ioctl_create_queue, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
+			kfd_ioctl_destroy_queue, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
+			kfd_ioctl_set_memory_policy, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
+			kfd_ioctl_get_clock_counters, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
+			kfd_ioctl_get_process_apertures, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
+			kfd_ioctl_update_queue, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
+			kfd_ioctl_create_event, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
+			kfd_ioctl_destroy_event, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
+			kfd_ioctl_set_event, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
+			kfd_ioctl_reset_event, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
+			kfd_ioctl_wait_events, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER_DEPRECATED,
+			kfd_ioctl_dbg_register, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED,
+			kfd_ioctl_dbg_unregister, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED,
+			kfd_ioctl_dbg_address_watch, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED,
+			kfd_ioctl_dbg_wave_control, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
+			kfd_ioctl_set_scratch_backing_va, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
+			kfd_ioctl_get_tile_config, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
+			kfd_ioctl_set_trap_handler, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
+			kfd_ioctl_get_process_apertures_new, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
+			kfd_ioctl_acquire_vm, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
+			kfd_ioctl_alloc_memory_of_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
+			kfd_ioctl_free_memory_of_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
+			kfd_ioctl_map_memory_to_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
+			kfd_ioctl_unmap_memory_from_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
+			kfd_ioctl_set_cu_mask, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
+			kfd_ioctl_get_queue_wave_state, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
+				kfd_ioctl_get_dmabuf_info, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
+				kfd_ioctl_import_dmabuf, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
+			kfd_ioctl_alloc_queue_gws, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
+			kfd_ioctl_smi_events, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
+			kfd_ioctl_set_xnack_mode, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP,
+			kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY,
+			kfd_ioctl_get_available_memory, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_EXPORT_DMABUF,
+				kfd_ioctl_export_dmabuf, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RUNTIME_ENABLE,
+			kfd_ioctl_runtime_enable, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_TRAP,
+			kfd_ioctl_set_debug_trap, 0),
+};
+
+#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
+
+static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
+{
+	struct kfd_process *process;
+	amdkfd_ioctl_t *func;
+	const struct amdkfd_ioctl_desc *ioctl = NULL;
+	unsigned int nr = _IOC_NR(cmd);
+	char stack_kdata[128];
+	char *kdata = NULL;
+	unsigned int usize, asize;
+	int retcode = -EINVAL;
+	bool ptrace_attached = false;
+
+	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
+		goto err_i1;
+
+	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
+		u32 amdkfd_size;
+
+		ioctl = &amdkfd_ioctls[nr];
+
+		amdkfd_size = _IOC_SIZE(ioctl->cmd);
+		usize = asize = _IOC_SIZE(cmd);
+		if (amdkfd_size > asize)
+			asize = amdkfd_size;
+
+		cmd = ioctl->cmd;
+	} else
+		goto err_i1;
+
+	dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);
+
+	/* Get the process struct from the filep. Only the process
+	 * that opened /dev/kfd can use the file descriptor. Child
+	 * processes need to create their own KFD device context.
+	 */
+	process = filep->private_data;
+
+	rcu_read_lock();
+	if ((ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE) &&
+	    ptrace_parent(process->lead_thread) == current)
+		ptrace_attached = true;
+	rcu_read_unlock();
+
+	if (process->lead_thread != current->group_leader
+	    && !ptrace_attached) {
+		dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
+		retcode = -EBADF;
+		goto err_i1;
+	}
+
+	/* Do not trust userspace, use our own definition */
+	func = ioctl->func;
+
+	if (unlikely(!func)) {
+		dev_dbg(kfd_device, "no function\n");
+		retcode = -EINVAL;
+		goto err_i1;
+	}
+
+	/*
+	 * Versions of docker shipped in Ubuntu 18.xx and 20.xx do not support
+	 * CAP_CHECKPOINT_RESTORE, so we also allow access if CAP_SYS_ADMIN as CAP_SYS_ADMIN is a
+	 * more priviledged access.
+	 */
+	if (unlikely(ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE)) {
+		if (!capable(CAP_CHECKPOINT_RESTORE) &&
+						!capable(CAP_SYS_ADMIN)) {
+			retcode = -EACCES;
+			goto err_i1;
+		}
+	}
+
+	if (cmd & (IOC_IN | IOC_OUT)) {
+		if (asize <= sizeof(stack_kdata)) {
+			kdata = stack_kdata;
+		} else {
+			kdata = kmalloc(asize, GFP_KERNEL);
+			if (!kdata) {
+				retcode = -ENOMEM;
+				goto err_i1;
+			}
+		}
+		if (asize > usize)
+			memset(kdata + usize, 0, asize - usize);
+	}
+
+	if (cmd & IOC_IN) {
+		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
+			retcode = -EFAULT;
+			goto err_i1;
+		}
+	} else if (cmd & IOC_OUT) {
+		memset(kdata, 0, usize);
+	}
+
+	retcode = func(filep, process, kdata);
+
+	if (cmd & IOC_OUT)
+		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
+			retcode = -EFAULT;
+
+err_i1:
+	if (!ioctl)
+		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
+			  task_pid_nr(current), cmd, nr);
+
+	if (kdata != stack_kdata)
+		kfree(kdata);
+
+	if (retcode)
+		dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n",
+				nr, arg, retcode);
+
+	return retcode;
+}
+
+static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process,
+		      struct vm_area_struct *vma)
+{
+	phys_addr_t address;
+
+	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+		return -EINVAL;
+
+	address = dev->adev->rmmio_remap.bus_addr;
+
+	vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
+				VM_DONTDUMP | VM_PFNMAP);
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	pr_debug("pasid 0x%x mapping mmio page\n"
+		 "     target user address == 0x%08llX\n"
+		 "     physical address    == 0x%08llX\n"
+		 "     vm_flags            == 0x%04lX\n"
+		 "     size                == 0x%04lX\n",
+		 process->pasid, (unsigned long long) vma->vm_start,
+		 address, vma->vm_flags, PAGE_SIZE);
+
+	return io_remap_pfn_range(vma,
+				vma->vm_start,
+				address >> PAGE_SHIFT,
+				PAGE_SIZE,
+				vma->vm_page_prot);
+}
+
+
+static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct kfd_process *process;
+	struct kfd_node *dev = NULL;
+	unsigned long mmap_offset;
+	unsigned int gpu_id;
+
+	process = kfd_get_process(current);
+	if (IS_ERR(process))
+		return PTR_ERR(process);
+
+	mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
+	gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
+	if (gpu_id)
+		dev = kfd_device_by_id(gpu_id);
+
+	switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
+	case KFD_MMAP_TYPE_DOORBELL:
+		if (!dev)
+			return -ENODEV;
+		return kfd_doorbell_mmap(dev, process, vma);
+
+	case KFD_MMAP_TYPE_EVENTS:
+		return kfd_event_mmap(process, vma);
+
+	case KFD_MMAP_TYPE_RESERVED_MEM:
+		if (!dev)
+			return -ENODEV;
+		return kfd_reserved_mem_mmap(dev, process, vma);
+	case KFD_MMAP_TYPE_MMIO:
+		if (!dev)
+			return -ENODEV;
+		return kfd_mmio_mmap(dev, process, vma);
+	}
+
+	return -EFAULT;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
new file mode 100644
index 000000000..f76b7aee5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -0,0 +1,2277 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2015-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/pci.h>
+#include <linux/acpi.h>
+#include "kfd_crat.h"
+#include "kfd_priv.h"
+#include "kfd_topology.h"
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+
+/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
+ * GPU processor ID are expressed with Bit[31]=1.
+ * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
+ * used in the CRAT.
+ */
+static uint32_t gpu_processor_id_low = 0x80001000;
+
+/* Return the next available gpu_processor_id and increment it for next GPU
+ *	@total_cu_count - Total CUs present in the GPU including ones
+ *			  masked off
+ */
+static inline unsigned int get_and_inc_gpu_processor_id(
+				unsigned int total_cu_count)
+{
+	int current_id = gpu_processor_id_low;
+
+	gpu_processor_id_low += total_cu_count;
+	return current_id;
+}
+
+
+static struct kfd_gpu_cache_info kaveri_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache (in SQC module) per bank */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_INST_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* Scalar L1 Data Cache (in SQC module) per bank */
+		.cache_size = 8,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+
+	/* TODO: Add L2 Cache information */
+};
+
+
+static struct kfd_gpu_cache_info carrizo_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache (in SQC module) per bank */
+		.cache_size = 8,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_INST_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 4,
+	},
+	{
+		/* Scalar L1 Data Cache (in SQC module) per bank. */
+		.cache_size = 4,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 4,
+	},
+
+	/* TODO: Add L2 Cache information */
+};
+
+#define hawaii_cache_info kaveri_cache_info
+#define tonga_cache_info carrizo_cache_info
+#define fiji_cache_info  carrizo_cache_info
+#define polaris10_cache_info carrizo_cache_info
+#define polaris11_cache_info carrizo_cache_info
+#define polaris12_cache_info carrizo_cache_info
+#define vegam_cache_info carrizo_cache_info
+
+/* NOTE: L1 cache information has been updated and L2/L3
+ * cache information has been added for Vega10 and
+ * newer ASICs. The unit for cache_size is KiB.
+ * In future,  check & update cache details
+ * for every new ASIC is required.
+ */
+
+static struct kfd_gpu_cache_info vega10_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache per SQC */
+		.cache_size = 32,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_INST_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 3,
+	},
+	{
+		/* Scalar L1 Data Cache per SQC */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 3,
+	},
+	{
+		/* L2 Data Cache per GPU (Total Tex Cache) */
+		.cache_size = 4096,
+		.cache_level = 2,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 16,
+	},
+};
+
+static struct kfd_gpu_cache_info raven_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache per SQC */
+		.cache_size = 32,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_INST_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 3,
+	},
+	{
+		/* Scalar L1 Data Cache per SQC */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 3,
+	},
+	{
+		/* L2 Data Cache per GPU (Total Tex Cache) */
+		.cache_size = 1024,
+		.cache_level = 2,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 11,
+	},
+};
+
+static struct kfd_gpu_cache_info renoir_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache per SQC */
+		.cache_size = 32,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_INST_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 3,
+	},
+	{
+		/* Scalar L1 Data Cache per SQC */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 3,
+	},
+	{
+		/* L2 Data Cache per GPU (Total Tex Cache) */
+		.cache_size = 1024,
+		.cache_level = 2,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 8,
+	},
+};
+
+static struct kfd_gpu_cache_info vega12_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache per SQC */
+		.cache_size = 32,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_INST_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 3,
+	},
+	{
+		/* Scalar L1 Data Cache per SQC */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 3,
+	},
+	{
+		/* L2 Data Cache per GPU (Total Tex Cache) */
+		.cache_size = 2048,
+		.cache_level = 2,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 5,
+	},
+};
+
+static struct kfd_gpu_cache_info vega20_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache per SQC */
+		.cache_size = 32,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_INST_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 3,
+	},
+	{
+		/* Scalar L1 Data Cache per SQC */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 3,
+	},
+	{
+		/* L2 Data Cache per GPU (Total Tex Cache) */
+		.cache_size = 8192,
+		.cache_level = 2,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 16,
+	},
+};
+
+static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache per SQC */
+		.cache_size = 32,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_INST_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* Scalar L1 Data Cache per SQC */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* L2 Data Cache per GPU (Total Tex Cache) */
+		.cache_size = 8192,
+		.cache_level = 2,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 14,
+	},
+};
+
+static struct kfd_gpu_cache_info navi10_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache per SQC */
+		.cache_size = 32,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_INST_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* Scalar L1 Data Cache per SQC */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* GL1 Data Cache per SA */
+		.cache_size = 128,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 10,
+	},
+	{
+		/* L2 Data Cache per GPU (Total Tex Cache) */
+		.cache_size = 4096,
+		.cache_level = 2,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 10,
+	},
+};
+
+static struct kfd_gpu_cache_info vangogh_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache per SQC */
+		.cache_size = 32,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_INST_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* Scalar L1 Data Cache per SQC */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* GL1 Data Cache per SA */
+		.cache_size = 128,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 8,
+	},
+	{
+		/* L2 Data Cache per GPU (Total Tex Cache) */
+		.cache_size = 1024,
+		.cache_level = 2,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 8,
+	},
+};
+
+static struct kfd_gpu_cache_info navi14_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache per SQC */
+		.cache_size = 32,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_INST_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* Scalar L1 Data Cache per SQC */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* GL1 Data Cache per SA */
+		.cache_size = 128,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 12,
+	},
+	{
+		/* L2 Data Cache per GPU (Total Tex Cache) */
+		.cache_size = 2048,
+		.cache_level = 2,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 12,
+	},
+};
+
+static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache per SQC */
+		.cache_size = 32,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_INST_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* Scalar L1 Data Cache per SQC */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* GL1 Data Cache per SA */
+		.cache_size = 128,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 10,
+	},
+	{
+		/* L2 Data Cache per GPU (Total Tex Cache) */
+		.cache_size = 4096,
+		.cache_level = 2,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 10,
+	},
+	{
+		/* L3 Data Cache per GPU */
+		.cache_size = 128*1024,
+		.cache_level = 3,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 10,
+	},
+};
+
+static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache per SQC */
+		.cache_size = 32,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_INST_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* Scalar L1 Data Cache per SQC */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* GL1 Data Cache per SA */
+		.cache_size = 128,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 10,
+	},
+	{
+		/* L2 Data Cache per GPU (Total Tex Cache) */
+		.cache_size = 3072,
+		.cache_level = 2,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 10,
+	},
+	{
+		/* L3 Data Cache per GPU */
+		.cache_size = 96*1024,
+		.cache_level = 3,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 10,
+	},
+};
+
+static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache per SQC */
+		.cache_size = 32,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_INST_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* Scalar L1 Data Cache per SQC */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* GL1 Data Cache per SA */
+		.cache_size = 128,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 8,
+	},
+	{
+		/* L2 Data Cache per GPU (Total Tex Cache) */
+		.cache_size = 2048,
+		.cache_level = 2,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 8,
+	},
+	{
+		/* L3 Data Cache per GPU */
+		.cache_size = 32*1024,
+		.cache_level = 3,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 8,
+	},
+};
+
+static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache per SQC */
+		.cache_size = 32,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_INST_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* Scalar L1 Data Cache per SQC */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* GL1 Data Cache per SA */
+		.cache_size = 128,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 8,
+	},
+	{
+		/* L2 Data Cache per GPU (Total Tex Cache) */
+		.cache_size = 1024,
+		.cache_level = 2,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 8,
+	},
+	{
+		/* L3 Data Cache per GPU */
+		.cache_size = 16*1024,
+		.cache_level = 3,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 8,
+	},
+};
+
+static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache per SQC */
+		.cache_size = 32,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_INST_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* Scalar L1 Data Cache per SQC */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* GL1 Data Cache per SA */
+		.cache_size = 128,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 6,
+	},
+	{
+		/* L2 Data Cache per GPU (Total Tex Cache) */
+		.cache_size = 2048,
+		.cache_level = 2,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 6,
+	},
+};
+
+static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache per SQC */
+		.cache_size = 32,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_INST_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* Scalar L1 Data Cache per SQC */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* GL1 Data Cache per SA */
+		.cache_size = 128,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* L2 Data Cache per GPU (Total Tex Cache) */
+		.cache_size = 256,
+		.cache_level = 2,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+};
+
+static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+			  CRAT_CACHE_FLAGS_DATA_CACHE |
+			  CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache per SQC */
+		.cache_size = 32,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+			  CRAT_CACHE_FLAGS_INST_CACHE |
+			  CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* Scalar L1 Data Cache per SQC */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+			  CRAT_CACHE_FLAGS_DATA_CACHE |
+			  CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* GL1 Data Cache per SA */
+		.cache_size = 128,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+			  CRAT_CACHE_FLAGS_DATA_CACHE |
+			  CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* L2 Data Cache per GPU (Total Tex Cache) */
+		.cache_size = 256,
+		.cache_level = 2,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+			  CRAT_CACHE_FLAGS_DATA_CACHE |
+			  CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+};
+
+static struct kfd_gpu_cache_info dummy_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache per SQC */
+		.cache_size = 32,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_INST_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* Scalar L1 Data Cache per SQC */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* GL1 Data Cache per SA */
+		.cache_size = 128,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 6,
+	},
+	{
+		/* L2 Data Cache per GPU (Total Tex Cache) */
+		.cache_size = 2048,
+		.cache_level = 2,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 6,
+	},
+};
+
+static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
+		struct crat_subtype_computeunit *cu)
+{
+	dev->node_props.cpu_cores_count = cu->num_cpu_cores;
+	dev->node_props.cpu_core_id_base = cu->processor_id_low;
+	if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT)
+		dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
+
+	pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores,
+			cu->processor_id_low);
+}
+
+static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev,
+		struct crat_subtype_computeunit *cu)
+{
+	dev->node_props.simd_id_base = cu->processor_id_low;
+	dev->node_props.simd_count = cu->num_simd_cores;
+	dev->node_props.lds_size_in_kb = cu->lds_size_in_kb;
+	dev->node_props.max_waves_per_simd = cu->max_waves_simd;
+	dev->node_props.wave_front_size = cu->wave_front_size;
+	dev->node_props.array_count = cu->array_count;
+	dev->node_props.cu_per_simd_array = cu->num_cu_per_array;
+	dev->node_props.simd_per_cu = cu->num_simd_per_cu;
+	dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu;
+	if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE)
+		dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE;
+	pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low);
+}
+
+/* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct
+ * topology device present in the device_list
+ */
+static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu,
+				struct list_head *device_list)
+{
+	struct kfd_topology_device *dev;
+
+	pr_debug("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n",
+			cu->proximity_domain, cu->hsa_capability);
+	list_for_each_entry(dev, device_list, list) {
+		if (cu->proximity_domain == dev->proximity_domain) {
+			if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT)
+				kfd_populated_cu_info_cpu(dev, cu);
+
+			if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT)
+				kfd_populated_cu_info_gpu(dev, cu);
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static struct kfd_mem_properties *
+find_subtype_mem(uint32_t heap_type, uint32_t flags, uint32_t width,
+		struct kfd_topology_device *dev)
+{
+	struct kfd_mem_properties *props;
+
+	list_for_each_entry(props, &dev->mem_props, list) {
+		if (props->heap_type == heap_type
+				&& props->flags == flags
+				&& props->width == width)
+			return props;
+	}
+
+	return NULL;
+}
+/* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct
+ * topology device present in the device_list
+ */
+static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
+				struct list_head *device_list)
+{
+	struct kfd_mem_properties *props;
+	struct kfd_topology_device *dev;
+	uint32_t heap_type;
+	uint64_t size_in_bytes;
+	uint32_t flags = 0;
+	uint32_t width;
+
+	pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n",
+			mem->proximity_domain);
+	list_for_each_entry(dev, device_list, list) {
+		if (mem->proximity_domain == dev->proximity_domain) {
+			/* We're on GPU node */
+			if (dev->node_props.cpu_cores_count == 0) {
+				/* APU */
+				if (mem->visibility_type == 0)
+					heap_type =
+						HSA_MEM_HEAP_TYPE_FB_PRIVATE;
+				/* dGPU */
+				else
+					heap_type = mem->visibility_type;
+			} else
+				heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;
+
+			if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE)
+				flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE;
+			if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE)
+				flags |= HSA_MEM_FLAGS_NON_VOLATILE;
+
+			size_in_bytes =
+				((uint64_t)mem->length_high << 32) +
+							mem->length_low;
+			width = mem->width;
+
+			/* Multiple banks of the same type are aggregated into
+			 * one. User mode doesn't care about multiple physical
+			 * memory segments. It's managed as a single virtual
+			 * heap for user mode.
+			 */
+			props = find_subtype_mem(heap_type, flags, width, dev);
+			if (props) {
+				props->size_in_bytes += size_in_bytes;
+				break;
+			}
+
+			props = kfd_alloc_struct(props);
+			if (!props)
+				return -ENOMEM;
+
+			props->heap_type = heap_type;
+			props->flags = flags;
+			props->size_in_bytes = size_in_bytes;
+			props->width = width;
+
+			dev->node_props.mem_banks_count++;
+			list_add_tail(&props->list, &dev->mem_props);
+
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct
+ * topology device present in the device_list
+ */
+static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache,
+			struct list_head *device_list)
+{
+	struct kfd_cache_properties *props;
+	struct kfd_topology_device *dev;
+	uint32_t id;
+	uint32_t total_num_of_cu;
+
+	id = cache->processor_id_low;
+
+	pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id);
+	list_for_each_entry(dev, device_list, list) {
+		total_num_of_cu = (dev->node_props.array_count *
+					dev->node_props.cu_per_simd_array);
+
+		/* Cache infomration in CRAT doesn't have proximity_domain
+		 * information as it is associated with a CPU core or GPU
+		 * Compute Unit. So map the cache using CPU core Id or SIMD
+		 * (GPU) ID.
+		 * TODO: This works because currently we can safely assume that
+		 *  Compute Units are parsed before caches are parsed. In
+		 *  future, remove this dependency
+		 */
+		if ((id >= dev->node_props.cpu_core_id_base &&
+			id <= dev->node_props.cpu_core_id_base +
+				dev->node_props.cpu_cores_count) ||
+			(id >= dev->node_props.simd_id_base &&
+			id < dev->node_props.simd_id_base +
+				total_num_of_cu)) {
+			props = kfd_alloc_struct(props);
+			if (!props)
+				return -ENOMEM;
+
+			props->processor_id_low = id;
+			props->cache_level = cache->cache_level;
+			props->cache_size = cache->cache_size;
+			props->cacheline_size = cache->cache_line_size;
+			props->cachelines_per_tag = cache->lines_per_tag;
+			props->cache_assoc = cache->associativity;
+			props->cache_latency = cache->cache_latency;
+
+			memcpy(props->sibling_map, cache->sibling_map,
+					CRAT_SIBLINGMAP_SIZE);
+
+			/* set the sibling_map_size as 32 for CRAT from ACPI */
+			props->sibling_map_size = CRAT_SIBLINGMAP_SIZE;
+
+			if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
+				props->cache_type |= HSA_CACHE_TYPE_DATA;
+			if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE)
+				props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
+			if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE)
+				props->cache_type |= HSA_CACHE_TYPE_CPU;
+			if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
+				props->cache_type |= HSA_CACHE_TYPE_HSACU;
+
+			dev->node_props.caches_count++;
+			list_add_tail(&props->list, &dev->cache_props);
+
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct
+ * topology device present in the device_list
+ */
+static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
+					struct list_head *device_list)
+{
+	struct kfd_iolink_properties *props = NULL, *props2;
+	struct kfd_topology_device *dev, *to_dev;
+	uint32_t id_from;
+	uint32_t id_to;
+
+	id_from = iolink->proximity_domain_from;
+	id_to = iolink->proximity_domain_to;
+
+	pr_debug("Found IO link entry in CRAT table with id_from=%d, id_to %d\n",
+			id_from, id_to);
+	list_for_each_entry(dev, device_list, list) {
+		if (id_from == dev->proximity_domain) {
+			props = kfd_alloc_struct(props);
+			if (!props)
+				return -ENOMEM;
+
+			props->node_from = id_from;
+			props->node_to = id_to;
+			props->ver_maj = iolink->version_major;
+			props->ver_min = iolink->version_minor;
+			props->iolink_type = iolink->io_interface_type;
+
+			if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
+				props->weight = 20;
+			else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
+				props->weight = iolink->weight_xgmi;
+			else
+				props->weight = node_distance(id_from, id_to);
+
+			props->min_latency = iolink->minimum_latency;
+			props->max_latency = iolink->maximum_latency;
+			props->min_bandwidth = iolink->minimum_bandwidth_mbs;
+			props->max_bandwidth = iolink->maximum_bandwidth_mbs;
+			props->rec_transfer_size =
+					iolink->recommended_transfer_size;
+
+			dev->node_props.io_links_count++;
+			list_add_tail(&props->list, &dev->io_link_props);
+			break;
+		}
+	}
+
+	/* CPU topology is created before GPUs are detected, so CPU->GPU
+	 * links are not built at that time. If a PCIe type is discovered, it
+	 * means a GPU is detected and we are adding GPU->CPU to the topology.
+	 * At this time, also add the corresponded CPU->GPU link if GPU
+	 * is large bar.
+	 * For xGMI, we only added the link with one direction in the crat
+	 * table, add corresponded reversed direction link now.
+	 */
+	if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) {
+		to_dev = kfd_topology_device_by_proximity_domain_no_lock(id_to);
+		if (!to_dev)
+			return -ENODEV;
+		/* same everything but the other direction */
+		props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
+		if (!props2)
+			return -ENOMEM;
+
+		props2->node_from = id_to;
+		props2->node_to = id_from;
+		props2->kobj = NULL;
+		to_dev->node_props.io_links_count++;
+		list_add_tail(&props2->list, &to_dev->io_link_props);
+	}
+
+	return 0;
+}
+
+/* kfd_parse_subtype - parse subtypes and attach it to correct topology device
+ * present in the device_list
+ *	@sub_type_hdr - subtype section of crat_image
+ *	@device_list - list of topology devices present in this crat_image
+ */
+static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr,
+				struct list_head *device_list)
+{
+	struct crat_subtype_computeunit *cu;
+	struct crat_subtype_memory *mem;
+	struct crat_subtype_cache *cache;
+	struct crat_subtype_iolink *iolink;
+	int ret = 0;
+
+	switch (sub_type_hdr->type) {
+	case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY:
+		cu = (struct crat_subtype_computeunit *)sub_type_hdr;
+		ret = kfd_parse_subtype_cu(cu, device_list);
+		break;
+	case CRAT_SUBTYPE_MEMORY_AFFINITY:
+		mem = (struct crat_subtype_memory *)sub_type_hdr;
+		ret = kfd_parse_subtype_mem(mem, device_list);
+		break;
+	case CRAT_SUBTYPE_CACHE_AFFINITY:
+		cache = (struct crat_subtype_cache *)sub_type_hdr;
+		ret = kfd_parse_subtype_cache(cache, device_list);
+		break;
+	case CRAT_SUBTYPE_TLB_AFFINITY:
+		/*
+		 * For now, nothing to do here
+		 */
+		pr_debug("Found TLB entry in CRAT table (not processing)\n");
+		break;
+	case CRAT_SUBTYPE_CCOMPUTE_AFFINITY:
+		/*
+		 * For now, nothing to do here
+		 */
+		pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n");
+		break;
+	case CRAT_SUBTYPE_IOLINK_AFFINITY:
+		iolink = (struct crat_subtype_iolink *)sub_type_hdr;
+		ret = kfd_parse_subtype_iolink(iolink, device_list);
+		break;
+	default:
+		pr_warn("Unknown subtype %d in CRAT\n",
+				sub_type_hdr->type);
+	}
+
+	return ret;
+}
+
+/* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT
+ * create a kfd_topology_device and add in to device_list. Also parse
+ * CRAT subtypes and attach it to appropriate kfd_topology_device
+ *	@crat_image - input image containing CRAT
+ *	@device_list - [OUT] list of kfd_topology_device generated after
+ *		       parsing crat_image
+ *	@proximity_domain - Proximity domain of the first device in the table
+ *
+ *	Return - 0 if successful else -ve value
+ */
+int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
+			 uint32_t proximity_domain)
+{
+	struct kfd_topology_device *top_dev = NULL;
+	struct crat_subtype_generic *sub_type_hdr;
+	uint16_t node_id;
+	int ret = 0;
+	struct crat_header *crat_table = (struct crat_header *)crat_image;
+	uint16_t num_nodes;
+	uint32_t image_len;
+
+	if (!crat_image)
+		return -EINVAL;
+
+	if (!list_empty(device_list)) {
+		pr_warn("Error device list should be empty\n");
+		return -EINVAL;
+	}
+
+	num_nodes = crat_table->num_domains;
+	image_len = crat_table->length;
+
+	pr_debug("Parsing CRAT table with %d nodes\n", num_nodes);
+
+	for (node_id = 0; node_id < num_nodes; node_id++) {
+		top_dev = kfd_create_topology_device(device_list);
+		if (!top_dev)
+			break;
+		top_dev->proximity_domain = proximity_domain++;
+	}
+
+	if (!top_dev) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH);
+	memcpy(top_dev->oem_table_id, crat_table->oem_table_id,
+			CRAT_OEMTABLEID_LENGTH);
+	top_dev->oem_revision = crat_table->oem_revision;
+
+	sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
+	while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) <
+			((char *)crat_image) + image_len) {
+		if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) {
+			ret = kfd_parse_subtype(sub_type_hdr, device_list);
+			if (ret)
+				break;
+		}
+
+		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+				sub_type_hdr->length);
+	}
+
+err:
+	if (ret)
+		kfd_release_topology_device_list(device_list);
+
+	return ret;
+}
+
+
+static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
+						   struct kfd_gpu_cache_info *pcache_info)
+{
+	struct amdgpu_device *adev = kdev->adev;
+	int i = 0;
+
+	/* TCP L1 Cache per CU */
+	if (adev->gfx.config.gc_tcp_l1_size) {
+		pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size;
+		pcache_info[i].cache_level = 1;
+		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+					CRAT_CACHE_FLAGS_DATA_CACHE |
+					CRAT_CACHE_FLAGS_SIMD_CACHE);
+		pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
+		i++;
+	}
+	/* Scalar L1 Instruction Cache per SQC */
+	if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
+		pcache_info[i].cache_size =
+			adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
+		pcache_info[i].cache_level = 1;
+		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+					CRAT_CACHE_FLAGS_INST_CACHE |
+					CRAT_CACHE_FLAGS_SIMD_CACHE);
+		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
+		i++;
+	}
+	/* Scalar L1 Data Cache per SQC */
+	if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
+		pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
+		pcache_info[i].cache_level = 1;
+		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+					CRAT_CACHE_FLAGS_DATA_CACHE |
+					CRAT_CACHE_FLAGS_SIMD_CACHE);
+		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
+		i++;
+	}
+	/* GL1 Data Cache per SA */
+	if (adev->gfx.config.gc_gl1c_per_sa &&
+	    adev->gfx.config.gc_gl1c_size_per_instance) {
+		pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa *
+			adev->gfx.config.gc_gl1c_size_per_instance;
+		pcache_info[i].cache_level = 1;
+		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+					CRAT_CACHE_FLAGS_DATA_CACHE |
+					CRAT_CACHE_FLAGS_SIMD_CACHE);
+		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+		i++;
+	}
+	/* L2 Data Cache per GPU (Total Tex Cache) */
+	if (adev->gfx.config.gc_gl2c_per_gpu) {
+		pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu;
+		pcache_info[i].cache_level = 2;
+		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+					CRAT_CACHE_FLAGS_DATA_CACHE |
+					CRAT_CACHE_FLAGS_SIMD_CACHE);
+		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+		i++;
+	}
+	/* L3 Data Cache per GPU */
+	if (adev->gmc.mall_size) {
+		pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
+		pcache_info[i].cache_level = 3;
+		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+					CRAT_CACHE_FLAGS_DATA_CACHE |
+					CRAT_CACHE_FLAGS_SIMD_CACHE);
+		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+		i++;
+	}
+	return i;
+}
+
+int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
+{
+	int num_of_cache_types = 0;
+
+	switch (kdev->adev->asic_type) {
+	case CHIP_KAVERI:
+		*pcache_info = kaveri_cache_info;
+		num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
+		break;
+	case CHIP_HAWAII:
+		*pcache_info = hawaii_cache_info;
+		num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
+		break;
+	case CHIP_CARRIZO:
+		*pcache_info = carrizo_cache_info;
+		num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
+		break;
+	case CHIP_TONGA:
+		*pcache_info = tonga_cache_info;
+		num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
+		break;
+	case CHIP_FIJI:
+		*pcache_info = fiji_cache_info;
+		num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
+		break;
+	case CHIP_POLARIS10:
+		*pcache_info = polaris10_cache_info;
+		num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
+		break;
+	case CHIP_POLARIS11:
+		*pcache_info = polaris11_cache_info;
+		num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
+		break;
+	case CHIP_POLARIS12:
+		*pcache_info = polaris12_cache_info;
+		num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
+		break;
+	case CHIP_VEGAM:
+		*pcache_info = vegam_cache_info;
+		num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
+		break;
+	default:
+		switch (KFD_GC_VERSION(kdev)) {
+		case IP_VERSION(9, 0, 1):
+			*pcache_info = vega10_cache_info;
+			num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
+			break;
+		case IP_VERSION(9, 2, 1):
+			*pcache_info = vega12_cache_info;
+			num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
+			break;
+		case IP_VERSION(9, 4, 0):
+		case IP_VERSION(9, 4, 1):
+			*pcache_info = vega20_cache_info;
+			num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
+			break;
+		case IP_VERSION(9, 4, 2):
+		case IP_VERSION(9, 4, 3):
+			*pcache_info = aldebaran_cache_info;
+			num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
+			break;
+		case IP_VERSION(9, 1, 0):
+		case IP_VERSION(9, 2, 2):
+			*pcache_info = raven_cache_info;
+			num_of_cache_types = ARRAY_SIZE(raven_cache_info);
+			break;
+		case IP_VERSION(9, 3, 0):
+			*pcache_info = renoir_cache_info;
+			num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
+			break;
+		case IP_VERSION(10, 1, 10):
+		case IP_VERSION(10, 1, 2):
+		case IP_VERSION(10, 1, 3):
+		case IP_VERSION(10, 1, 4):
+			*pcache_info = navi10_cache_info;
+			num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
+			break;
+		case IP_VERSION(10, 1, 1):
+			*pcache_info = navi14_cache_info;
+			num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
+			break;
+		case IP_VERSION(10, 3, 0):
+			*pcache_info = sienna_cichlid_cache_info;
+			num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
+			break;
+		case IP_VERSION(10, 3, 2):
+			*pcache_info = navy_flounder_cache_info;
+			num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
+			break;
+		case IP_VERSION(10, 3, 4):
+			*pcache_info = dimgrey_cavefish_cache_info;
+			num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
+			break;
+		case IP_VERSION(10, 3, 1):
+			*pcache_info = vangogh_cache_info;
+			num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
+			break;
+		case IP_VERSION(10, 3, 5):
+			*pcache_info = beige_goby_cache_info;
+			num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
+			break;
+		case IP_VERSION(10, 3, 3):
+			*pcache_info = yellow_carp_cache_info;
+			num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
+			break;
+		case IP_VERSION(10, 3, 6):
+			*pcache_info = gc_10_3_6_cache_info;
+			num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info);
+			break;
+		case IP_VERSION(10, 3, 7):
+			*pcache_info = gfx1037_cache_info;
+			num_of_cache_types = ARRAY_SIZE(gfx1037_cache_info);
+			break;
+		case IP_VERSION(11, 0, 0):
+		case IP_VERSION(11, 0, 1):
+		case IP_VERSION(11, 0, 2):
+		case IP_VERSION(11, 0, 3):
+		case IP_VERSION(11, 0, 4):
+			num_of_cache_types =
+				kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info);
+			break;
+		default:
+			*pcache_info = dummy_cache_info;
+			num_of_cache_types = ARRAY_SIZE(dummy_cache_info);
+			pr_warn("dummy cache info is used temporarily and real cache info need update later.\n");
+			break;
+		}
+	}
+	return num_of_cache_types;
+}
+
+/* Memory required to create Virtual CRAT.
+ * Since there is no easy way to predict the amount of memory required, the
+ * following amount is allocated for GPU Virtual CRAT. This is
+ * expected to cover all known conditions. But to be safe additional check
+ * is put in the code to ensure we don't overwrite.
+ */
+#define VCRAT_SIZE_FOR_GPU	(4 * PAGE_SIZE)
+
+/* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
+ *
+ *	@numa_node_id: CPU NUMA node id
+ *	@avail_size: Available size in the memory
+ *	@sub_type_hdr: Memory into which compute info will be filled in
+ *
+ *	Return 0 if successful else return -ve value
+ */
+static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size,
+				int proximity_domain,
+				struct crat_subtype_computeunit *sub_type_hdr)
+{
+	const struct cpumask *cpumask;
+
+	*avail_size -= sizeof(struct crat_subtype_computeunit);
+	if (*avail_size < 0)
+		return -ENOMEM;
+
+	memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));
+
+	/* Fill in subtype header data */
+	sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
+	sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
+	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
+
+	cpumask = cpumask_of_node(numa_node_id);
+
+	/* Fill in CU data */
+	sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT;
+	sub_type_hdr->proximity_domain = proximity_domain;
+	sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id);
+	if (sub_type_hdr->processor_id_low == -1)
+		return -EINVAL;
+
+	sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask);
+
+	return 0;
+}
+
+/* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node
+ *
+ *	@numa_node_id: CPU NUMA node id
+ *	@avail_size: Available size in the memory
+ *	@sub_type_hdr: Memory into which compute info will be filled in
+ *
+ *	Return 0 if successful else return -ve value
+ */
+static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
+			int proximity_domain,
+			struct crat_subtype_memory *sub_type_hdr)
+{
+	uint64_t mem_in_bytes = 0;
+	pg_data_t *pgdat;
+	int zone_type;
+
+	*avail_size -= sizeof(struct crat_subtype_memory);
+	if (*avail_size < 0)
+		return -ENOMEM;
+
+	memset(sub_type_hdr, 0, sizeof(struct crat_subtype_memory));
+
+	/* Fill in subtype header data */
+	sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
+	sub_type_hdr->length = sizeof(struct crat_subtype_memory);
+	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
+
+	/* Fill in Memory Subunit data */
+
+	/* Unlike si_meminfo, si_meminfo_node is not exported. So
+	 * the following lines are duplicated from si_meminfo_node
+	 * function
+	 */
+	pgdat = NODE_DATA(numa_node_id);
+	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
+		mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]);
+	mem_in_bytes <<= PAGE_SHIFT;
+
+	sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
+	sub_type_hdr->length_high = upper_32_bits(mem_in_bytes);
+	sub_type_hdr->proximity_domain = proximity_domain;
+
+	return 0;
+}
+
+#ifdef CONFIG_X86_64
+static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size,
+				uint32_t *num_entries,
+				struct crat_subtype_iolink *sub_type_hdr)
+{
+	int nid;
+	struct cpuinfo_x86 *c = &cpu_data(0);
+	uint8_t link_type;
+
+	if (c->x86_vendor == X86_VENDOR_AMD)
+		link_type = CRAT_IOLINK_TYPE_HYPERTRANSPORT;
+	else
+		link_type = CRAT_IOLINK_TYPE_QPI_1_1;
+
+	*num_entries = 0;
+
+	/* Create IO links from this node to other CPU nodes */
+	for_each_online_node(nid) {
+		if (nid == numa_node_id) /* node itself */
+			continue;
+
+		*avail_size -= sizeof(struct crat_subtype_iolink);
+		if (*avail_size < 0)
+			return -ENOMEM;
+
+		memset(sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
+
+		/* Fill in subtype header data */
+		sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
+		sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
+		sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
+
+		/* Fill in IO link data */
+		sub_type_hdr->proximity_domain_from = numa_node_id;
+		sub_type_hdr->proximity_domain_to = nid;
+		sub_type_hdr->io_interface_type = link_type;
+
+		(*num_entries)++;
+		sub_type_hdr++;
+	}
+
+	return 0;
+}
+#endif
+
+/* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU
+ *
+ *	@pcrat_image: Fill in VCRAT for CPU
+ *	@size:	[IN] allocated size of crat_image.
+ *		[OUT] actual size of data filled in crat_image
+ */
+static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
+{
+	struct crat_header *crat_table = (struct crat_header *)pcrat_image;
+	struct acpi_table_header *acpi_table;
+	acpi_status status;
+	struct crat_subtype_generic *sub_type_hdr;
+	int avail_size = *size;
+	int numa_node_id;
+#ifdef CONFIG_X86_64
+	uint32_t entries = 0;
+#endif
+	int ret = 0;
+
+	if (!pcrat_image)
+		return -EINVAL;
+
+	/* Fill in CRAT Header.
+	 * Modify length and total_entries as subunits are added.
+	 */
+	avail_size -= sizeof(struct crat_header);
+	if (avail_size < 0)
+		return -ENOMEM;
+
+	memset(crat_table, 0, sizeof(struct crat_header));
+	memcpy(&crat_table->signature, CRAT_SIGNATURE,
+			sizeof(crat_table->signature));
+	crat_table->length = sizeof(struct crat_header);
+
+	status = acpi_get_table("DSDT", 0, &acpi_table);
+	if (status != AE_OK)
+		pr_warn("DSDT table not found for OEM information\n");
+	else {
+		crat_table->oem_revision = acpi_table->revision;
+		memcpy(crat_table->oem_id, acpi_table->oem_id,
+				CRAT_OEMID_LENGTH);
+		memcpy(crat_table->oem_table_id, acpi_table->oem_table_id,
+				CRAT_OEMTABLEID_LENGTH);
+		acpi_put_table(acpi_table);
+	}
+	crat_table->total_entries = 0;
+	crat_table->num_domains = 0;
+
+	sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
+
+	for_each_online_node(numa_node_id) {
+		if (kfd_numa_node_to_apic_id(numa_node_id) == -1)
+			continue;
+
+		/* Fill in Subtype: Compute Unit */
+		ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size,
+			crat_table->num_domains,
+			(struct crat_subtype_computeunit *)sub_type_hdr);
+		if (ret < 0)
+			return ret;
+		crat_table->length += sub_type_hdr->length;
+		crat_table->total_entries++;
+
+		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+			sub_type_hdr->length);
+
+		/* Fill in Subtype: Memory */
+		ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size,
+			crat_table->num_domains,
+			(struct crat_subtype_memory *)sub_type_hdr);
+		if (ret < 0)
+			return ret;
+		crat_table->length += sub_type_hdr->length;
+		crat_table->total_entries++;
+
+		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+			sub_type_hdr->length);
+
+		/* Fill in Subtype: IO Link */
+#ifdef CONFIG_X86_64
+		ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size,
+				&entries,
+				(struct crat_subtype_iolink *)sub_type_hdr);
+		if (ret < 0)
+			return ret;
+
+		if (entries) {
+			crat_table->length += (sub_type_hdr->length * entries);
+			crat_table->total_entries += entries;
+
+			sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+					sub_type_hdr->length * entries);
+		}
+#else
+		pr_info("IO link not available for non x86 platforms\n");
+#endif
+
+		crat_table->num_domains++;
+	}
+
+	/* TODO: Add cache Subtype for CPU.
+	 * Currently, CPU cache information is available in function
+	 * detect_cache_attributes(cpu) defined in the file
+	 * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not
+	 * exported and to get the same information the code needs to be
+	 * duplicated.
+	 */
+
+	*size = crat_table->length;
+	pr_info("Virtual CRAT table created for CPU\n");
+
+	return 0;
+}
+
+static int kfd_fill_gpu_memory_affinity(int *avail_size,
+		struct kfd_node *kdev, uint8_t type, uint64_t size,
+		struct crat_subtype_memory *sub_type_hdr,
+		uint32_t proximity_domain,
+		const struct kfd_local_mem_info *local_mem_info)
+{
+	*avail_size -= sizeof(struct crat_subtype_memory);
+	if (*avail_size < 0)
+		return -ENOMEM;
+
+	memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_memory));
+	sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
+	sub_type_hdr->length = sizeof(struct crat_subtype_memory);
+	sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
+
+	sub_type_hdr->proximity_domain = proximity_domain;
+
+	pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n",
+			type, size);
+
+	sub_type_hdr->length_low = lower_32_bits(size);
+	sub_type_hdr->length_high = upper_32_bits(size);
+
+	sub_type_hdr->width = local_mem_info->vram_width;
+	sub_type_hdr->visibility_type = type;
+
+	return 0;
+}
+
+#ifdef CONFIG_ACPI_NUMA
+static void kfd_find_numa_node_in_srat(struct kfd_node *kdev)
+{
+	struct acpi_table_header *table_header = NULL;
+	struct acpi_subtable_header *sub_header = NULL;
+	unsigned long table_end, subtable_len;
+	u32 pci_id = pci_domain_nr(kdev->adev->pdev->bus) << 16 |
+			pci_dev_id(kdev->adev->pdev);
+	u32 bdf;
+	acpi_status status;
+	struct acpi_srat_cpu_affinity *cpu;
+	struct acpi_srat_generic_affinity *gpu;
+	int pxm = 0, max_pxm = 0;
+	int numa_node = NUMA_NO_NODE;
+	bool found = false;
+
+	/* Fetch the SRAT table from ACPI */
+	status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
+	if (status == AE_NOT_FOUND) {
+		pr_warn("SRAT table not found\n");
+		return;
+	} else if (ACPI_FAILURE(status)) {
+		const char *err = acpi_format_exception(status);
+		pr_err("SRAT table error: %s\n", err);
+		return;
+	}
+
+	table_end = (unsigned long)table_header + table_header->length;
+
+	/* Parse all entries looking for a match. */
+	sub_header = (struct acpi_subtable_header *)
+			((unsigned long)table_header +
+			sizeof(struct acpi_table_srat));
+	subtable_len = sub_header->length;
+
+	while (((unsigned long)sub_header) + subtable_len  < table_end) {
+		/*
+		 * If length is 0, break from this loop to avoid
+		 * infinite loop.
+		 */
+		if (subtable_len == 0) {
+			pr_err("SRAT invalid zero length\n");
+			break;
+		}
+
+		switch (sub_header->type) {
+		case ACPI_SRAT_TYPE_CPU_AFFINITY:
+			cpu = (struct acpi_srat_cpu_affinity *)sub_header;
+			pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
+					cpu->proximity_domain_lo;
+			if (pxm > max_pxm)
+				max_pxm = pxm;
+			break;
+		case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
+			gpu = (struct acpi_srat_generic_affinity *)sub_header;
+			bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
+					*((u16 *)(&gpu->device_handle[2]));
+			if (bdf == pci_id) {
+				found = true;
+				numa_node = pxm_to_node(gpu->proximity_domain);
+			}
+			break;
+		default:
+			break;
+		}
+
+		if (found)
+			break;
+
+		sub_header = (struct acpi_subtable_header *)
+				((unsigned long)sub_header + subtable_len);
+		subtable_len = sub_header->length;
+	}
+
+	acpi_put_table(table_header);
+
+	/* Workaround bad cpu-gpu binding case */
+	if (found && (numa_node < 0 ||
+			numa_node > pxm_to_node(max_pxm)))
+		numa_node = 0;
+
+	if (numa_node != NUMA_NO_NODE)
+		set_dev_node(&kdev->adev->pdev->dev, numa_node);
+}
+#endif
+
+#define KFD_CRAT_INTRA_SOCKET_WEIGHT	13
+#define KFD_CRAT_XGMI_WEIGHT		15
+
+/* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
+ * to its NUMA node
+ *	@avail_size: Available size in the memory
+ *	@kdev - [IN] GPU device
+ *	@sub_type_hdr: Memory into which io link info will be filled in
+ *	@proximity_domain - proximity domain of the GPU node
+ *
+ *	Return 0 if successful else return -ve value
+ */
+static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
+			struct kfd_node *kdev,
+			struct crat_subtype_iolink *sub_type_hdr,
+			uint32_t proximity_domain)
+{
+	*avail_size -= sizeof(struct crat_subtype_iolink);
+	if (*avail_size < 0)
+		return -ENOMEM;
+
+	memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
+
+	/* Fill in subtype header data */
+	sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
+	sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
+	sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
+	if (kfd_dev_is_large_bar(kdev))
+		sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
+
+	/* Fill in IOLINK subtype.
+	 * TODO: Fill-in other fields of iolink subtype
+	 */
+	if (kdev->adev->gmc.xgmi.connected_to_cpu ||
+	    (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 3) &&
+	     kdev->adev->smuio.funcs->get_pkg_type(kdev->adev) ==
+	     AMDGPU_PKG_TYPE_APU)) {
+		bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3);
+		int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT :
+							KFD_CRAT_INTRA_SOCKET_WEIGHT;
+		uint32_t bandwidth = ext_cpu ? amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
+							kdev->adev, NULL, true) : mem_bw;
+
+		/*
+		 * with host gpu xgmi link, host can access gpu memory whether
+		 * or not pcie bar type is large, so always create bidirectional
+		 * io link.
+		 */
+		sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
+		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
+		sub_type_hdr->weight_xgmi = weight;
+		sub_type_hdr->minimum_bandwidth_mbs = bandwidth;
+		sub_type_hdr->maximum_bandwidth_mbs = bandwidth;
+	} else {
+		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
+		sub_type_hdr->minimum_bandwidth_mbs =
+				amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true);
+		sub_type_hdr->maximum_bandwidth_mbs =
+				amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false);
+	}
+
+	sub_type_hdr->proximity_domain_from = proximity_domain;
+
+#ifdef CONFIG_ACPI_NUMA
+	if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE &&
+	    num_possible_nodes() > 1)
+		kfd_find_numa_node_in_srat(kdev);
+#endif
+#ifdef CONFIG_NUMA
+	if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE)
+		sub_type_hdr->proximity_domain_to = 0;
+	else
+		sub_type_hdr->proximity_domain_to = kdev->adev->pdev->dev.numa_node;
+#else
+	sub_type_hdr->proximity_domain_to = 0;
+#endif
+	return 0;
+}
+
+static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
+			struct kfd_node *kdev,
+			struct kfd_node *peer_kdev,
+			struct crat_subtype_iolink *sub_type_hdr,
+			uint32_t proximity_domain_from,
+			uint32_t proximity_domain_to)
+{
+	bool use_ta_info = kdev->kfd->num_nodes == 1;
+
+	*avail_size -= sizeof(struct crat_subtype_iolink);
+	if (*avail_size < 0)
+		return -ENOMEM;
+
+	memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
+
+	sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
+	sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
+	sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED |
+			       CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
+
+	sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
+	sub_type_hdr->proximity_domain_from = proximity_domain_from;
+	sub_type_hdr->proximity_domain_to = proximity_domain_to;
+
+	if (use_ta_info) {
+		sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT *
+			amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev);
+		sub_type_hdr->maximum_bandwidth_mbs =
+			amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev,
+							peer_kdev->adev, false);
+		sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
+			amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0;
+	} else {
+		bool is_single_hop = kdev->kfd == peer_kdev->kfd;
+		int weight = is_single_hop ? KFD_CRAT_INTRA_SOCKET_WEIGHT :
+			(2 * KFD_CRAT_INTRA_SOCKET_WEIGHT) + KFD_CRAT_XGMI_WEIGHT;
+		int mem_bw = 819200;
+
+		sub_type_hdr->weight_xgmi = weight;
+		sub_type_hdr->maximum_bandwidth_mbs = is_single_hop ? mem_bw : 0;
+		sub_type_hdr->minimum_bandwidth_mbs = is_single_hop ? mem_bw : 0;
+	}
+
+	return 0;
+}
+
+/* kfd_create_vcrat_image_gpu - Create Virtual CRAT for CPU
+ *
+ *	@pcrat_image: Fill in VCRAT for GPU
+ *	@size:	[IN] allocated size of crat_image.
+ *		[OUT] actual size of data filled in crat_image
+ */
+static int kfd_create_vcrat_image_gpu(void *pcrat_image,
+				      size_t *size, struct kfd_node *kdev,
+				      uint32_t proximity_domain)
+{
+	struct crat_header *crat_table = (struct crat_header *)pcrat_image;
+	struct crat_subtype_generic *sub_type_hdr;
+	struct kfd_local_mem_info local_mem_info;
+	struct kfd_topology_device *peer_dev;
+	struct crat_subtype_computeunit *cu;
+	struct kfd_cu_info cu_info;
+	int avail_size = *size;
+	uint32_t total_num_of_cu;
+	uint32_t nid = 0;
+	int ret = 0;
+
+	if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
+		return -EINVAL;
+
+	/* Fill the CRAT Header.
+	 * Modify length and total_entries as subunits are added.
+	 */
+	avail_size -= sizeof(struct crat_header);
+	if (avail_size < 0)
+		return -ENOMEM;
+
+	memset(crat_table, 0, sizeof(struct crat_header));
+
+	memcpy(&crat_table->signature, CRAT_SIGNATURE,
+			sizeof(crat_table->signature));
+	/* Change length as we add more subtypes*/
+	crat_table->length = sizeof(struct crat_header);
+	crat_table->num_domains = 1;
+	crat_table->total_entries = 0;
+
+	/* Fill in Subtype: Compute Unit
+	 * First fill in the sub type header and then sub type data
+	 */
+	avail_size -= sizeof(struct crat_subtype_computeunit);
+	if (avail_size < 0)
+		return -ENOMEM;
+
+	sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1);
+	memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));
+
+	sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
+	sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
+	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
+
+	/* Fill CU subtype data */
+	cu = (struct crat_subtype_computeunit *)sub_type_hdr;
+	cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
+	cu->proximity_domain = proximity_domain;
+
+	amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
+	cu->num_simd_per_cu = cu_info.simd_per_cu;
+	cu->num_simd_cores = cu_info.simd_per_cu *
+			(cu_info.cu_active_number / kdev->kfd->num_nodes);
+	cu->max_waves_simd = cu_info.max_waves_per_simd;
+
+	cu->wave_front_size = cu_info.wave_front_size;
+	cu->array_count = cu_info.num_shader_arrays_per_engine *
+		cu_info.num_shader_engines;
+	total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh);
+	cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu);
+	cu->num_cu_per_array = cu_info.num_cu_per_sh;
+	cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu;
+	cu->num_banks = cu_info.num_shader_engines;
+	cu->lds_size_in_kb = cu_info.lds_size;
+
+	cu->hsa_capability = 0;
+
+	crat_table->length += sub_type_hdr->length;
+	crat_table->total_entries++;
+
+	/* Fill in Subtype: Memory. Only on systems with large BAR (no
+	 * private FB), report memory as public. On other systems
+	 * report the total FB size (public+private) as a single
+	 * private heap.
+	 */
+	local_mem_info = kdev->local_mem_info;
+	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+			sub_type_hdr->length);
+
+	if (debug_largebar)
+		local_mem_info.local_mem_size_private = 0;
+
+	if (local_mem_info.local_mem_size_private == 0)
+		ret = kfd_fill_gpu_memory_affinity(&avail_size,
+				kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC,
+				local_mem_info.local_mem_size_public,
+				(struct crat_subtype_memory *)sub_type_hdr,
+				proximity_domain,
+				&local_mem_info);
+	else
+		ret = kfd_fill_gpu_memory_affinity(&avail_size,
+				kdev, HSA_MEM_HEAP_TYPE_FB_PRIVATE,
+				local_mem_info.local_mem_size_public +
+				local_mem_info.local_mem_size_private,
+				(struct crat_subtype_memory *)sub_type_hdr,
+				proximity_domain,
+				&local_mem_info);
+	if (ret < 0)
+		return ret;
+
+	crat_table->length += sizeof(struct crat_subtype_memory);
+	crat_table->total_entries++;
+
+	/* Fill in Subtype: IO_LINKS
+	 *  Only direct links are added here which is Link from GPU to
+	 *  its NUMA node. Indirect links are added by userspace.
+	 */
+	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+		sub_type_hdr->length);
+	ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
+		(struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
+
+	if (ret < 0)
+		return ret;
+
+	crat_table->length += sub_type_hdr->length;
+	crat_table->total_entries++;
+
+
+	/* Fill in Subtype: IO_LINKS
+	 * Direct links from GPU to other GPUs through xGMI.
+	 * We will loop GPUs that already be processed (with lower value
+	 * of proximity_domain), add the link for the GPUs with same
+	 * hive id (from this GPU to other GPU) . The reversed iolink
+	 * (from other GPU to this GPU) will be added
+	 * in kfd_parse_subtype_iolink.
+	 */
+	if (kdev->kfd->hive_id) {
+		for (nid = 0; nid < proximity_domain; ++nid) {
+			peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid);
+			if (!peer_dev->gpu)
+				continue;
+			if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id)
+				continue;
+			sub_type_hdr = (typeof(sub_type_hdr))(
+				(char *)sub_type_hdr +
+				sizeof(struct crat_subtype_iolink));
+			ret = kfd_fill_gpu_xgmi_link_to_gpu(
+				&avail_size, kdev, peer_dev->gpu,
+				(struct crat_subtype_iolink *)sub_type_hdr,
+				proximity_domain, nid);
+			if (ret < 0)
+				return ret;
+			crat_table->length += sub_type_hdr->length;
+			crat_table->total_entries++;
+		}
+	}
+	*size = crat_table->length;
+	pr_info("Virtual CRAT table created for GPU\n");
+
+	return ret;
+}
+
+/* kfd_create_crat_image_virtual - Allocates memory for CRAT image and
+ *		creates a Virtual CRAT (VCRAT) image
+ *
+ * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
+ *
+ *	@crat_image: VCRAT image created because ACPI does not have a
+ *		     CRAT for this device
+ *	@size: [OUT] size of virtual crat_image
+ *	@flags:	COMPUTE_UNIT_CPU - Create VCRAT for CPU device
+ *		COMPUTE_UNIT_GPU - Create VCRAT for GPU
+ *		(COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
+ *			-- this option is not currently implemented.
+ *			The assumption is that all AMD APUs will have CRAT
+ *	@kdev: Valid kfd_node required if flags contain COMPUTE_UNIT_GPU
+ *
+ *	Return 0 if successful else return -ve value
+ */
+int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
+				  int flags, struct kfd_node *kdev,
+				  uint32_t proximity_domain)
+{
+	void *pcrat_image = NULL;
+	int ret = 0, num_nodes;
+	size_t dyn_size;
+
+	if (!crat_image)
+		return -EINVAL;
+
+	*crat_image = NULL;
+
+	/* Allocate the CPU Virtual CRAT size based on the number of online
+	 * nodes. Allocate VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image.
+	 * This should cover all the current conditions. A check is put not
+	 * to overwrite beyond allocated size for GPUs
+	 */
+	switch (flags) {
+	case COMPUTE_UNIT_CPU:
+		num_nodes = num_online_nodes();
+		dyn_size = sizeof(struct crat_header) +
+			num_nodes * (sizeof(struct crat_subtype_computeunit) +
+			sizeof(struct crat_subtype_memory) +
+			(num_nodes - 1) * sizeof(struct crat_subtype_iolink));
+		pcrat_image = kvmalloc(dyn_size, GFP_KERNEL);
+		if (!pcrat_image)
+			return -ENOMEM;
+		*size = dyn_size;
+		pr_debug("CRAT size is %ld", dyn_size);
+		ret = kfd_create_vcrat_image_cpu(pcrat_image, size);
+		break;
+	case COMPUTE_UNIT_GPU:
+		if (!kdev)
+			return -EINVAL;
+		pcrat_image = kvmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL);
+		if (!pcrat_image)
+			return -ENOMEM;
+		*size = VCRAT_SIZE_FOR_GPU;
+		ret = kfd_create_vcrat_image_gpu(pcrat_image, size, kdev,
+						 proximity_domain);
+		break;
+	case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU):
+		/* TODO: */
+		ret = -EINVAL;
+		pr_err("VCRAT not implemented for APU\n");
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (!ret)
+		*crat_image = pcrat_image;
+	else
+		kvfree(pcrat_image);
+
+	return ret;
+}
+
+
+/* kfd_destroy_crat_image
+ *
+ *	@crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..)
+ *
+ */
+void kfd_destroy_crat_image(void *crat_image)
+{
+	kvfree(crat_image);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
new file mode 100644
index 000000000..74c2d7a0d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -0,0 +1,321 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef KFD_CRAT_H_INCLUDED
+#define KFD_CRAT_H_INCLUDED
+
+#include <linux/types.h>
+
+#pragma pack(1)
+
+/*
+ * 4CC signature value for the CRAT ACPI table
+ */
+
+#define CRAT_SIGNATURE	"CRAT"
+
+/*
+ * Component Resource Association Table (CRAT)
+ */
+
+#define CRAT_OEMID_LENGTH	6
+#define CRAT_OEMTABLEID_LENGTH	8
+#define CRAT_RESERVED_LENGTH	6
+
+#define CRAT_OEMID_64BIT_MASK ((1ULL << (CRAT_OEMID_LENGTH * 8)) - 1)
+
+/* Compute Unit flags */
+#define COMPUTE_UNIT_CPU	(1 << 0)  /* Create Virtual CRAT for CPU */
+#define COMPUTE_UNIT_GPU	(1 << 1)  /* Create Virtual CRAT for GPU */
+
+struct crat_header {
+	uint32_t	signature;
+	uint32_t	length;
+	uint8_t		revision;
+	uint8_t		checksum;
+	uint8_t		oem_id[CRAT_OEMID_LENGTH];
+	uint8_t		oem_table_id[CRAT_OEMTABLEID_LENGTH];
+	uint32_t	oem_revision;
+	uint32_t	creator_id;
+	uint32_t	creator_revision;
+	uint32_t	total_entries;
+	uint16_t	num_domains;
+	uint8_t		reserved[CRAT_RESERVED_LENGTH];
+};
+
+/*
+ * The header structure is immediately followed by total_entries of the
+ * data definitions
+ */
+
+/*
+ * The currently defined subtype entries in the CRAT
+ */
+#define CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY	0
+#define CRAT_SUBTYPE_MEMORY_AFFINITY		1
+#define CRAT_SUBTYPE_CACHE_AFFINITY		2
+#define CRAT_SUBTYPE_TLB_AFFINITY		3
+#define CRAT_SUBTYPE_CCOMPUTE_AFFINITY		4
+#define CRAT_SUBTYPE_IOLINK_AFFINITY		5
+#define CRAT_SUBTYPE_MAX			6
+
+/*
+ * Do not change the value of CRAT_SIBLINGMAP_SIZE from 32
+ * as it breaks the ABI.
+ */
+#define CRAT_SIBLINGMAP_SIZE	32
+
+/*
+ * ComputeUnit Affinity structure and definitions
+ */
+#define CRAT_CU_FLAGS_ENABLED		0x00000001
+#define CRAT_CU_FLAGS_HOT_PLUGGABLE	0x00000002
+#define CRAT_CU_FLAGS_CPU_PRESENT	0x00000004
+#define CRAT_CU_FLAGS_GPU_PRESENT	0x00000008
+#define CRAT_CU_FLAGS_IOMMU_PRESENT	0x00000010
+#define CRAT_CU_FLAGS_RESERVED		0xffffffe0
+
+#define CRAT_COMPUTEUNIT_RESERVED_LENGTH 4
+
+struct crat_subtype_computeunit {
+	uint8_t		type;
+	uint8_t		length;
+	uint16_t	reserved;
+	uint32_t	flags;
+	uint32_t	proximity_domain;
+	uint32_t	processor_id_low;
+	uint16_t	num_cpu_cores;
+	uint16_t	num_simd_cores;
+	uint16_t	max_waves_simd;
+	uint16_t	io_count;
+	uint16_t	hsa_capability;
+	uint16_t	lds_size_in_kb;
+	uint8_t		wave_front_size;
+	uint8_t		num_banks;
+	uint16_t	micro_engine_id;
+	uint8_t		array_count;
+	uint8_t		num_cu_per_array;
+	uint8_t		num_simd_per_cu;
+	uint8_t		max_slots_scatch_cu;
+	uint8_t		reserved2[CRAT_COMPUTEUNIT_RESERVED_LENGTH];
+};
+
+/*
+ * HSA Memory Affinity structure and definitions
+ */
+#define CRAT_MEM_FLAGS_ENABLED		0x00000001
+#define CRAT_MEM_FLAGS_HOT_PLUGGABLE	0x00000002
+#define CRAT_MEM_FLAGS_NON_VOLATILE	0x00000004
+#define CRAT_MEM_FLAGS_RESERVED		0xfffffff8
+
+#define CRAT_MEMORY_RESERVED_LENGTH 8
+
+struct crat_subtype_memory {
+	uint8_t		type;
+	uint8_t		length;
+	uint16_t	reserved;
+	uint32_t	flags;
+	uint32_t	proximity_domain;
+	uint32_t	base_addr_low;
+	uint32_t	base_addr_high;
+	uint32_t	length_low;
+	uint32_t	length_high;
+	uint32_t	width;
+	uint8_t		visibility_type; /* for virtual (dGPU) CRAT */
+	uint8_t		reserved2[CRAT_MEMORY_RESERVED_LENGTH - 1];
+};
+
+/*
+ * HSA Cache Affinity structure and definitions
+ */
+#define CRAT_CACHE_FLAGS_ENABLED	0x00000001
+#define CRAT_CACHE_FLAGS_DATA_CACHE	0x00000002
+#define CRAT_CACHE_FLAGS_INST_CACHE	0x00000004
+#define CRAT_CACHE_FLAGS_CPU_CACHE	0x00000008
+#define CRAT_CACHE_FLAGS_SIMD_CACHE	0x00000010
+#define CRAT_CACHE_FLAGS_RESERVED	0xffffffe0
+
+#define CRAT_CACHE_RESERVED_LENGTH 8
+
+struct crat_subtype_cache {
+	uint8_t		type;
+	uint8_t		length;
+	uint16_t	reserved;
+	uint32_t	flags;
+	uint32_t	processor_id_low;
+	uint8_t		sibling_map[CRAT_SIBLINGMAP_SIZE];
+	uint32_t	cache_size;
+	uint8_t		cache_level;
+	uint8_t		lines_per_tag;
+	uint16_t	cache_line_size;
+	uint8_t		associativity;
+	uint8_t		cache_properties;
+	uint16_t	cache_latency;
+	uint8_t		reserved2[CRAT_CACHE_RESERVED_LENGTH];
+};
+
+/*
+ * HSA TLB Affinity structure and definitions
+ */
+#define CRAT_TLB_FLAGS_ENABLED	0x00000001
+#define CRAT_TLB_FLAGS_DATA_TLB	0x00000002
+#define CRAT_TLB_FLAGS_INST_TLB	0x00000004
+#define CRAT_TLB_FLAGS_CPU_TLB	0x00000008
+#define CRAT_TLB_FLAGS_SIMD_TLB	0x00000010
+#define CRAT_TLB_FLAGS_RESERVED	0xffffffe0
+
+#define CRAT_TLB_RESERVED_LENGTH 4
+
+struct crat_subtype_tlb {
+	uint8_t		type;
+	uint8_t		length;
+	uint16_t	reserved;
+	uint32_t	flags;
+	uint32_t	processor_id_low;
+	uint8_t		sibling_map[CRAT_SIBLINGMAP_SIZE];
+	uint32_t	tlb_level;
+	uint8_t		data_tlb_associativity_2mb;
+	uint8_t		data_tlb_size_2mb;
+	uint8_t		instruction_tlb_associativity_2mb;
+	uint8_t		instruction_tlb_size_2mb;
+	uint8_t		data_tlb_associativity_4k;
+	uint8_t		data_tlb_size_4k;
+	uint8_t		instruction_tlb_associativity_4k;
+	uint8_t		instruction_tlb_size_4k;
+	uint8_t		data_tlb_associativity_1gb;
+	uint8_t		data_tlb_size_1gb;
+	uint8_t		instruction_tlb_associativity_1gb;
+	uint8_t		instruction_tlb_size_1gb;
+	uint8_t		reserved2[CRAT_TLB_RESERVED_LENGTH];
+};
+
+/*
+ * HSA CCompute/APU Affinity structure and definitions
+ */
+#define CRAT_CCOMPUTE_FLAGS_ENABLED	0x00000001
+#define CRAT_CCOMPUTE_FLAGS_RESERVED	0xfffffffe
+
+#define CRAT_CCOMPUTE_RESERVED_LENGTH 16
+
+struct crat_subtype_ccompute {
+	uint8_t		type;
+	uint8_t		length;
+	uint16_t	reserved;
+	uint32_t	flags;
+	uint32_t	processor_id_low;
+	uint8_t		sibling_map[CRAT_SIBLINGMAP_SIZE];
+	uint32_t	apu_size;
+	uint8_t		reserved2[CRAT_CCOMPUTE_RESERVED_LENGTH];
+};
+
+/*
+ * HSA IO Link Affinity structure and definitions
+ */
+#define CRAT_IOLINK_FLAGS_ENABLED		(1 << 0)
+#define CRAT_IOLINK_FLAGS_NON_COHERENT		(1 << 1)
+#define CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT	(1 << 2)
+#define CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT	(1 << 3)
+#define CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA	(1 << 4)
+#define CRAT_IOLINK_FLAGS_BI_DIRECTIONAL	(1 << 31)
+#define CRAT_IOLINK_FLAGS_RESERVED_MASK		0x7fffffe0
+
+/*
+ * IO interface types
+ */
+#define CRAT_IOLINK_TYPE_UNDEFINED	0
+#define CRAT_IOLINK_TYPE_HYPERTRANSPORT	1
+#define CRAT_IOLINK_TYPE_PCIEXPRESS	2
+#define CRAT_IOLINK_TYPE_AMBA		3
+#define CRAT_IOLINK_TYPE_MIPI		4
+#define CRAT_IOLINK_TYPE_QPI_1_1	5
+#define CRAT_IOLINK_TYPE_RESERVED1	6
+#define CRAT_IOLINK_TYPE_RESERVED2	7
+#define CRAT_IOLINK_TYPE_RAPID_IO	8
+#define CRAT_IOLINK_TYPE_INFINIBAND	9
+#define CRAT_IOLINK_TYPE_RESERVED3	10
+#define CRAT_IOLINK_TYPE_XGMI		11
+#define CRAT_IOLINK_TYPE_XGOP		12
+#define CRAT_IOLINK_TYPE_GZ		13
+#define CRAT_IOLINK_TYPE_ETHERNET_RDMA	14
+#define CRAT_IOLINK_TYPE_RDMA_OTHER	15
+#define CRAT_IOLINK_TYPE_OTHER		16
+#define CRAT_IOLINK_TYPE_MAX		255
+
+#define CRAT_IOLINK_RESERVED_LENGTH	24
+
+struct crat_subtype_iolink {
+	uint8_t		type;
+	uint8_t		length;
+	uint16_t	reserved;
+	uint32_t	flags;
+	uint32_t	proximity_domain_from;
+	uint32_t	proximity_domain_to;
+	uint8_t		io_interface_type;
+	uint8_t		version_major;
+	uint16_t	version_minor;
+	uint32_t	minimum_latency;
+	uint32_t	maximum_latency;
+	uint32_t	minimum_bandwidth_mbs;
+	uint32_t	maximum_bandwidth_mbs;
+	uint32_t	recommended_transfer_size;
+	uint8_t		reserved2[CRAT_IOLINK_RESERVED_LENGTH - 1];
+	uint8_t		weight_xgmi;
+};
+
+/*
+ * HSA generic sub-type header
+ */
+
+#define CRAT_SUBTYPE_FLAGS_ENABLED 0x00000001
+
+struct crat_subtype_generic {
+	uint8_t		type;
+	uint8_t		length;
+	uint16_t	reserved;
+	uint32_t	flags;
+};
+
+#pragma pack()
+
+struct kfd_node;
+
+/* Static table to describe GPU Cache information */
+struct kfd_gpu_cache_info {
+	uint32_t	cache_size;
+	uint32_t	cache_level;
+	uint32_t	flags;
+	/* Indicates how many Compute Units share this cache
+	 * within a SA. Value = 1 indicates the cache is not shared
+	 */
+	uint32_t	num_cu_shared;
+};
+int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info);
+
+void kfd_destroy_crat_image(void *crat_image);
+int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
+			 uint32_t proximity_domain);
+int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
+				  int flags, struct kfd_node *kdev,
+				  uint32_t proximity_domain);
+
+#endif /* KFD_CRAT_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
new file mode 100644
index 000000000..9ec750666
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -0,0 +1,1120 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "kfd_debug.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_topology.h"
+#include <linux/file.h>
+#include <uapi/linux/kfd_ioctl.h>
+
+#define MAX_WATCH_ADDRESSES	4
+
+int kfd_dbg_ev_query_debug_event(struct kfd_process *process,
+		      unsigned int *queue_id,
+		      unsigned int *gpu_id,
+		      uint64_t exception_clear_mask,
+		      uint64_t *event_status)
+{
+	struct process_queue_manager *pqm;
+	struct process_queue_node *pqn;
+	int i;
+
+	if (!(process && process->debug_trap_enabled))
+		return -ENODATA;
+
+	mutex_lock(&process->event_mutex);
+	*event_status = 0;
+	*queue_id = 0;
+	*gpu_id = 0;
+
+	/* find and report queue events */
+	pqm = &process->pqm;
+	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
+		uint64_t tmp = process->exception_enable_mask;
+
+		if (!pqn->q)
+			continue;
+
+		tmp &= pqn->q->properties.exception_status;
+
+		if (!tmp)
+			continue;
+
+		*event_status = pqn->q->properties.exception_status;
+		*queue_id = pqn->q->properties.queue_id;
+		*gpu_id = pqn->q->device->id;
+		pqn->q->properties.exception_status &= ~exception_clear_mask;
+		goto out;
+	}
+
+	/* find and report device events */
+	for (i = 0; i < process->n_pdds; i++) {
+		struct kfd_process_device *pdd = process->pdds[i];
+		uint64_t tmp = process->exception_enable_mask
+						& pdd->exception_status;
+
+		if (!tmp)
+			continue;
+
+		*event_status = pdd->exception_status;
+		*gpu_id = pdd->dev->id;
+		pdd->exception_status &= ~exception_clear_mask;
+		goto out;
+	}
+
+	/* report process events */
+	if (process->exception_enable_mask & process->exception_status) {
+		*event_status = process->exception_status;
+		process->exception_status &= ~exception_clear_mask;
+	}
+
+out:
+	mutex_unlock(&process->event_mutex);
+	return *event_status ? 0 : -EAGAIN;
+}
+
+void debug_event_write_work_handler(struct work_struct *work)
+{
+	struct kfd_process *process;
+
+	static const char write_data = '.';
+	loff_t pos = 0;
+
+	process = container_of(work,
+			struct kfd_process,
+			debug_event_workarea);
+
+	kernel_write(process->dbg_ev_file, &write_data, 1, &pos);
+}
+
+/* update process/device/queue exception status, write to descriptor
+ * only if exception_status is enabled.
+ */
+bool kfd_dbg_ev_raise(uint64_t event_mask,
+			struct kfd_process *process, struct kfd_node *dev,
+			unsigned int source_id, bool use_worker,
+			void *exception_data, size_t exception_data_size)
+{
+	struct process_queue_manager *pqm;
+	struct process_queue_node *pqn;
+	int i;
+	static const char write_data = '.';
+	loff_t pos = 0;
+	bool is_subscribed = true;
+
+	if (!(process && process->debug_trap_enabled))
+		return false;
+
+	mutex_lock(&process->event_mutex);
+
+	if (event_mask & KFD_EC_MASK_DEVICE) {
+		for (i = 0; i < process->n_pdds; i++) {
+			struct kfd_process_device *pdd = process->pdds[i];
+
+			if (pdd->dev != dev)
+				continue;
+
+			pdd->exception_status |= event_mask & KFD_EC_MASK_DEVICE;
+
+			if (event_mask & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
+				if (!pdd->vm_fault_exc_data) {
+					pdd->vm_fault_exc_data = kmemdup(
+							exception_data,
+							exception_data_size,
+							GFP_KERNEL);
+					if (!pdd->vm_fault_exc_data)
+						pr_debug("Failed to allocate exception data memory");
+				} else {
+					pr_debug("Debugger exception data not saved\n");
+					print_hex_dump_bytes("exception data: ",
+							DUMP_PREFIX_OFFSET,
+							exception_data,
+							exception_data_size);
+				}
+			}
+			break;
+		}
+	} else if (event_mask & KFD_EC_MASK_PROCESS) {
+		process->exception_status |= event_mask & KFD_EC_MASK_PROCESS;
+	} else {
+		pqm = &process->pqm;
+		list_for_each_entry(pqn, &pqm->queues,
+				process_queue_list) {
+			int target_id;
+
+			if (!pqn->q)
+				continue;
+
+			target_id = event_mask & KFD_EC_MASK(EC_QUEUE_NEW) ?
+					pqn->q->properties.queue_id :
+							pqn->q->doorbell_id;
+
+			if (pqn->q->device != dev || target_id != source_id)
+				continue;
+
+			pqn->q->properties.exception_status |= event_mask;
+			break;
+		}
+	}
+
+	if (process->exception_enable_mask & event_mask) {
+		if (use_worker)
+			schedule_work(&process->debug_event_workarea);
+		else
+			kernel_write(process->dbg_ev_file,
+					&write_data,
+					1,
+					&pos);
+	} else {
+		is_subscribed = false;
+	}
+
+	mutex_unlock(&process->event_mutex);
+
+	return is_subscribed;
+}
+
+/* set pending event queue entry from ring entry  */
+bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev,
+				   unsigned int pasid,
+				   uint32_t doorbell_id,
+				   uint64_t trap_mask,
+				   void *exception_data,
+				   size_t exception_data_size)
+{
+	struct kfd_process *p;
+	bool signaled_to_debugger_or_runtime = false;
+
+	p = kfd_lookup_process_by_pasid(pasid);
+
+	if (!p)
+		return false;
+
+	if (!kfd_dbg_ev_raise(trap_mask, p, dev, doorbell_id, true,
+			      exception_data, exception_data_size)) {
+		struct process_queue_manager *pqm;
+		struct process_queue_node *pqn;
+
+		if (!!(trap_mask & KFD_EC_MASK_QUEUE) &&
+		       p->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED) {
+			mutex_lock(&p->mutex);
+
+			pqm = &p->pqm;
+			list_for_each_entry(pqn, &pqm->queues,
+							process_queue_list) {
+
+				if (!(pqn->q && pqn->q->device == dev &&
+				      pqn->q->doorbell_id == doorbell_id))
+					continue;
+
+				kfd_send_exception_to_runtime(p, pqn->q->properties.queue_id,
+							      trap_mask);
+
+				signaled_to_debugger_or_runtime = true;
+
+				break;
+			}
+
+			mutex_unlock(&p->mutex);
+		} else if (trap_mask & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
+			kfd_dqm_evict_pasid(dev->dqm, p->pasid);
+			kfd_signal_vm_fault_event(dev, p->pasid, NULL,
+							exception_data);
+
+			signaled_to_debugger_or_runtime = true;
+		}
+	} else {
+		signaled_to_debugger_or_runtime = true;
+	}
+
+	kfd_unref_process(p);
+
+	return signaled_to_debugger_or_runtime;
+}
+
+int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
+					unsigned int dev_id,
+					unsigned int queue_id,
+					uint64_t error_reason)
+{
+	if (error_reason & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
+		struct kfd_process_device *pdd = NULL;
+		struct kfd_hsa_memory_exception_data *data;
+		int i;
+
+		for (i = 0; i < p->n_pdds; i++) {
+			if (p->pdds[i]->dev->id == dev_id) {
+				pdd = p->pdds[i];
+				break;
+			}
+		}
+
+		if (!pdd)
+			return -ENODEV;
+
+		data = (struct kfd_hsa_memory_exception_data *)
+						pdd->vm_fault_exc_data;
+
+		kfd_dqm_evict_pasid(pdd->dev->dqm, p->pasid);
+		kfd_signal_vm_fault_event(pdd->dev, p->pasid, NULL, data);
+		error_reason &= ~KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION);
+	}
+
+	if (error_reason & (KFD_EC_MASK(EC_PROCESS_RUNTIME))) {
+		/*
+		 * block should only happen after the debugger receives runtime
+		 * enable notice.
+		 */
+		up(&p->runtime_enable_sema);
+		error_reason &= ~KFD_EC_MASK(EC_PROCESS_RUNTIME);
+	}
+
+	if (error_reason)
+		return kfd_send_exception_to_runtime(p, queue_id, error_reason);
+
+	return 0;
+}
+
+static int kfd_dbg_set_queue_workaround(struct queue *q, bool enable)
+{
+	struct mqd_update_info minfo = {0};
+	int err;
+
+	if (!q)
+		return 0;
+
+	if (!kfd_dbg_has_cwsr_workaround(q->device))
+		return 0;
+
+	if (enable && q->properties.is_user_cu_masked)
+		return -EBUSY;
+
+	minfo.update_flag = enable ? UPDATE_FLAG_DBG_WA_ENABLE : UPDATE_FLAG_DBG_WA_DISABLE;
+
+	q->properties.is_dbg_wa = enable;
+	err = q->device->dqm->ops.update_queue(q->device->dqm, q, &minfo);
+	if (err)
+		q->properties.is_dbg_wa = false;
+
+	return err;
+}
+
+static int kfd_dbg_set_workaround(struct kfd_process *target, bool enable)
+{
+	struct process_queue_manager *pqm = &target->pqm;
+	struct process_queue_node *pqn;
+	int r = 0;
+
+	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
+		r = kfd_dbg_set_queue_workaround(pqn->q, enable);
+		if (enable && r)
+			goto unwind;
+	}
+
+	return 0;
+
+unwind:
+	list_for_each_entry(pqn, &pqm->queues, process_queue_list)
+		kfd_dbg_set_queue_workaround(pqn->q, false);
+
+	if (enable)
+		target->runtime_info.runtime_state = r == -EBUSY ?
+				DEBUG_RUNTIME_STATE_ENABLED_BUSY :
+				DEBUG_RUNTIME_STATE_ENABLED_ERROR;
+
+	return r;
+}
+
+int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en)
+{
+	uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
+	uint32_t flags = pdd->process->dbg_flags;
+
+	if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
+		return 0;
+
+	return amdgpu_mes_set_shader_debugger(pdd->dev->adev, pdd->proc_ctx_gpu_addr, spi_dbg_cntl,
+						pdd->watch_points, flags, sq_trap_en);
+}
+
+#define KFD_DEBUGGER_INVALID_WATCH_POINT_ID -1
+static int kfd_dbg_get_dev_watch_id(struct kfd_process_device *pdd, int *watch_id)
+{
+	int i;
+
+	*watch_id = KFD_DEBUGGER_INVALID_WATCH_POINT_ID;
+
+	spin_lock(&pdd->dev->kfd->watch_points_lock);
+
+	for (i = 0; i < MAX_WATCH_ADDRESSES; i++) {
+		/* device watchpoint in use so skip */
+		if ((pdd->dev->kfd->alloc_watch_ids >> i) & 0x1)
+			continue;
+
+		pdd->alloc_watch_ids |= 0x1 << i;
+		pdd->dev->kfd->alloc_watch_ids |= 0x1 << i;
+		*watch_id = i;
+		spin_unlock(&pdd->dev->kfd->watch_points_lock);
+		return 0;
+	}
+
+	spin_unlock(&pdd->dev->kfd->watch_points_lock);
+
+	return -ENOMEM;
+}
+
+static void kfd_dbg_clear_dev_watch_id(struct kfd_process_device *pdd, int watch_id)
+{
+	spin_lock(&pdd->dev->kfd->watch_points_lock);
+
+	/* process owns device watch point so safe to clear */
+	if ((pdd->alloc_watch_ids >> watch_id) & 0x1) {
+		pdd->alloc_watch_ids &= ~(0x1 << watch_id);
+		pdd->dev->kfd->alloc_watch_ids &= ~(0x1 << watch_id);
+	}
+
+	spin_unlock(&pdd->dev->kfd->watch_points_lock);
+}
+
+static bool kfd_dbg_owns_dev_watch_id(struct kfd_process_device *pdd, int watch_id)
+{
+	bool owns_watch_id = false;
+
+	spin_lock(&pdd->dev->kfd->watch_points_lock);
+	owns_watch_id = watch_id < MAX_WATCH_ADDRESSES &&
+			((pdd->alloc_watch_ids >> watch_id) & 0x1);
+
+	spin_unlock(&pdd->dev->kfd->watch_points_lock);
+
+	return owns_watch_id;
+}
+
+int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
+					uint32_t watch_id)
+{
+	int r;
+
+	if (!kfd_dbg_owns_dev_watch_id(pdd, watch_id))
+		return -EINVAL;
+
+	if (!pdd->dev->kfd->shared_resources.enable_mes) {
+		r = debug_lock_and_unmap(pdd->dev->dqm);
+		if (r)
+			return r;
+	}
+
+	amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+	pdd->watch_points[watch_id] = pdd->dev->kfd2kgd->clear_address_watch(
+							pdd->dev->adev,
+							watch_id);
+	amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+
+	if (!pdd->dev->kfd->shared_resources.enable_mes)
+		r = debug_map_and_unlock(pdd->dev->dqm);
+	else
+		r = kfd_dbg_set_mes_debug_mode(pdd, true);
+
+	kfd_dbg_clear_dev_watch_id(pdd, watch_id);
+
+	return r;
+}
+
+int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
+					uint64_t watch_address,
+					uint32_t watch_address_mask,
+					uint32_t *watch_id,
+					uint32_t watch_mode)
+{
+	int xcc_id, r = kfd_dbg_get_dev_watch_id(pdd, watch_id);
+	uint32_t xcc_mask = pdd->dev->xcc_mask;
+
+	if (r)
+		return r;
+
+	if (!pdd->dev->kfd->shared_resources.enable_mes) {
+		r = debug_lock_and_unmap(pdd->dev->dqm);
+		if (r) {
+			kfd_dbg_clear_dev_watch_id(pdd, *watch_id);
+			return r;
+		}
+	}
+
+	amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+	for_each_inst(xcc_id, xcc_mask)
+		pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch(
+				pdd->dev->adev,
+				watch_address,
+				watch_address_mask,
+				*watch_id,
+				watch_mode,
+				pdd->dev->vm_info.last_vmid_kfd,
+				xcc_id);
+	amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+
+	if (!pdd->dev->kfd->shared_resources.enable_mes)
+		r = debug_map_and_unlock(pdd->dev->dqm);
+	else
+		r = kfd_dbg_set_mes_debug_mode(pdd, true);
+
+	/* HWS is broken so no point in HW rollback but release the watchpoint anyways */
+	if (r)
+		kfd_dbg_clear_dev_watch_id(pdd, *watch_id);
+
+	return 0;
+}
+
+static void kfd_dbg_clear_process_address_watch(struct kfd_process *target)
+{
+	int i, j;
+
+	for (i = 0; i < target->n_pdds; i++)
+		for (j = 0; j < MAX_WATCH_ADDRESSES; j++)
+			kfd_dbg_trap_clear_dev_address_watch(target->pdds[i], j);
+}
+
+int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
+{
+	uint32_t prev_flags = target->dbg_flags;
+	int i, r = 0, rewind_count = 0;
+
+	for (i = 0; i < target->n_pdds; i++) {
+		if (!kfd_dbg_is_per_vmid_supported(target->pdds[i]->dev) &&
+			(*flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP)) {
+			*flags = prev_flags;
+			return -EACCES;
+		}
+	}
+
+	target->dbg_flags = *flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP;
+	*flags = prev_flags;
+	for (i = 0; i < target->n_pdds; i++) {
+		struct kfd_process_device *pdd = target->pdds[i];
+
+		if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
+			continue;
+
+		if (!pdd->dev->kfd->shared_resources.enable_mes)
+			r = debug_refresh_runlist(pdd->dev->dqm);
+		else
+			r = kfd_dbg_set_mes_debug_mode(pdd, true);
+
+		if (r) {
+			target->dbg_flags = prev_flags;
+			break;
+		}
+
+		rewind_count++;
+	}
+
+	/* Rewind flags */
+	if (r) {
+		target->dbg_flags = prev_flags;
+
+		for (i = 0; i < rewind_count; i++) {
+			struct kfd_process_device *pdd = target->pdds[i];
+
+			if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
+				continue;
+
+			if (!pdd->dev->kfd->shared_resources.enable_mes)
+				debug_refresh_runlist(pdd->dev->dqm);
+			else
+				kfd_dbg_set_mes_debug_mode(pdd, true);
+		}
+	}
+
+	return r;
+}
+
+/* kfd_dbg_trap_deactivate:
+ *	target: target process
+ *	unwind: If this is unwinding a failed kfd_dbg_trap_enable()
+ *	unwind_count:
+ *		If unwind == true, how far down the pdd list we need
+ *				to unwind
+ *		else: ignored
+ */
+void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind_count)
+{
+	int i;
+
+	if (!unwind) {
+		uint32_t flags = 0;
+		int resume_count = resume_queues(target, 0, NULL);
+
+		if (resume_count)
+			pr_debug("Resumed %d queues\n", resume_count);
+
+		cancel_work_sync(&target->debug_event_workarea);
+		kfd_dbg_clear_process_address_watch(target);
+		kfd_dbg_trap_set_wave_launch_mode(target, 0);
+
+		kfd_dbg_trap_set_flags(target, &flags);
+	}
+
+	for (i = 0; i < target->n_pdds; i++) {
+		struct kfd_process_device *pdd = target->pdds[i];
+
+		/* If this is an unwind, and we have unwound the required
+		 * enable calls on the pdd list, we need to stop now
+		 * otherwise we may mess up another debugger session.
+		 */
+		if (unwind && i == unwind_count)
+			break;
+
+		kfd_process_set_trap_debug_flag(&pdd->qpd, false);
+
+		/* GFX off is already disabled by debug activate if not RLC restore supported. */
+		if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
+			amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+		pdd->spi_dbg_override =
+				pdd->dev->kfd2kgd->disable_debug_trap(
+				pdd->dev->adev,
+				target->runtime_info.ttmp_setup,
+				pdd->dev->vm_info.last_vmid_kfd);
+		amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+
+		if (!kfd_dbg_is_per_vmid_supported(pdd->dev) &&
+				release_debug_trap_vmid(pdd->dev->dqm, &pdd->qpd))
+			pr_err("Failed to release debug vmid on [%i]\n", pdd->dev->id);
+
+		if (!pdd->dev->kfd->shared_resources.enable_mes)
+			debug_refresh_runlist(pdd->dev->dqm);
+		else
+			kfd_dbg_set_mes_debug_mode(pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev));
+	}
+
+	kfd_dbg_set_workaround(target, false);
+}
+
+static void kfd_dbg_clean_exception_status(struct kfd_process *target)
+{
+	struct process_queue_manager *pqm;
+	struct process_queue_node *pqn;
+	int i;
+
+	for (i = 0; i < target->n_pdds; i++) {
+		struct kfd_process_device *pdd = target->pdds[i];
+
+		kfd_process_drain_interrupts(pdd);
+
+		pdd->exception_status = 0;
+	}
+
+	pqm = &target->pqm;
+	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
+		if (!pqn->q)
+			continue;
+
+		pqn->q->properties.exception_status = 0;
+	}
+
+	target->exception_status = 0;
+}
+
+int kfd_dbg_trap_disable(struct kfd_process *target)
+{
+	if (!target->debug_trap_enabled)
+		return 0;
+
+	/*
+	 * Defer deactivation to runtime if runtime not enabled otherwise reset
+	 * attached running target runtime state to enable for re-attach.
+	 */
+	if (target->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED)
+		kfd_dbg_trap_deactivate(target, false, 0);
+	else if (target->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_DISABLED)
+		target->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
+
+	fput(target->dbg_ev_file);
+	target->dbg_ev_file = NULL;
+
+	if (target->debugger_process) {
+		atomic_dec(&target->debugger_process->debugged_process_count);
+		target->debugger_process = NULL;
+	}
+
+	target->debug_trap_enabled = false;
+	kfd_dbg_clean_exception_status(target);
+	kfd_unref_process(target);
+
+	return 0;
+}
+
+int kfd_dbg_trap_activate(struct kfd_process *target)
+{
+	int i, r = 0;
+
+	r = kfd_dbg_set_workaround(target, true);
+	if (r)
+		return r;
+
+	for (i = 0; i < target->n_pdds; i++) {
+		struct kfd_process_device *pdd = target->pdds[i];
+
+		if (!kfd_dbg_is_per_vmid_supported(pdd->dev)) {
+			r = reserve_debug_trap_vmid(pdd->dev->dqm, &pdd->qpd);
+
+			if (r) {
+				target->runtime_info.runtime_state = (r == -EBUSY) ?
+							DEBUG_RUNTIME_STATE_ENABLED_BUSY :
+							DEBUG_RUNTIME_STATE_ENABLED_ERROR;
+
+				goto unwind_err;
+			}
+		}
+
+		/* Disable GFX OFF to prevent garbage read/writes to debug registers.
+		 * If RLC restore of debug registers is not supported and runtime enable
+		 * hasn't done so already on ttmp setup request, restore the trap config registers.
+		 *
+		 * If RLC restore of debug registers is not supported, keep gfx off disabled for
+		 * the debug session.
+		 */
+		amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+		if (!(kfd_dbg_is_rlc_restore_supported(pdd->dev) ||
+						target->runtime_info.ttmp_setup))
+			pdd->dev->kfd2kgd->enable_debug_trap(pdd->dev->adev, true,
+								pdd->dev->vm_info.last_vmid_kfd);
+
+		pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap(
+					pdd->dev->adev,
+					false,
+					pdd->dev->vm_info.last_vmid_kfd);
+
+		if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
+			amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+
+		/*
+		 * Setting the debug flag in the trap handler requires that the TMA has been
+		 * allocated, which occurs during CWSR initialization.
+		 * In the event that CWSR has not been initialized at this point, setting the
+		 * flag will be called again during CWSR initialization if the target process
+		 * is still debug enabled.
+		 */
+		kfd_process_set_trap_debug_flag(&pdd->qpd, true);
+
+		if (!pdd->dev->kfd->shared_resources.enable_mes)
+			r = debug_refresh_runlist(pdd->dev->dqm);
+		else
+			r = kfd_dbg_set_mes_debug_mode(pdd, true);
+
+		if (r) {
+			target->runtime_info.runtime_state =
+					DEBUG_RUNTIME_STATE_ENABLED_ERROR;
+			goto unwind_err;
+		}
+	}
+
+	return 0;
+
+unwind_err:
+	/* Enabling debug failed, we need to disable on
+	 * all GPUs so the enable is all or nothing.
+	 */
+	kfd_dbg_trap_deactivate(target, true, i);
+	return r;
+}
+
+int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
+			void __user *runtime_info, uint32_t *runtime_size)
+{
+	struct file *f;
+	uint32_t copy_size;
+	int i, r = 0;
+
+	if (target->debug_trap_enabled)
+		return -EALREADY;
+
+	/* Enable pre-checks */
+	for (i = 0; i < target->n_pdds; i++) {
+		struct kfd_process_device *pdd = target->pdds[i];
+
+		if (!KFD_IS_SOC15(pdd->dev))
+			return -ENODEV;
+
+		if (pdd->qpd.num_gws && (!kfd_dbg_has_gws_support(pdd->dev) ||
+					 kfd_dbg_has_cwsr_workaround(pdd->dev)))
+			return -EBUSY;
+	}
+
+	copy_size = min((size_t)(*runtime_size), sizeof(target->runtime_info));
+
+	f = fget(fd);
+	if (!f) {
+		pr_err("Failed to get file for (%i)\n", fd);
+		return -EBADF;
+	}
+
+	target->dbg_ev_file = f;
+
+	/* defer activation to runtime if not runtime enabled */
+	if (target->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED)
+		kfd_dbg_trap_activate(target);
+
+	/* We already hold the process reference but hold another one for the
+	 * debug session.
+	 */
+	kref_get(&target->ref);
+	target->debug_trap_enabled = true;
+
+	if (target->debugger_process)
+		atomic_inc(&target->debugger_process->debugged_process_count);
+
+	if (copy_to_user(runtime_info, (void *)&target->runtime_info, copy_size)) {
+		kfd_dbg_trap_deactivate(target, false, 0);
+		r = -EFAULT;
+	}
+
+	*runtime_size = sizeof(target->runtime_info);
+
+	return r;
+}
+
+static int kfd_dbg_validate_trap_override_request(struct kfd_process *p,
+						uint32_t trap_override,
+						uint32_t trap_mask_request,
+						uint32_t *trap_mask_supported)
+{
+	int i = 0;
+
+	*trap_mask_supported = 0xffffffff;
+
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+		int err = pdd->dev->kfd2kgd->validate_trap_override_request(
+								pdd->dev->adev,
+								trap_override,
+								trap_mask_supported);
+
+		if (err)
+			return err;
+	}
+
+	if (trap_mask_request & ~*trap_mask_supported)
+		return -EACCES;
+
+	return 0;
+}
+
+int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
+					uint32_t trap_override,
+					uint32_t trap_mask_bits,
+					uint32_t trap_mask_request,
+					uint32_t *trap_mask_prev,
+					uint32_t *trap_mask_supported)
+{
+	int r = 0, i;
+
+	r = kfd_dbg_validate_trap_override_request(target,
+						trap_override,
+						trap_mask_request,
+						trap_mask_supported);
+
+	if (r)
+		return r;
+
+	for (i = 0; i < target->n_pdds; i++) {
+		struct kfd_process_device *pdd = target->pdds[i];
+
+		amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+		pdd->spi_dbg_override = pdd->dev->kfd2kgd->set_wave_launch_trap_override(
+				pdd->dev->adev,
+				pdd->dev->vm_info.last_vmid_kfd,
+				trap_override,
+				trap_mask_bits,
+				trap_mask_request,
+				trap_mask_prev,
+				pdd->spi_dbg_override);
+		amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+
+		if (!pdd->dev->kfd->shared_resources.enable_mes)
+			r = debug_refresh_runlist(pdd->dev->dqm);
+		else
+			r = kfd_dbg_set_mes_debug_mode(pdd, true);
+
+		if (r)
+			break;
+	}
+
+	return r;
+}
+
+int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target,
+					uint8_t wave_launch_mode)
+{
+	int r = 0, i;
+
+	if (wave_launch_mode != KFD_DBG_TRAP_WAVE_LAUNCH_MODE_NORMAL &&
+			wave_launch_mode != KFD_DBG_TRAP_WAVE_LAUNCH_MODE_HALT &&
+			wave_launch_mode != KFD_DBG_TRAP_WAVE_LAUNCH_MODE_DEBUG)
+		return -EINVAL;
+
+	for (i = 0; i < target->n_pdds; i++) {
+		struct kfd_process_device *pdd = target->pdds[i];
+
+		amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+		pdd->spi_dbg_launch_mode = pdd->dev->kfd2kgd->set_wave_launch_mode(
+				pdd->dev->adev,
+				wave_launch_mode,
+				pdd->dev->vm_info.last_vmid_kfd);
+		amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+
+		if (!pdd->dev->kfd->shared_resources.enable_mes)
+			r = debug_refresh_runlist(pdd->dev->dqm);
+		else
+			r = kfd_dbg_set_mes_debug_mode(pdd, true);
+
+		if (r)
+			break;
+	}
+
+	return r;
+}
+
+int kfd_dbg_trap_query_exception_info(struct kfd_process *target,
+		uint32_t source_id,
+		uint32_t exception_code,
+		bool clear_exception,
+		void __user *info,
+		uint32_t *info_size)
+{
+	bool found = false;
+	int r = 0;
+	uint32_t copy_size, actual_info_size = 0;
+	uint64_t *exception_status_ptr = NULL;
+
+	if (!target)
+		return -EINVAL;
+
+	if (!info || !info_size)
+		return -EINVAL;
+
+	mutex_lock(&target->event_mutex);
+
+	if (KFD_DBG_EC_TYPE_IS_QUEUE(exception_code)) {
+		/* Per queue exceptions */
+		struct queue *queue = NULL;
+		int i;
+
+		for (i = 0; i < target->n_pdds; i++) {
+			struct kfd_process_device *pdd = target->pdds[i];
+			struct qcm_process_device *qpd = &pdd->qpd;
+
+			list_for_each_entry(queue, &qpd->queues_list, list) {
+				if (!found && queue->properties.queue_id == source_id) {
+					found = true;
+					break;
+				}
+			}
+			if (found)
+				break;
+		}
+
+		if (!found) {
+			r = -EINVAL;
+			goto out;
+		}
+
+		if (!(queue->properties.exception_status & KFD_EC_MASK(exception_code))) {
+			r = -ENODATA;
+			goto out;
+		}
+		exception_status_ptr = &queue->properties.exception_status;
+	} else if (KFD_DBG_EC_TYPE_IS_DEVICE(exception_code)) {
+		/* Per device exceptions */
+		struct kfd_process_device *pdd = NULL;
+		int i;
+
+		for (i = 0; i < target->n_pdds; i++) {
+			pdd = target->pdds[i];
+			if (pdd->dev->id == source_id) {
+				found = true;
+				break;
+			}
+		}
+
+		if (!found) {
+			r = -EINVAL;
+			goto out;
+		}
+
+		if (!(pdd->exception_status & KFD_EC_MASK(exception_code))) {
+			r = -ENODATA;
+			goto out;
+		}
+
+		if (exception_code == EC_DEVICE_MEMORY_VIOLATION) {
+			copy_size = min((size_t)(*info_size), pdd->vm_fault_exc_data_size);
+
+			if (copy_to_user(info, pdd->vm_fault_exc_data, copy_size)) {
+				r = -EFAULT;
+				goto out;
+			}
+			actual_info_size = pdd->vm_fault_exc_data_size;
+			if (clear_exception) {
+				kfree(pdd->vm_fault_exc_data);
+				pdd->vm_fault_exc_data = NULL;
+				pdd->vm_fault_exc_data_size = 0;
+			}
+		}
+		exception_status_ptr = &pdd->exception_status;
+	} else if (KFD_DBG_EC_TYPE_IS_PROCESS(exception_code)) {
+		/* Per process exceptions */
+		if (!(target->exception_status & KFD_EC_MASK(exception_code))) {
+			r = -ENODATA;
+			goto out;
+		}
+
+		if (exception_code == EC_PROCESS_RUNTIME) {
+			copy_size = min((size_t)(*info_size), sizeof(target->runtime_info));
+
+			if (copy_to_user(info, (void *)&target->runtime_info, copy_size)) {
+				r = -EFAULT;
+				goto out;
+			}
+
+			actual_info_size = sizeof(target->runtime_info);
+		}
+
+		exception_status_ptr = &target->exception_status;
+	} else {
+		pr_debug("Bad exception type [%i]\n", exception_code);
+		r = -EINVAL;
+		goto out;
+	}
+
+	*info_size = actual_info_size;
+	if (clear_exception)
+		*exception_status_ptr &= ~KFD_EC_MASK(exception_code);
+out:
+	mutex_unlock(&target->event_mutex);
+	return r;
+}
+
+int kfd_dbg_trap_device_snapshot(struct kfd_process *target,
+		uint64_t exception_clear_mask,
+		void __user *user_info,
+		uint32_t *number_of_device_infos,
+		uint32_t *entry_size)
+{
+	struct kfd_dbg_device_info_entry device_info;
+	uint32_t tmp_entry_size = *entry_size, tmp_num_devices;
+	int i, r = 0;
+
+	if (!(target && user_info && number_of_device_infos && entry_size))
+		return -EINVAL;
+
+	tmp_num_devices = min_t(size_t, *number_of_device_infos, target->n_pdds);
+	*number_of_device_infos = target->n_pdds;
+	*entry_size = min_t(size_t, *entry_size, sizeof(device_info));
+
+	if (!tmp_num_devices)
+		return 0;
+
+	memset(&device_info, 0, sizeof(device_info));
+
+	mutex_lock(&target->event_mutex);
+
+	/* Run over all pdd of the process */
+	for (i = 0; i < tmp_num_devices; i++) {
+		struct kfd_process_device *pdd = target->pdds[i];
+		struct kfd_topology_device *topo_dev = kfd_topology_device_by_id(pdd->dev->id);
+
+		device_info.gpu_id = pdd->dev->id;
+		device_info.exception_status = pdd->exception_status;
+		device_info.lds_base = pdd->lds_base;
+		device_info.lds_limit = pdd->lds_limit;
+		device_info.scratch_base = pdd->scratch_base;
+		device_info.scratch_limit = pdd->scratch_limit;
+		device_info.gpuvm_base = pdd->gpuvm_base;
+		device_info.gpuvm_limit = pdd->gpuvm_limit;
+		device_info.location_id = topo_dev->node_props.location_id;
+		device_info.vendor_id = topo_dev->node_props.vendor_id;
+		device_info.device_id = topo_dev->node_props.device_id;
+		device_info.revision_id = pdd->dev->adev->pdev->revision;
+		device_info.subsystem_vendor_id = pdd->dev->adev->pdev->subsystem_vendor;
+		device_info.subsystem_device_id = pdd->dev->adev->pdev->subsystem_device;
+		device_info.fw_version = pdd->dev->kfd->mec_fw_version;
+		device_info.gfx_target_version =
+			topo_dev->node_props.gfx_target_version;
+		device_info.simd_count = topo_dev->node_props.simd_count;
+		device_info.max_waves_per_simd =
+			topo_dev->node_props.max_waves_per_simd;
+		device_info.array_count = topo_dev->node_props.array_count;
+		device_info.simd_arrays_per_engine =
+			topo_dev->node_props.simd_arrays_per_engine;
+		device_info.num_xcc = NUM_XCC(pdd->dev->xcc_mask);
+		device_info.capability = topo_dev->node_props.capability;
+		device_info.debug_prop = topo_dev->node_props.debug_prop;
+
+		if (exception_clear_mask)
+			pdd->exception_status &= ~exception_clear_mask;
+
+		if (copy_to_user(user_info, &device_info, *entry_size)) {
+			r = -EFAULT;
+			break;
+		}
+
+		user_info += tmp_entry_size;
+	}
+
+	mutex_unlock(&target->event_mutex);
+
+	return r;
+}
+
+void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target,
+					uint64_t exception_set_mask)
+{
+	uint64_t found_mask = 0;
+	struct process_queue_manager *pqm;
+	struct process_queue_node *pqn;
+	static const char write_data = '.';
+	loff_t pos = 0;
+	int i;
+
+	mutex_lock(&target->event_mutex);
+
+	found_mask |= target->exception_status;
+
+	pqm = &target->pqm;
+	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
+		if (!pqn->q)
+			continue;
+
+		found_mask |= pqn->q->properties.exception_status;
+	}
+
+	for (i = 0; i < target->n_pdds; i++) {
+		struct kfd_process_device *pdd = target->pdds[i];
+
+		found_mask |= pdd->exception_status;
+	}
+
+	if (exception_set_mask & found_mask)
+		kernel_write(target->dbg_ev_file, &write_data, 1, &pos);
+
+	target->exception_enable_mask = exception_set_mask;
+
+	mutex_unlock(&target->event_mutex);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
new file mode 100644
index 000000000..fd0ff64d4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef KFD_DEBUG_EVENTS_H_INCLUDED
+#define KFD_DEBUG_EVENTS_H_INCLUDED
+
+#include "kfd_priv.h"
+
+void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind_count);
+int kfd_dbg_trap_activate(struct kfd_process *target);
+int kfd_dbg_ev_query_debug_event(struct kfd_process *process,
+			unsigned int *queue_id,
+			unsigned int *gpu_id,
+			uint64_t exception_clear_mask,
+			uint64_t *event_status);
+bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev,
+				   unsigned int pasid,
+				   uint32_t doorbell_id,
+				   uint64_t trap_mask,
+				   void *exception_data,
+				   size_t exception_data_size);
+bool kfd_dbg_ev_raise(uint64_t event_mask,
+			struct kfd_process *process, struct kfd_node *dev,
+			unsigned int source_id, bool use_worker,
+			void *exception_data,
+			size_t exception_data_size);
+int kfd_dbg_trap_disable(struct kfd_process *target);
+int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
+			void __user *runtime_info,
+			uint32_t *runtime_info_size);
+int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
+					uint32_t trap_override,
+					uint32_t trap_mask_bits,
+					uint32_t trap_mask_request,
+					uint32_t *trap_mask_prev,
+					uint32_t *trap_mask_supported);
+int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target,
+					uint8_t wave_launch_mode);
+int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
+					uint32_t watch_id);
+int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
+					uint64_t watch_address,
+					uint32_t watch_address_mask,
+					uint32_t *watch_id,
+					uint32_t watch_mode);
+int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags);
+int kfd_dbg_trap_query_exception_info(struct kfd_process *target,
+		uint32_t source_id,
+		uint32_t exception_code,
+		bool clear_exception,
+		void __user *info,
+		uint32_t *info_size);
+int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
+					unsigned int dev_id,
+					unsigned int queue_id,
+					uint64_t error_reason);
+
+static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev)
+{
+	return (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
+		KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
+		KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0));
+}
+
+void debug_event_write_work_handler(struct work_struct *work);
+int kfd_dbg_trap_device_snapshot(struct kfd_process *target,
+		uint64_t exception_clear_mask,
+		void __user *user_info,
+		uint32_t *number_of_device_infos,
+		uint32_t *entry_size);
+
+void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target,
+					uint64_t exception_set_mask);
+/*
+ * If GFX off is enabled, chips that do not support RLC restore for the debug
+ * registers will disable GFX off temporarily for the entire debug session.
+ * See disable_on_trap_action_entry and enable_on_trap_action_exit for details.
+ */
+static inline bool kfd_dbg_is_rlc_restore_supported(struct kfd_node *dev)
+{
+	return !(KFD_GC_VERSION(dev) == IP_VERSION(10, 1, 10) ||
+		 KFD_GC_VERSION(dev) == IP_VERSION(10, 1, 1));
+}
+
+static inline bool kfd_dbg_has_cwsr_workaround(struct kfd_node *dev)
+{
+	return KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
+	       KFD_GC_VERSION(dev) <= IP_VERSION(11, 0, 3);
+}
+
+static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev)
+{
+	if ((KFD_GC_VERSION(dev) == IP_VERSION(9, 0, 1)
+			&& dev->kfd->mec2_fw_version < 0x81b6) ||
+		(KFD_GC_VERSION(dev) >= IP_VERSION(9, 1, 0)
+			&& KFD_GC_VERSION(dev) <= IP_VERSION(9, 2, 2)
+			&& dev->kfd->mec2_fw_version < 0x1b6) ||
+		(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0)
+			&& dev->kfd->mec2_fw_version < 0x1b6) ||
+		(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1)
+			&& dev->kfd->mec2_fw_version < 0x30) ||
+		(KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
+			KFD_GC_VERSION(dev) < IP_VERSION(12, 0, 0)))
+		return false;
+
+	/* Assume debugging and cooperative launch supported otherwise. */
+	return true;
+}
+
+int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en);
+
+static inline bool kfd_dbg_has_ttmps_always_setup(struct kfd_node *dev)
+{
+	return (KFD_GC_VERSION(dev) < IP_VERSION(11, 0, 0) &&
+			KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 2)) ||
+	       (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
+			KFD_GC_VERSION(dev) < IP_VERSION(12, 0, 0) &&
+			(dev->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 70);
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
new file mode 100644
index 000000000..4a5a0a4e0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+
+#include "kfd_priv.h"
+
+static struct dentry *debugfs_root;
+
+static int kfd_debugfs_open(struct inode *inode, struct file *file)
+{
+	int (*show)(struct seq_file *, void *) = inode->i_private;
+
+	return single_open(file, show, NULL);
+}
+static int kfd_debugfs_hang_hws_read(struct seq_file *m, void *data)
+{
+	seq_puts(m, "echo gpu_id > hang_hws\n");
+	return 0;
+}
+
+static ssize_t kfd_debugfs_hang_hws_write(struct file *file,
+	const char __user *user_buf, size_t size, loff_t *ppos)
+{
+	struct kfd_node *dev;
+	char tmp[16];
+	uint32_t gpu_id;
+	int ret = -EINVAL;
+
+	memset(tmp, 0, 16);
+	if (size >= 16) {
+		pr_err("Invalid input for gpu id.\n");
+		goto out;
+	}
+	if (copy_from_user(tmp, user_buf, size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+	if (kstrtoint(tmp, 10, &gpu_id)) {
+		pr_err("Invalid input for gpu id.\n");
+		goto out;
+	}
+	dev = kfd_device_by_id(gpu_id);
+	if (dev) {
+		kfd_debugfs_hang_hws(dev);
+		ret = size;
+	} else
+		pr_err("Cannot find device %d.\n", gpu_id);
+
+out:
+	return ret;
+}
+
+static const struct file_operations kfd_debugfs_fops = {
+	.owner = THIS_MODULE,
+	.open = kfd_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static const struct file_operations kfd_debugfs_hang_hws_fops = {
+	.owner = THIS_MODULE,
+	.open = kfd_debugfs_open,
+	.read = seq_read,
+	.write = kfd_debugfs_hang_hws_write,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void kfd_debugfs_init(void)
+{
+	debugfs_root = debugfs_create_dir("kfd", NULL);
+
+	debugfs_create_file("mqds", S_IFREG | 0444, debugfs_root,
+			    kfd_debugfs_mqds_by_process, &kfd_debugfs_fops);
+	debugfs_create_file("hqds", S_IFREG | 0444, debugfs_root,
+			    kfd_debugfs_hqds_by_device, &kfd_debugfs_fops);
+	debugfs_create_file("rls", S_IFREG | 0444, debugfs_root,
+			    kfd_debugfs_rls_by_device, &kfd_debugfs_fops);
+	debugfs_create_file("hang_hws", S_IFREG | 0200, debugfs_root,
+			    kfd_debugfs_hang_hws_read, &kfd_debugfs_hang_hws_fops);
+	debugfs_create_file("mem_limit", S_IFREG | 0200, debugfs_root,
+			    kfd_debugfs_kfd_mem_limits, &kfd_debugfs_fops);
+}
+
+void kfd_debugfs_fini(void)
+{
+	debugfs_remove_recursive(debugfs_root);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
new file mode 100644
index 000000000..93ce181eb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -0,0 +1,1406 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/bsearch.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include "kfd_priv.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers_vi.h"
+#include "kfd_pm4_headers_aldebaran.h"
+#include "cwsr_trap_handler.h"
+#include "amdgpu_amdkfd.h"
+#include "kfd_smi_events.h"
+#include "kfd_svm.h"
+#include "kfd_migrate.h"
+#include "amdgpu.h"
+#include "amdgpu_xcp.h"
+
+#define MQD_SIZE_ALIGNED 768
+
+/*
+ * kfd_locked is used to lock the kfd driver during suspend or reset
+ * once locked, kfd driver will stop any further GPU execution.
+ * create process (open) will return -EAGAIN.
+ */
+static int kfd_locked;
+
+#ifdef CONFIG_DRM_AMDGPU_CIK
+extern const struct kfd2kgd_calls gfx_v7_kfd2kgd;
+#endif
+extern const struct kfd2kgd_calls gfx_v8_kfd2kgd;
+extern const struct kfd2kgd_calls gfx_v9_kfd2kgd;
+extern const struct kfd2kgd_calls arcturus_kfd2kgd;
+extern const struct kfd2kgd_calls aldebaran_kfd2kgd;
+extern const struct kfd2kgd_calls gc_9_4_3_kfd2kgd;
+extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
+extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd;
+extern const struct kfd2kgd_calls gfx_v11_kfd2kgd;
+
+static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
+				unsigned int chunk_size);
+static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
+
+static int kfd_resume(struct kfd_node *kfd);
+
+static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
+{
+	uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0];
+
+	switch (sdma_version) {
+	case IP_VERSION(4, 0, 0):/* VEGA10 */
+	case IP_VERSION(4, 0, 1):/* VEGA12 */
+	case IP_VERSION(4, 1, 0):/* RAVEN */
+	case IP_VERSION(4, 1, 1):/* RAVEN */
+	case IP_VERSION(4, 1, 2):/* RENOIR */
+	case IP_VERSION(5, 2, 1):/* VANGOGH */
+	case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
+	case IP_VERSION(5, 2, 6):/* GC 10.3.6 */
+	case IP_VERSION(5, 2, 7):/* GC 10.3.7 */
+		kfd->device_info.num_sdma_queues_per_engine = 2;
+		break;
+	case IP_VERSION(4, 2, 0):/* VEGA20 */
+	case IP_VERSION(4, 2, 2):/* ARCTURUS */
+	case IP_VERSION(4, 4, 0):/* ALDEBARAN */
+	case IP_VERSION(4, 4, 2):
+	case IP_VERSION(5, 0, 0):/* NAVI10 */
+	case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */
+	case IP_VERSION(5, 0, 2):/* NAVI14 */
+	case IP_VERSION(5, 0, 5):/* NAVI12 */
+	case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */
+	case IP_VERSION(5, 2, 2):/* NAVY_FLOUNDER */
+	case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */
+	case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */
+	case IP_VERSION(6, 0, 0):
+	case IP_VERSION(6, 0, 1):
+	case IP_VERSION(6, 0, 2):
+	case IP_VERSION(6, 0, 3):
+		kfd->device_info.num_sdma_queues_per_engine = 8;
+		break;
+	default:
+		dev_warn(kfd_device,
+			"Default sdma queue per engine(8) is set due to mismatch of sdma ip block(SDMA_HWIP:0x%x).\n",
+			sdma_version);
+		kfd->device_info.num_sdma_queues_per_engine = 8;
+	}
+
+	bitmap_zero(kfd->device_info.reserved_sdma_queues_bitmap, KFD_MAX_SDMA_QUEUES);
+
+	switch (sdma_version) {
+	case IP_VERSION(6, 0, 0):
+	case IP_VERSION(6, 0, 1):
+	case IP_VERSION(6, 0, 2):
+	case IP_VERSION(6, 0, 3):
+		/* Reserve 1 for paging and 1 for gfx */
+		kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
+		/* BIT(0)=engine-0 queue-0; BIT(1)=engine-1 queue-0; BIT(2)=engine-0 queue-1; ... */
+		bitmap_set(kfd->device_info.reserved_sdma_queues_bitmap, 0,
+			   kfd->adev->sdma.num_instances *
+			   kfd->device_info.num_reserved_sdma_queues_per_engine);
+		break;
+	default:
+		break;
+	}
+}
+
+static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
+{
+	uint32_t gc_version = KFD_GC_VERSION(kfd);
+
+	switch (gc_version) {
+	case IP_VERSION(9, 0, 1): /* VEGA10 */
+	case IP_VERSION(9, 1, 0): /* RAVEN */
+	case IP_VERSION(9, 2, 1): /* VEGA12 */
+	case IP_VERSION(9, 2, 2): /* RAVEN */
+	case IP_VERSION(9, 3, 0): /* RENOIR */
+	case IP_VERSION(9, 4, 0): /* VEGA20 */
+	case IP_VERSION(9, 4, 1): /* ARCTURUS */
+	case IP_VERSION(9, 4, 2): /* ALDEBARAN */
+		kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
+		break;
+	case IP_VERSION(9, 4, 3): /* GC 9.4.3 */
+		kfd->device_info.event_interrupt_class =
+						&event_interrupt_class_v9_4_3;
+		break;
+	case IP_VERSION(10, 3, 1): /* VANGOGH */
+	case IP_VERSION(10, 3, 3): /* YELLOW_CARP */
+	case IP_VERSION(10, 3, 6): /* GC 10.3.6 */
+	case IP_VERSION(10, 3, 7): /* GC 10.3.7 */
+	case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */
+	case IP_VERSION(10, 1, 4):
+	case IP_VERSION(10, 1, 10): /* NAVI10 */
+	case IP_VERSION(10, 1, 2): /* NAVI12 */
+	case IP_VERSION(10, 1, 1): /* NAVI14 */
+	case IP_VERSION(10, 3, 0): /* SIENNA_CICHLID */
+	case IP_VERSION(10, 3, 2): /* NAVY_FLOUNDER */
+	case IP_VERSION(10, 3, 4): /* DIMGREY_CAVEFISH */
+	case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */
+		kfd->device_info.event_interrupt_class = &event_interrupt_class_v10;
+		break;
+	case IP_VERSION(11, 0, 0):
+	case IP_VERSION(11, 0, 1):
+	case IP_VERSION(11, 0, 2):
+	case IP_VERSION(11, 0, 3):
+	case IP_VERSION(11, 0, 4):
+		kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
+		break;
+	default:
+		dev_warn(kfd_device, "v9 event interrupt handler is set due to "
+			"mismatch of gc ip block(GC_HWIP:0x%x).\n", gc_version);
+		kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
+	}
+}
+
+static void kfd_device_info_init(struct kfd_dev *kfd,
+				 bool vf, uint32_t gfx_target_version)
+{
+	uint32_t gc_version = KFD_GC_VERSION(kfd);
+	uint32_t asic_type = kfd->adev->asic_type;
+
+	kfd->device_info.max_pasid_bits = 16;
+	kfd->device_info.max_no_of_hqd = 24;
+	kfd->device_info.num_of_watch_points = 4;
+	kfd->device_info.mqd_size_aligned = MQD_SIZE_ALIGNED;
+	kfd->device_info.gfx_target_version = gfx_target_version;
+
+	if (KFD_IS_SOC15(kfd)) {
+		kfd->device_info.doorbell_size = 8;
+		kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t);
+		kfd->device_info.supports_cwsr = true;
+
+		kfd_device_info_set_sdma_info(kfd);
+
+		kfd_device_info_set_event_interrupt_class(kfd);
+
+		if (gc_version < IP_VERSION(11, 0, 0)) {
+			/* Navi2x+, Navi1x+ */
+			if (gc_version == IP_VERSION(10, 3, 6))
+				kfd->device_info.no_atomic_fw_version = 14;
+			else if (gc_version == IP_VERSION(10, 3, 7))
+				kfd->device_info.no_atomic_fw_version = 3;
+			else if (gc_version >= IP_VERSION(10, 3, 0))
+				kfd->device_info.no_atomic_fw_version = 92;
+			else if (gc_version >= IP_VERSION(10, 1, 1))
+				kfd->device_info.no_atomic_fw_version = 145;
+
+			/* Navi1x+ */
+			if (gc_version >= IP_VERSION(10, 1, 1))
+				kfd->device_info.needs_pci_atomics = true;
+		} else if (gc_version < IP_VERSION(12, 0, 0)) {
+			/*
+			 * PCIe atomics support acknowledgment in GFX11 RS64 CPFW requires
+			 * MEC version >= 509. Prior RS64 CPFW versions (and all F32) require
+			 * PCIe atomics support.
+			 */
+			kfd->device_info.needs_pci_atomics = true;
+			kfd->device_info.no_atomic_fw_version = kfd->adev->gfx.rs64_enable ? 509 : 0;
+		}
+	} else {
+		kfd->device_info.doorbell_size = 4;
+		kfd->device_info.ih_ring_entry_size = 4 * sizeof(uint32_t);
+		kfd->device_info.event_interrupt_class = &event_interrupt_class_cik;
+		kfd->device_info.num_sdma_queues_per_engine = 2;
+
+		if (asic_type != CHIP_KAVERI &&
+		    asic_type != CHIP_HAWAII &&
+		    asic_type != CHIP_TONGA)
+			kfd->device_info.supports_cwsr = true;
+
+		if (asic_type != CHIP_HAWAII && !vf)
+			kfd->device_info.needs_pci_atomics = true;
+	}
+}
+
+struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
+{
+	struct kfd_dev *kfd = NULL;
+	const struct kfd2kgd_calls *f2g = NULL;
+	uint32_t gfx_target_version = 0;
+
+	switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_CIK
+	case CHIP_KAVERI:
+		gfx_target_version = 70000;
+		if (!vf)
+			f2g = &gfx_v7_kfd2kgd;
+		break;
+#endif
+	case CHIP_CARRIZO:
+		gfx_target_version = 80001;
+		if (!vf)
+			f2g = &gfx_v8_kfd2kgd;
+		break;
+#ifdef CONFIG_DRM_AMDGPU_CIK
+	case CHIP_HAWAII:
+		gfx_target_version = 70001;
+		if (!amdgpu_exp_hw_support)
+			pr_info(
+	"KFD support on Hawaii is experimental. See modparam exp_hw_support\n"
+				);
+		else if (!vf)
+			f2g = &gfx_v7_kfd2kgd;
+		break;
+#endif
+	case CHIP_TONGA:
+		gfx_target_version = 80002;
+		if (!vf)
+			f2g = &gfx_v8_kfd2kgd;
+		break;
+	case CHIP_FIJI:
+	case CHIP_POLARIS10:
+		gfx_target_version = 80003;
+		f2g = &gfx_v8_kfd2kgd;
+		break;
+	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
+		gfx_target_version = 80003;
+		if (!vf)
+			f2g = &gfx_v8_kfd2kgd;
+		break;
+	default:
+		switch (adev->ip_versions[GC_HWIP][0]) {
+		/* Vega 10 */
+		case IP_VERSION(9, 0, 1):
+			gfx_target_version = 90000;
+			f2g = &gfx_v9_kfd2kgd;
+			break;
+		/* Raven */
+		case IP_VERSION(9, 1, 0):
+		case IP_VERSION(9, 2, 2):
+			gfx_target_version = 90002;
+			if (!vf)
+				f2g = &gfx_v9_kfd2kgd;
+			break;
+		/* Vega12 */
+		case IP_VERSION(9, 2, 1):
+			gfx_target_version = 90004;
+			if (!vf)
+				f2g = &gfx_v9_kfd2kgd;
+			break;
+		/* Renoir */
+		case IP_VERSION(9, 3, 0):
+			gfx_target_version = 90012;
+			if (!vf)
+				f2g = &gfx_v9_kfd2kgd;
+			break;
+		/* Vega20 */
+		case IP_VERSION(9, 4, 0):
+			gfx_target_version = 90006;
+			if (!vf)
+				f2g = &gfx_v9_kfd2kgd;
+			break;
+		/* Arcturus */
+		case IP_VERSION(9, 4, 1):
+			gfx_target_version = 90008;
+			f2g = &arcturus_kfd2kgd;
+			break;
+		/* Aldebaran */
+		case IP_VERSION(9, 4, 2):
+			gfx_target_version = 90010;
+			f2g = &aldebaran_kfd2kgd;
+			break;
+		case IP_VERSION(9, 4, 3):
+			gfx_target_version = adev->rev_id >= 1 ? 90402
+					   : adev->flags & AMD_IS_APU ? 90400
+					   : 90401;
+			f2g = &gc_9_4_3_kfd2kgd;
+			break;
+		/* Navi10 */
+		case IP_VERSION(10, 1, 10):
+			gfx_target_version = 100100;
+			if (!vf)
+				f2g = &gfx_v10_kfd2kgd;
+			break;
+		/* Navi12 */
+		case IP_VERSION(10, 1, 2):
+			gfx_target_version = 100101;
+			f2g = &gfx_v10_kfd2kgd;
+			break;
+		/* Navi14 */
+		case IP_VERSION(10, 1, 1):
+			gfx_target_version = 100102;
+			if (!vf)
+				f2g = &gfx_v10_kfd2kgd;
+			break;
+		/* Cyan Skillfish */
+		case IP_VERSION(10, 1, 3):
+		case IP_VERSION(10, 1, 4):
+			gfx_target_version = 100103;
+			if (!vf)
+				f2g = &gfx_v10_kfd2kgd;
+			break;
+		/* Sienna Cichlid */
+		case IP_VERSION(10, 3, 0):
+			gfx_target_version = 100300;
+			f2g = &gfx_v10_3_kfd2kgd;
+			break;
+		/* Navy Flounder */
+		case IP_VERSION(10, 3, 2):
+			gfx_target_version = 100301;
+			f2g = &gfx_v10_3_kfd2kgd;
+			break;
+		/* Van Gogh */
+		case IP_VERSION(10, 3, 1):
+			gfx_target_version = 100303;
+			if (!vf)
+				f2g = &gfx_v10_3_kfd2kgd;
+			break;
+		/* Dimgrey Cavefish */
+		case IP_VERSION(10, 3, 4):
+			gfx_target_version = 100302;
+			f2g = &gfx_v10_3_kfd2kgd;
+			break;
+		/* Beige Goby */
+		case IP_VERSION(10, 3, 5):
+			gfx_target_version = 100304;
+			f2g = &gfx_v10_3_kfd2kgd;
+			break;
+		/* Yellow Carp */
+		case IP_VERSION(10, 3, 3):
+			gfx_target_version = 100305;
+			if (!vf)
+				f2g = &gfx_v10_3_kfd2kgd;
+			break;
+		case IP_VERSION(10, 3, 6):
+		case IP_VERSION(10, 3, 7):
+			gfx_target_version = 100306;
+			if (!vf)
+				f2g = &gfx_v10_3_kfd2kgd;
+			break;
+		case IP_VERSION(11, 0, 0):
+			gfx_target_version = 110000;
+			f2g = &gfx_v11_kfd2kgd;
+			break;
+		case IP_VERSION(11, 0, 1):
+		case IP_VERSION(11, 0, 4):
+			gfx_target_version = 110003;
+			f2g = &gfx_v11_kfd2kgd;
+			break;
+		case IP_VERSION(11, 0, 2):
+			gfx_target_version = 110002;
+			f2g = &gfx_v11_kfd2kgd;
+			break;
+		case IP_VERSION(11, 0, 3):
+			if ((adev->pdev->device == 0x7460 &&
+			     adev->pdev->revision == 0x00) ||
+			    (adev->pdev->device == 0x7461 &&
+			     adev->pdev->revision == 0x00))
+				/* Note: Compiler version is 11.0.5 while HW version is 11.0.3 */
+				gfx_target_version = 110005;
+			else
+				/* Note: Compiler version is 11.0.1 while HW version is 11.0.3 */
+				gfx_target_version = 110001;
+			f2g = &gfx_v11_kfd2kgd;
+			break;
+		default:
+			break;
+		}
+		break;
+	}
+
+	if (!f2g) {
+		if (adev->ip_versions[GC_HWIP][0])
+			dev_err(kfd_device, "GC IP %06x %s not supported in kfd\n",
+				adev->ip_versions[GC_HWIP][0], vf ? "VF" : "");
+		else
+			dev_err(kfd_device, "%s %s not supported in kfd\n",
+				amdgpu_asic_name[adev->asic_type], vf ? "VF" : "");
+		return NULL;
+	}
+
+	kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
+	if (!kfd)
+		return NULL;
+
+	kfd->adev = adev;
+	kfd_device_info_init(kfd, vf, gfx_target_version);
+	kfd->init_complete = false;
+	kfd->kfd2kgd = f2g;
+	atomic_set(&kfd->compute_profile, 0);
+
+	mutex_init(&kfd->doorbell_mutex);
+
+	ida_init(&kfd->doorbell_ida);
+
+	return kfd;
+}
+
+static void kfd_cwsr_init(struct kfd_dev *kfd)
+{
+	if (cwsr_enable && kfd->device_info.supports_cwsr) {
+		if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) {
+			BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
+			kfd->cwsr_isa = cwsr_trap_gfx8_hex;
+			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
+		} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) {
+			BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE);
+			kfd->cwsr_isa = cwsr_trap_arcturus_hex;
+			kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex);
+		} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) {
+			BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE);
+			kfd->cwsr_isa = cwsr_trap_aldebaran_hex;
+			kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex);
+		} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) {
+			BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex) > PAGE_SIZE);
+			kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex;
+			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex);
+		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
+			BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
+			kfd->cwsr_isa = cwsr_trap_gfx9_hex;
+			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
+		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) {
+			BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
+			kfd->cwsr_isa = cwsr_trap_nv1x_hex;
+			kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
+		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)) {
+			BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
+			kfd->cwsr_isa = cwsr_trap_gfx10_hex;
+			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
+		} else {
+			BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE);
+			kfd->cwsr_isa = cwsr_trap_gfx11_hex;
+			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex);
+		}
+
+		kfd->cwsr_enabled = true;
+	}
+}
+
+static int kfd_gws_init(struct kfd_node *node)
+{
+	int ret = 0;
+	struct kfd_dev *kfd = node->kfd;
+	uint32_t mes_rev = node->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
+
+	if (node->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
+		return 0;
+
+	if (hws_gws_support || (KFD_IS_SOC15(node) &&
+		((KFD_GC_VERSION(node) == IP_VERSION(9, 0, 1)
+			&& kfd->mec2_fw_version >= 0x81b3) ||
+		(KFD_GC_VERSION(node) <= IP_VERSION(9, 4, 0)
+			&& kfd->mec2_fw_version >= 0x1b3)  ||
+		(KFD_GC_VERSION(node) == IP_VERSION(9, 4, 1)
+			&& kfd->mec2_fw_version >= 0x30)   ||
+		(KFD_GC_VERSION(node) == IP_VERSION(9, 4, 2)
+			&& kfd->mec2_fw_version >= 0x28) ||
+		(KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3)) ||
+		(KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0)
+			&& KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0)
+			&& kfd->mec2_fw_version >= 0x6b) ||
+		(KFD_GC_VERSION(node) >= IP_VERSION(11, 0, 0)
+			&& KFD_GC_VERSION(node) < IP_VERSION(12, 0, 0)
+			&& mes_rev >= 68))))
+		ret = amdgpu_amdkfd_alloc_gws(node->adev,
+				node->adev->gds.gws_size, &node->gws);
+
+	return ret;
+}
+
+static void kfd_smi_init(struct kfd_node *dev)
+{
+	INIT_LIST_HEAD(&dev->smi_clients);
+	spin_lock_init(&dev->smi_lock);
+}
+
+static int kfd_init_node(struct kfd_node *node)
+{
+	int err = -1;
+
+	if (kfd_interrupt_init(node)) {
+		dev_err(kfd_device, "Error initializing interrupts\n");
+		goto kfd_interrupt_error;
+	}
+
+	node->dqm = device_queue_manager_init(node);
+	if (!node->dqm) {
+		dev_err(kfd_device, "Error initializing queue manager\n");
+		goto device_queue_manager_error;
+	}
+
+	if (kfd_gws_init(node)) {
+		dev_err(kfd_device, "Could not allocate %d gws\n",
+			node->adev->gds.gws_size);
+		goto gws_error;
+	}
+
+	if (kfd_resume(node))
+		goto kfd_resume_error;
+
+	if (kfd_topology_add_device(node)) {
+		dev_err(kfd_device, "Error adding device to topology\n");
+		goto kfd_topology_add_device_error;
+	}
+
+	kfd_smi_init(node);
+
+	return 0;
+
+kfd_topology_add_device_error:
+kfd_resume_error:
+gws_error:
+	device_queue_manager_uninit(node->dqm);
+device_queue_manager_error:
+	kfd_interrupt_exit(node);
+kfd_interrupt_error:
+	if (node->gws)
+		amdgpu_amdkfd_free_gws(node->adev, node->gws);
+
+	/* Cleanup the node memory here */
+	kfree(node);
+	return err;
+}
+
+static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes)
+{
+	struct kfd_node *knode;
+	unsigned int i;
+
+	for (i = 0; i < num_nodes; i++) {
+		knode = kfd->nodes[i];
+		device_queue_manager_uninit(knode->dqm);
+		kfd_interrupt_exit(knode);
+		kfd_topology_remove_device(knode);
+		if (knode->gws)
+			amdgpu_amdkfd_free_gws(knode->adev, knode->gws);
+		kfree(knode);
+		kfd->nodes[i] = NULL;
+	}
+}
+
+static void kfd_setup_interrupt_bitmap(struct kfd_node *node,
+				       unsigned int kfd_node_idx)
+{
+	struct amdgpu_device *adev = node->adev;
+	uint32_t xcc_mask = node->xcc_mask;
+	uint32_t xcc, mapped_xcc;
+	/*
+	 * Interrupt bitmap is setup for processing interrupts from
+	 * different XCDs and AIDs.
+	 * Interrupt bitmap is defined as follows:
+	 * 1. Bits 0-15 - correspond to the NodeId field.
+	 *    Each bit corresponds to NodeId number. For example, if
+	 *    a KFD node has interrupt bitmap set to 0x7, then this
+	 *    KFD node will process interrupts with NodeId = 0, 1 and 2
+	 *    in the IH cookie.
+	 * 2. Bits 16-31 - unused.
+	 *
+	 * Please note that the kfd_node_idx argument passed to this
+	 * function is not related to NodeId field received in the
+	 * IH cookie.
+	 *
+	 * In CPX mode, a KFD node will process an interrupt if:
+	 * - the Node Id matches the corresponding bit set in
+	 *   Bits 0-15.
+	 * - AND VMID reported in the interrupt lies within the
+	 *   VMID range of the node.
+	 */
+	for_each_inst(xcc, xcc_mask) {
+		mapped_xcc = GET_INST(GC, xcc);
+		node->interrupt_bitmap |= (mapped_xcc % 2 ? 5 : 3) << (4 * (mapped_xcc / 2));
+	}
+	dev_info(kfd_device, "Node: %d, interrupt_bitmap: %x\n", kfd_node_idx,
+							node->interrupt_bitmap);
+}
+
+bool kgd2kfd_device_init(struct kfd_dev *kfd,
+			 const struct kgd2kfd_shared_resources *gpu_resources)
+{
+	unsigned int size, map_process_packet_size, i;
+	struct kfd_node *node;
+	uint32_t first_vmid_kfd, last_vmid_kfd, vmid_num_kfd;
+	unsigned int max_proc_per_quantum;
+	int partition_mode;
+	int xcp_idx;
+
+	kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
+			KGD_ENGINE_MEC1);
+	kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
+			KGD_ENGINE_MEC2);
+	kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
+			KGD_ENGINE_SDMA1);
+	kfd->shared_resources = *gpu_resources;
+
+	kfd->num_nodes = amdgpu_xcp_get_num_xcp(kfd->adev->xcp_mgr);
+
+	if (kfd->num_nodes == 0) {
+		dev_err(kfd_device,
+			"KFD num nodes cannot be 0, num_xcc_in_node: %d\n",
+			kfd->adev->gfx.num_xcc_per_xcp);
+		goto out;
+	}
+
+	/* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
+	 * 32 and 64-bit requests are possible and must be
+	 * supported.
+	 */
+	kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->adev);
+	if (!kfd->pci_atomic_requested &&
+	    kfd->device_info.needs_pci_atomics &&
+	    (!kfd->device_info.no_atomic_fw_version ||
+	     kfd->mec_fw_version < kfd->device_info.no_atomic_fw_version)) {
+		dev_info(kfd_device,
+			 "skipped device %x:%x, PCI rejects atomics %d<%d\n",
+			 kfd->adev->pdev->vendor, kfd->adev->pdev->device,
+			 kfd->mec_fw_version,
+			 kfd->device_info.no_atomic_fw_version);
+		return false;
+	}
+
+	first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
+	last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
+	vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1;
+
+	/* For GFX9.4.3, we need special handling for VMIDs depending on
+	 * partition mode.
+	 * In CPX mode, the VMID range needs to be shared between XCDs.
+	 * Additionally, there are 13 VMIDs (3-15) available for KFD. To
+	 * divide them equally, we change starting VMID to 4 and not use
+	 * VMID 3.
+	 * If the VMID range changes for GFX9.4.3, then this code MUST be
+	 * revisited.
+	 */
+	if (kfd->adev->xcp_mgr) {
+		partition_mode = amdgpu_xcp_query_partition_mode(kfd->adev->xcp_mgr,
+								 AMDGPU_XCP_FL_LOCKED);
+		if (partition_mode == AMDGPU_CPX_PARTITION_MODE &&
+		    kfd->num_nodes != 1) {
+			vmid_num_kfd /= 2;
+			first_vmid_kfd = last_vmid_kfd + 1 - vmid_num_kfd*2;
+		}
+	}
+
+	/* Verify module parameters regarding mapped process number*/
+	if (hws_max_conc_proc >= 0)
+		max_proc_per_quantum = min((u32)hws_max_conc_proc, vmid_num_kfd);
+	else
+		max_proc_per_quantum = vmid_num_kfd;
+
+	/* calculate max size of mqds needed for queues */
+	size = max_num_of_queues_per_device *
+			kfd->device_info.mqd_size_aligned;
+
+	/*
+	 * calculate max size of runlist packet.
+	 * There can be only 2 packets at once
+	 */
+	map_process_packet_size = KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) ?
+				sizeof(struct pm4_mes_map_process_aldebaran) :
+				sizeof(struct pm4_mes_map_process);
+	size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size +
+		max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
+		+ sizeof(struct pm4_mes_runlist)) * 2;
+
+	/* Add size of HIQ & DIQ */
+	size += KFD_KERNEL_QUEUE_SIZE * 2;
+
+	/* add another 512KB for all other allocations on gart (HPD, fences) */
+	size += 512 * 1024;
+
+	if (amdgpu_amdkfd_alloc_gtt_mem(
+			kfd->adev, size, &kfd->gtt_mem,
+			&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
+			false)) {
+		dev_err(kfd_device, "Could not allocate %d bytes\n", size);
+		goto alloc_gtt_mem_failure;
+	}
+
+	dev_info(kfd_device, "Allocated %d bytes on gart\n", size);
+
+	/* Initialize GTT sa with 512 byte chunk size */
+	if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
+		dev_err(kfd_device, "Error initializing gtt sub-allocator\n");
+		goto kfd_gtt_sa_init_error;
+	}
+
+	if (kfd_doorbell_init(kfd)) {
+		dev_err(kfd_device,
+			"Error initializing doorbell aperture\n");
+		goto kfd_doorbell_error;
+	}
+
+	if (amdgpu_use_xgmi_p2p)
+		kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;
+
+	/*
+	 * For GFX9.4.3, the KFD abstracts all partitions within a socket as
+	 * xGMI connected in the topology so assign a unique hive id per
+	 * device based on the pci device location if device is in PCIe mode.
+	 */
+	if (!kfd->hive_id && (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) && kfd->num_nodes > 1)
+		kfd->hive_id = pci_dev_id(kfd->adev->pdev);
+
+	kfd->noretry = kfd->adev->gmc.noretry;
+
+	kfd_cwsr_init(kfd);
+
+	dev_info(kfd_device, "Total number of KFD nodes to be created: %d\n",
+				kfd->num_nodes);
+
+	/* Allocate the KFD nodes */
+	for (i = 0, xcp_idx = 0; i < kfd->num_nodes; i++) {
+		node = kzalloc(sizeof(struct kfd_node), GFP_KERNEL);
+		if (!node)
+			goto node_alloc_error;
+
+		node->node_id = i;
+		node->adev = kfd->adev;
+		node->kfd = kfd;
+		node->kfd2kgd = kfd->kfd2kgd;
+		node->vm_info.vmid_num_kfd = vmid_num_kfd;
+		node->xcp = amdgpu_get_next_xcp(kfd->adev->xcp_mgr, &xcp_idx);
+		/* TODO : Check if error handling is needed */
+		if (node->xcp) {
+			amdgpu_xcp_get_inst_details(node->xcp, AMDGPU_XCP_GFX,
+						    &node->xcc_mask);
+			++xcp_idx;
+		} else {
+			node->xcc_mask =
+				(1U << NUM_XCC(kfd->adev->gfx.xcc_mask)) - 1;
+		}
+
+		if (node->xcp) {
+			dev_info(kfd_device, "KFD node %d partition %d size %lldM\n",
+				node->node_id, node->xcp->mem_id,
+				KFD_XCP_MEMORY_SIZE(node->adev, node->node_id) >> 20);
+		}
+
+		if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) &&
+		    partition_mode == AMDGPU_CPX_PARTITION_MODE &&
+		    kfd->num_nodes != 1) {
+			/* For GFX9.4.3 and CPX mode, first XCD gets VMID range
+			 * 4-9 and second XCD gets VMID range 10-15.
+			 */
+
+			node->vm_info.first_vmid_kfd = (i%2 == 0) ?
+						first_vmid_kfd :
+						first_vmid_kfd+vmid_num_kfd;
+			node->vm_info.last_vmid_kfd = (i%2 == 0) ?
+						last_vmid_kfd-vmid_num_kfd :
+						last_vmid_kfd;
+			node->compute_vmid_bitmap =
+				((0x1 << (node->vm_info.last_vmid_kfd + 1)) - 1) -
+				((0x1 << (node->vm_info.first_vmid_kfd)) - 1);
+		} else {
+			node->vm_info.first_vmid_kfd = first_vmid_kfd;
+			node->vm_info.last_vmid_kfd = last_vmid_kfd;
+			node->compute_vmid_bitmap =
+				gpu_resources->compute_vmid_bitmap;
+		}
+		node->max_proc_per_quantum = max_proc_per_quantum;
+		atomic_set(&node->sram_ecc_flag, 0);
+
+		amdgpu_amdkfd_get_local_mem_info(kfd->adev,
+					&node->local_mem_info, node->xcp);
+
+		if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3))
+			kfd_setup_interrupt_bitmap(node, i);
+
+		/* Initialize the KFD node */
+		if (kfd_init_node(node)) {
+			dev_err(kfd_device, "Error initializing KFD node\n");
+			goto node_init_error;
+		}
+		kfd->nodes[i] = node;
+	}
+
+	svm_range_set_max_pages(kfd->adev);
+
+	spin_lock_init(&kfd->watch_points_lock);
+
+	kfd->init_complete = true;
+	dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor,
+		 kfd->adev->pdev->device);
+
+	pr_debug("Starting kfd with the following scheduling policy %d\n",
+		node->dqm->sched_policy);
+
+	goto out;
+
+node_init_error:
+node_alloc_error:
+	kfd_cleanup_nodes(kfd, i);
+	kfd_doorbell_fini(kfd);
+kfd_doorbell_error:
+	kfd_gtt_sa_fini(kfd);
+kfd_gtt_sa_init_error:
+	amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
+alloc_gtt_mem_failure:
+	dev_err(kfd_device,
+		"device %x:%x NOT added due to errors\n",
+		kfd->adev->pdev->vendor, kfd->adev->pdev->device);
+out:
+	return kfd->init_complete;
+}
+
+void kgd2kfd_device_exit(struct kfd_dev *kfd)
+{
+	if (kfd->init_complete) {
+		/* Cleanup KFD nodes */
+		kfd_cleanup_nodes(kfd, kfd->num_nodes);
+		/* Cleanup common/shared resources */
+		kfd_doorbell_fini(kfd);
+		ida_destroy(&kfd->doorbell_ida);
+		kfd_gtt_sa_fini(kfd);
+		amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
+	}
+
+	kfree(kfd);
+}
+
+int kgd2kfd_pre_reset(struct kfd_dev *kfd)
+{
+	struct kfd_node *node;
+	int i;
+
+	if (!kfd->init_complete)
+		return 0;
+
+	for (i = 0; i < kfd->num_nodes; i++) {
+		node = kfd->nodes[i];
+		kfd_smi_event_update_gpu_reset(node, false);
+		node->dqm->ops.pre_reset(node->dqm);
+	}
+
+	kgd2kfd_suspend(kfd, false);
+
+	for (i = 0; i < kfd->num_nodes; i++)
+		kfd_signal_reset_event(kfd->nodes[i]);
+
+	return 0;
+}
+
+/*
+ * Fix me. KFD won't be able to resume existing process for now.
+ * We will keep all existing process in a evicted state and
+ * wait the process to be terminated.
+ */
+
+int kgd2kfd_post_reset(struct kfd_dev *kfd)
+{
+	int ret;
+	struct kfd_node *node;
+	int i;
+
+	if (!kfd->init_complete)
+		return 0;
+
+	for (i = 0; i < kfd->num_nodes; i++) {
+		ret = kfd_resume(kfd->nodes[i]);
+		if (ret)
+			return ret;
+	}
+
+	mutex_lock(&kfd_processes_mutex);
+	--kfd_locked;
+	mutex_unlock(&kfd_processes_mutex);
+
+	for (i = 0; i < kfd->num_nodes; i++) {
+		node = kfd->nodes[i];
+		atomic_set(&node->sram_ecc_flag, 0);
+		kfd_smi_event_update_gpu_reset(node, true);
+	}
+
+	return 0;
+}
+
+bool kfd_is_locked(void)
+{
+	lockdep_assert_held(&kfd_processes_mutex);
+	return  (kfd_locked > 0);
+}
+
+void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
+{
+	struct kfd_node *node;
+	int i;
+	int count;
+
+	if (!kfd->init_complete)
+		return;
+
+	/* for runtime suspend, skip locking kfd */
+	if (!run_pm) {
+		mutex_lock(&kfd_processes_mutex);
+		count = ++kfd_locked;
+		mutex_unlock(&kfd_processes_mutex);
+
+		/* For first KFD device suspend all the KFD processes */
+		if (count == 1)
+			kfd_suspend_all_processes();
+	}
+
+	for (i = 0; i < kfd->num_nodes; i++) {
+		node = kfd->nodes[i];
+		node->dqm->ops.stop(node->dqm);
+	}
+}
+
+int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
+{
+	int ret, count, i;
+
+	if (!kfd->init_complete)
+		return 0;
+
+	for (i = 0; i < kfd->num_nodes; i++) {
+		ret = kfd_resume(kfd->nodes[i]);
+		if (ret)
+			return ret;
+	}
+
+	/* for runtime resume, skip unlocking kfd */
+	if (!run_pm) {
+		mutex_lock(&kfd_processes_mutex);
+		count = --kfd_locked;
+		mutex_unlock(&kfd_processes_mutex);
+
+		WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
+		if (count == 0)
+			ret = kfd_resume_all_processes();
+	}
+
+	return ret;
+}
+
+static int kfd_resume(struct kfd_node *node)
+{
+	int err = 0;
+
+	err = node->dqm->ops.start(node->dqm);
+	if (err)
+		dev_err(kfd_device,
+			"Error starting queue manager for device %x:%x\n",
+			node->adev->pdev->vendor, node->adev->pdev->device);
+
+	return err;
+}
+
+static inline void kfd_queue_work(struct workqueue_struct *wq,
+				  struct work_struct *work)
+{
+	int cpu, new_cpu;
+
+	cpu = new_cpu = smp_processor_id();
+	do {
+		new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
+		if (cpu_to_node(new_cpu) == numa_node_id())
+			break;
+	} while (cpu != new_cpu);
+
+	queue_work_on(new_cpu, wq, work);
+}
+
+/* This is called directly from KGD at ISR. */
+void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
+{
+	uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE], i;
+	bool is_patched = false;
+	unsigned long flags;
+	struct kfd_node *node;
+
+	if (!kfd->init_complete)
+		return;
+
+	if (kfd->device_info.ih_ring_entry_size > sizeof(patched_ihre)) {
+		dev_err_once(kfd_device, "Ring entry too small\n");
+		return;
+	}
+
+	for (i = 0; i < kfd->num_nodes; i++) {
+		node = kfd->nodes[i];
+		spin_lock_irqsave(&node->interrupt_lock, flags);
+
+		if (node->interrupts_active
+		    && interrupt_is_wanted(node, ih_ring_entry,
+			    	patched_ihre, &is_patched)
+		    && enqueue_ih_ring_entry(node,
+			    	is_patched ? patched_ihre : ih_ring_entry)) {
+			kfd_queue_work(node->ih_wq, &node->interrupt_work);
+			spin_unlock_irqrestore(&node->interrupt_lock, flags);
+			return;
+		}
+		spin_unlock_irqrestore(&node->interrupt_lock, flags);
+	}
+
+}
+
+int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger)
+{
+	struct kfd_process *p;
+	int r;
+
+	/* Because we are called from arbitrary context (workqueue) as opposed
+	 * to process context, kfd_process could attempt to exit while we are
+	 * running so the lookup function increments the process ref count.
+	 */
+	p = kfd_lookup_process_by_mm(mm);
+	if (!p)
+		return -ESRCH;
+
+	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
+	r = kfd_process_evict_queues(p, trigger);
+
+	kfd_unref_process(p);
+	return r;
+}
+
+int kgd2kfd_resume_mm(struct mm_struct *mm)
+{
+	struct kfd_process *p;
+	int r;
+
+	/* Because we are called from arbitrary context (workqueue) as opposed
+	 * to process context, kfd_process could attempt to exit while we are
+	 * running so the lookup function increments the process ref count.
+	 */
+	p = kfd_lookup_process_by_mm(mm);
+	if (!p)
+		return -ESRCH;
+
+	r = kfd_process_restore_queues(p);
+
+	kfd_unref_process(p);
+	return r;
+}
+
+/** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will
+ *   prepare for safe eviction of KFD BOs that belong to the specified
+ *   process.
+ *
+ * @mm: mm_struct that identifies the specified KFD process
+ * @fence: eviction fence attached to KFD process BOs
+ *
+ */
+int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
+					       struct dma_fence *fence)
+{
+	struct kfd_process *p;
+	unsigned long active_time;
+	unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS);
+
+	if (!fence)
+		return -EINVAL;
+
+	if (dma_fence_is_signaled(fence))
+		return 0;
+
+	p = kfd_lookup_process_by_mm(mm);
+	if (!p)
+		return -ENODEV;
+
+	if (fence->seqno == p->last_eviction_seqno)
+		goto out;
+
+	p->last_eviction_seqno = fence->seqno;
+
+	/* Avoid KFD process starvation. Wait for at least
+	 * PROCESS_ACTIVE_TIME_MS before evicting the process again
+	 */
+	active_time = get_jiffies_64() - p->last_restore_timestamp;
+	if (delay_jiffies > active_time)
+		delay_jiffies -= active_time;
+	else
+		delay_jiffies = 0;
+
+	/* During process initialization eviction_work.dwork is initialized
+	 * to kfd_evict_bo_worker
+	 */
+	WARN(debug_evictions, "Scheduling eviction of pid %d in %ld jiffies",
+	     p->lead_thread->pid, delay_jiffies);
+	schedule_delayed_work(&p->eviction_work, delay_jiffies);
+out:
+	kfd_unref_process(p);
+	return 0;
+}
+
+static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
+				unsigned int chunk_size)
+{
+	if (WARN_ON(buf_size < chunk_size))
+		return -EINVAL;
+	if (WARN_ON(buf_size == 0))
+		return -EINVAL;
+	if (WARN_ON(chunk_size == 0))
+		return -EINVAL;
+
+	kfd->gtt_sa_chunk_size = chunk_size;
+	kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;
+
+	kfd->gtt_sa_bitmap = bitmap_zalloc(kfd->gtt_sa_num_of_chunks,
+					   GFP_KERNEL);
+	if (!kfd->gtt_sa_bitmap)
+		return -ENOMEM;
+
+	pr_debug("gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
+			kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);
+
+	mutex_init(&kfd->gtt_sa_lock);
+
+	return 0;
+}
+
+static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
+{
+	mutex_destroy(&kfd->gtt_sa_lock);
+	bitmap_free(kfd->gtt_sa_bitmap);
+}
+
+static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
+						unsigned int bit_num,
+						unsigned int chunk_size)
+{
+	return start_addr + bit_num * chunk_size;
+}
+
+static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr,
+						unsigned int bit_num,
+						unsigned int chunk_size)
+{
+	return (uint32_t *) ((uint64_t) start_addr + bit_num * chunk_size);
+}
+
+int kfd_gtt_sa_allocate(struct kfd_node *node, unsigned int size,
+			struct kfd_mem_obj **mem_obj)
+{
+	unsigned int found, start_search, cur_size;
+	struct kfd_dev *kfd = node->kfd;
+
+	if (size == 0)
+		return -EINVAL;
+
+	if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
+		return -ENOMEM;
+
+	*mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
+	if (!(*mem_obj))
+		return -ENOMEM;
+
+	pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size);
+
+	start_search = 0;
+
+	mutex_lock(&kfd->gtt_sa_lock);
+
+kfd_gtt_restart_search:
+	/* Find the first chunk that is free */
+	found = find_next_zero_bit(kfd->gtt_sa_bitmap,
+					kfd->gtt_sa_num_of_chunks,
+					start_search);
+
+	pr_debug("Found = %d\n", found);
+
+	/* If there wasn't any free chunk, bail out */
+	if (found == kfd->gtt_sa_num_of_chunks)
+		goto kfd_gtt_no_free_chunk;
+
+	/* Update fields of mem_obj */
+	(*mem_obj)->range_start = found;
+	(*mem_obj)->range_end = found;
+	(*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr(
+					kfd->gtt_start_gpu_addr,
+					found,
+					kfd->gtt_sa_chunk_size);
+	(*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr(
+					kfd->gtt_start_cpu_ptr,
+					found,
+					kfd->gtt_sa_chunk_size);
+
+	pr_debug("gpu_addr = %p, cpu_addr = %p\n",
+			(uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr);
+
+	/* If we need only one chunk, mark it as allocated and get out */
+	if (size <= kfd->gtt_sa_chunk_size) {
+		pr_debug("Single bit\n");
+		__set_bit(found, kfd->gtt_sa_bitmap);
+		goto kfd_gtt_out;
+	}
+
+	/* Otherwise, try to see if we have enough contiguous chunks */
+	cur_size = size - kfd->gtt_sa_chunk_size;
+	do {
+		(*mem_obj)->range_end =
+			find_next_zero_bit(kfd->gtt_sa_bitmap,
+					kfd->gtt_sa_num_of_chunks, ++found);
+		/*
+		 * If next free chunk is not contiguous than we need to
+		 * restart our search from the last free chunk we found (which
+		 * wasn't contiguous to the previous ones
+		 */
+		if ((*mem_obj)->range_end != found) {
+			start_search = found;
+			goto kfd_gtt_restart_search;
+		}
+
+		/*
+		 * If we reached end of buffer, bail out with error
+		 */
+		if (found == kfd->gtt_sa_num_of_chunks)
+			goto kfd_gtt_no_free_chunk;
+
+		/* Check if we don't need another chunk */
+		if (cur_size <= kfd->gtt_sa_chunk_size)
+			cur_size = 0;
+		else
+			cur_size -= kfd->gtt_sa_chunk_size;
+
+	} while (cur_size > 0);
+
+	pr_debug("range_start = %d, range_end = %d\n",
+		(*mem_obj)->range_start, (*mem_obj)->range_end);
+
+	/* Mark the chunks as allocated */
+	bitmap_set(kfd->gtt_sa_bitmap, (*mem_obj)->range_start,
+		   (*mem_obj)->range_end - (*mem_obj)->range_start + 1);
+
+kfd_gtt_out:
+	mutex_unlock(&kfd->gtt_sa_lock);
+	return 0;
+
+kfd_gtt_no_free_chunk:
+	pr_debug("Allocation failed with mem_obj = %p\n", *mem_obj);
+	mutex_unlock(&kfd->gtt_sa_lock);
+	kfree(*mem_obj);
+	return -ENOMEM;
+}
+
+int kfd_gtt_sa_free(struct kfd_node *node, struct kfd_mem_obj *mem_obj)
+{
+	struct kfd_dev *kfd = node->kfd;
+
+	/* Act like kfree when trying to free a NULL object */
+	if (!mem_obj)
+		return 0;
+
+	pr_debug("Free mem_obj = %p, range_start = %d, range_end = %d\n",
+			mem_obj, mem_obj->range_start, mem_obj->range_end);
+
+	mutex_lock(&kfd->gtt_sa_lock);
+
+	/* Mark the chunks as free */
+	bitmap_clear(kfd->gtt_sa_bitmap, mem_obj->range_start,
+		     mem_obj->range_end - mem_obj->range_start + 1);
+
+	mutex_unlock(&kfd->gtt_sa_lock);
+
+	kfree(mem_obj);
+	return 0;
+}
+
+void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
+{
+	/*
+	 * TODO: Currently update SRAM ECC flag for first node.
+	 * This needs to be updated later when we can
+	 * identify SRAM ECC error on other nodes also.
+	 */
+	if (kfd)
+		atomic_inc(&kfd->nodes[0]->sram_ecc_flag);
+}
+
+void kfd_inc_compute_active(struct kfd_node *node)
+{
+	if (atomic_inc_return(&node->kfd->compute_profile) == 1)
+		amdgpu_amdkfd_set_compute_idle(node->adev, false);
+}
+
+void kfd_dec_compute_active(struct kfd_node *node)
+{
+	int count = atomic_dec_return(&node->kfd->compute_profile);
+
+	if (count == 0)
+		amdgpu_amdkfd_set_compute_idle(node->adev, true);
+	WARN_ONCE(count < 0, "Compute profile ref. count error");
+}
+
+void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
+{
+	/*
+	 * TODO: For now, raise the throttling event only on first node.
+	 * This will need to change after we are able to determine
+	 * which node raised the throttling event.
+	 */
+	if (kfd && kfd->init_complete)
+		kfd_smi_event_update_thermal_throttling(kfd->nodes[0],
+							throttle_bitmask);
+}
+
+/* kfd_get_num_sdma_engines returns the number of PCIe optimized SDMA and
+ * kfd_get_num_xgmi_sdma_engines returns the number of XGMI SDMA.
+ * When the device has more than two engines, we reserve two for PCIe to enable
+ * full-duplex and the rest are used as XGMI.
+ */
+unsigned int kfd_get_num_sdma_engines(struct kfd_node *node)
+{
+	/* If XGMI is not supported, all SDMA engines are PCIe */
+	if (!node->adev->gmc.xgmi.supported)
+		return node->adev->sdma.num_instances/(int)node->kfd->num_nodes;
+
+	return min(node->adev->sdma.num_instances/(int)node->kfd->num_nodes, 2);
+}
+
+unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_node *node)
+{
+	/* After reserved for PCIe, the rest of engines are XGMI */
+	return node->adev->sdma.num_instances/(int)node->kfd->num_nodes -
+		kfd_get_num_sdma_engines(node);
+}
+
+int kgd2kfd_check_and_lock_kfd(void)
+{
+	mutex_lock(&kfd_processes_mutex);
+	if (!hash_empty(kfd_processes_table) || kfd_is_locked()) {
+		mutex_unlock(&kfd_processes_mutex);
+		return -EBUSY;
+	}
+
+	++kfd_locked;
+	mutex_unlock(&kfd_processes_mutex);
+
+	return 0;
+}
+
+void kgd2kfd_unlock_kfd(void)
+{
+	mutex_lock(&kfd_processes_mutex);
+	--kfd_locked;
+	mutex_unlock(&kfd_processes_mutex);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+/* This function will send a package to HIQ to hang the HWS
+ * which will trigger a GPU reset and bring the HWS back to normal state
+ */
+int kfd_debugfs_hang_hws(struct kfd_node *dev)
+{
+	if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) {
+		pr_err("HWS is not enabled");
+		return -EINVAL;
+	}
+
+	return dqm_debugfs_hang_hws(dev->dqm);
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
new file mode 100644
index 000000000..e07652e72
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -0,0 +1,3253 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/ratelimit.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/sched.h>
+#include "kfd_priv.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_mqd_manager.h"
+#include "cik_regs.h"
+#include "kfd_kernel_queue.h"
+#include "amdgpu_amdkfd.h"
+#include "mes_api_def.h"
+#include "kfd_debug.h"
+
+/* Size of the per-pipe EOP queue */
+#define CIK_HPD_EOP_BYTES_LOG2 11
+#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
+
+static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
+				  u32 pasid, unsigned int vmid);
+
+static int execute_queues_cpsch(struct device_queue_manager *dqm,
+				enum kfd_unmap_queues_filter filter,
+				uint32_t filter_param,
+				uint32_t grace_period);
+static int unmap_queues_cpsch(struct device_queue_manager *dqm,
+				enum kfd_unmap_queues_filter filter,
+				uint32_t filter_param,
+				uint32_t grace_period,
+				bool reset);
+
+static int map_queues_cpsch(struct device_queue_manager *dqm);
+
+static void deallocate_sdma_queue(struct device_queue_manager *dqm,
+				struct queue *q);
+
+static inline void deallocate_hqd(struct device_queue_manager *dqm,
+				struct queue *q);
+static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
+static int allocate_sdma_queue(struct device_queue_manager *dqm,
+				struct queue *q, const uint32_t *restore_sdma_id);
+static void kfd_process_hw_exception(struct work_struct *work);
+
+static inline
+enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
+{
+	if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
+		return KFD_MQD_TYPE_SDMA;
+	return KFD_MQD_TYPE_CP;
+}
+
+static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
+{
+	int i;
+	int pipe_offset = (mec * dqm->dev->kfd->shared_resources.num_pipe_per_mec
+		+ pipe) * dqm->dev->kfd->shared_resources.num_queue_per_pipe;
+
+	/* queue is available for KFD usage if bit is 1 */
+	for (i = 0; i <  dqm->dev->kfd->shared_resources.num_queue_per_pipe; ++i)
+		if (test_bit(pipe_offset + i,
+			      dqm->dev->kfd->shared_resources.cp_queue_bitmap))
+			return true;
+	return false;
+}
+
+unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
+{
+	return bitmap_weight(dqm->dev->kfd->shared_resources.cp_queue_bitmap,
+				KGD_MAX_QUEUES);
+}
+
+unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
+{
+	return dqm->dev->kfd->shared_resources.num_queue_per_pipe;
+}
+
+unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
+{
+	return dqm->dev->kfd->shared_resources.num_pipe_per_mec;
+}
+
+static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
+{
+	return kfd_get_num_sdma_engines(dqm->dev) +
+		kfd_get_num_xgmi_sdma_engines(dqm->dev);
+}
+
+unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
+{
+	return kfd_get_num_sdma_engines(dqm->dev) *
+		dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
+}
+
+unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
+{
+	return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
+		dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
+}
+
+static void init_sdma_bitmaps(struct device_queue_manager *dqm)
+{
+	bitmap_zero(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES);
+	bitmap_set(dqm->sdma_bitmap, 0, get_num_sdma_queues(dqm));
+
+	bitmap_zero(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES);
+	bitmap_set(dqm->xgmi_sdma_bitmap, 0, get_num_xgmi_sdma_queues(dqm));
+
+	/* Mask out the reserved queues */
+	bitmap_andnot(dqm->sdma_bitmap, dqm->sdma_bitmap,
+		      dqm->dev->kfd->device_info.reserved_sdma_queues_bitmap,
+		      KFD_MAX_SDMA_QUEUES);
+}
+
+void program_sh_mem_settings(struct device_queue_manager *dqm,
+					struct qcm_process_device *qpd)
+{
+	uint32_t xcc_mask = dqm->dev->xcc_mask;
+	int xcc_id;
+
+	for_each_inst(xcc_id, xcc_mask)
+		dqm->dev->kfd2kgd->program_sh_mem_settings(
+			dqm->dev->adev, qpd->vmid, qpd->sh_mem_config,
+			qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit,
+			qpd->sh_mem_bases, xcc_id);
+}
+
+static void kfd_hws_hang(struct device_queue_manager *dqm)
+{
+	/*
+	 * Issue a GPU reset if HWS is unresponsive
+	 */
+	dqm->is_hws_hang = true;
+
+	/* It's possible we're detecting a HWS hang in the
+	 * middle of a GPU reset. No need to schedule another
+	 * reset in this case.
+	 */
+	if (!dqm->is_resetting)
+		schedule_work(&dqm->hw_exception_work);
+}
+
+static int convert_to_mes_queue_type(int queue_type)
+{
+	int mes_queue_type;
+
+	switch (queue_type) {
+	case KFD_QUEUE_TYPE_COMPUTE:
+		mes_queue_type = MES_QUEUE_TYPE_COMPUTE;
+		break;
+	case KFD_QUEUE_TYPE_SDMA:
+		mes_queue_type = MES_QUEUE_TYPE_SDMA;
+		break;
+	default:
+		WARN(1, "Invalid queue type %d", queue_type);
+		mes_queue_type = -EINVAL;
+		break;
+	}
+
+	return mes_queue_type;
+}
+
+static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
+			 struct qcm_process_device *qpd)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
+	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+	struct mes_add_queue_input queue_input;
+	int r, queue_type;
+	uint64_t wptr_addr_off;
+
+	if (dqm->is_hws_hang)
+		return -EIO;
+
+	memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
+	queue_input.process_id = qpd->pqm->process->pasid;
+	queue_input.page_table_base_addr =  qpd->page_table_base;
+	queue_input.process_va_start = 0;
+	queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
+	/* MES unit for quantum is 100ns */
+	queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM;  /* Equivalent to 10ms. */
+	queue_input.process_context_addr = pdd->proc_ctx_gpu_addr;
+	queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */
+	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
+	queue_input.inprocess_gang_priority = q->properties.priority;
+	queue_input.gang_global_priority_level =
+					AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
+	queue_input.doorbell_offset = q->properties.doorbell_off;
+	queue_input.mqd_addr = q->gart_mqd_addr;
+	queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
+
+	if (q->wptr_bo) {
+		wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1);
+		queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->wptr_bo) + wptr_addr_off;
+	}
+
+	queue_input.is_kfd_process = 1;
+	queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL);
+	queue_input.queue_size = q->properties.queue_size >> 2;
+
+	queue_input.paging = false;
+	queue_input.tba_addr = qpd->tba_addr;
+	queue_input.tma_addr = qpd->tma_addr;
+	queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
+	queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled ||
+					     kfd_dbg_has_ttmps_always_setup(q->device);
+
+	queue_type = convert_to_mes_queue_type(q->properties.type);
+	if (queue_type < 0) {
+		pr_err("Queue type not supported with MES, queue:%d\n",
+				q->properties.type);
+		return -EINVAL;
+	}
+	queue_input.queue_type = (uint32_t)queue_type;
+
+	queue_input.exclusively_scheduled = q->properties.is_gws;
+
+	amdgpu_mes_lock(&adev->mes);
+	r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
+	amdgpu_mes_unlock(&adev->mes);
+	if (r) {
+		pr_err("failed to add hardware queue to MES, doorbell=0x%x\n",
+			q->properties.doorbell_off);
+		pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
+		kfd_hws_hang(dqm);
+	}
+
+	return r;
+}
+
+static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
+			struct qcm_process_device *qpd)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
+	int r;
+	struct mes_remove_queue_input queue_input;
+
+	if (dqm->is_hws_hang)
+		return -EIO;
+
+	memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
+	queue_input.doorbell_offset = q->properties.doorbell_off;
+	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
+
+	amdgpu_mes_lock(&adev->mes);
+	r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
+	amdgpu_mes_unlock(&adev->mes);
+
+	if (r) {
+		pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n",
+			q->properties.doorbell_off);
+		pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
+		kfd_hws_hang(dqm);
+	}
+
+	return r;
+}
+
+static int remove_all_queues_mes(struct device_queue_manager *dqm)
+{
+	struct device_process_node *cur;
+	struct qcm_process_device *qpd;
+	struct queue *q;
+	int retval = 0;
+
+	list_for_each_entry(cur, &dqm->queues, list) {
+		qpd = cur->qpd;
+		list_for_each_entry(q, &qpd->queues_list, list) {
+			if (q->properties.is_active) {
+				retval = remove_queue_mes(dqm, q, qpd);
+				if (retval) {
+					pr_err("%s: Failed to remove queue %d for dev %d",
+						__func__,
+						q->properties.queue_id,
+						dqm->dev->id);
+					return retval;
+				}
+			}
+		}
+	}
+
+	return retval;
+}
+
+static void increment_queue_count(struct device_queue_manager *dqm,
+				  struct qcm_process_device *qpd,
+				  struct queue *q)
+{
+	dqm->active_queue_count++;
+	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
+		dqm->active_cp_queue_count++;
+
+	if (q->properties.is_gws) {
+		dqm->gws_queue_count++;
+		qpd->mapped_gws_queue = true;
+	}
+}
+
+static void decrement_queue_count(struct device_queue_manager *dqm,
+				  struct qcm_process_device *qpd,
+				  struct queue *q)
+{
+	dqm->active_queue_count--;
+	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
+		dqm->active_cp_queue_count--;
+
+	if (q->properties.is_gws) {
+		dqm->gws_queue_count--;
+		qpd->mapped_gws_queue = false;
+	}
+}
+
+/*
+ * Allocate a doorbell ID to this queue.
+ * If doorbell_id is passed in, make sure requested ID is valid then allocate it.
+ */
+static int allocate_doorbell(struct qcm_process_device *qpd,
+			     struct queue *q,
+			     uint32_t const *restore_id)
+{
+	struct kfd_node *dev = qpd->dqm->dev;
+
+	if (!KFD_IS_SOC15(dev)) {
+		/* On pre-SOC15 chips we need to use the queue ID to
+		 * preserve the user mode ABI.
+		 */
+
+		if (restore_id && *restore_id != q->properties.queue_id)
+			return -EINVAL;
+
+		q->doorbell_id = q->properties.queue_id;
+	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+			q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+		/* For SDMA queues on SOC15 with 8-byte doorbell, use static
+		 * doorbell assignments based on the engine and queue id.
+		 * The doobell index distance between RLC (2*i) and (2*i+1)
+		 * for a SDMA engine is 512.
+		 */
+
+		uint32_t *idx_offset = dev->kfd->shared_resources.sdma_doorbell_idx;
+
+		/*
+		 * q->properties.sdma_engine_id corresponds to the virtual
+		 * sdma engine number. However, for doorbell allocation,
+		 * we need the physical sdma engine id in order to get the
+		 * correct doorbell offset.
+		 */
+		uint32_t valid_id = idx_offset[qpd->dqm->dev->node_id *
+					       get_num_all_sdma_engines(qpd->dqm) +
+					       q->properties.sdma_engine_id]
+						+ (q->properties.sdma_queue_id & 1)
+						* KFD_QUEUE_DOORBELL_MIRROR_OFFSET
+						+ (q->properties.sdma_queue_id >> 1);
+
+		if (restore_id && *restore_id != valid_id)
+			return -EINVAL;
+		q->doorbell_id = valid_id;
+	} else {
+		/* For CP queues on SOC15 */
+		if (restore_id) {
+			/* make sure that ID is free  */
+			if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap))
+				return -EINVAL;
+
+			q->doorbell_id = *restore_id;
+		} else {
+			/* or reserve a free doorbell ID */
+			unsigned int found;
+
+			found = find_first_zero_bit(qpd->doorbell_bitmap,
+						    KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+			if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
+				pr_debug("No doorbells available");
+				return -EBUSY;
+			}
+			set_bit(found, qpd->doorbell_bitmap);
+			q->doorbell_id = found;
+		}
+	}
+
+	q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev,
+								  qpd->proc_doorbells,
+								  q->doorbell_id,
+								  dev->kfd->device_info.doorbell_size);
+	return 0;
+}
+
+static void deallocate_doorbell(struct qcm_process_device *qpd,
+				struct queue *q)
+{
+	unsigned int old;
+	struct kfd_node *dev = qpd->dqm->dev;
+
+	if (!KFD_IS_SOC15(dev) ||
+	    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+	    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+		return;
+
+	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
+	WARN_ON(!old);
+}
+
+static void program_trap_handler_settings(struct device_queue_manager *dqm,
+				struct qcm_process_device *qpd)
+{
+	uint32_t xcc_mask = dqm->dev->xcc_mask;
+	int xcc_id;
+
+	if (dqm->dev->kfd2kgd->program_trap_handler_settings)
+		for_each_inst(xcc_id, xcc_mask)
+			dqm->dev->kfd2kgd->program_trap_handler_settings(
+				dqm->dev->adev, qpd->vmid, qpd->tba_addr,
+				qpd->tma_addr, xcc_id);
+}
+
+static int allocate_vmid(struct device_queue_manager *dqm,
+			struct qcm_process_device *qpd,
+			struct queue *q)
+{
+	int allocated_vmid = -1, i;
+
+	for (i = dqm->dev->vm_info.first_vmid_kfd;
+			i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
+		if (!dqm->vmid_pasid[i]) {
+			allocated_vmid = i;
+			break;
+		}
+	}
+
+	if (allocated_vmid < 0) {
+		pr_err("no more vmid to allocate\n");
+		return -ENOSPC;
+	}
+
+	pr_debug("vmid allocated: %d\n", allocated_vmid);
+
+	dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
+
+	set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
+
+	qpd->vmid = allocated_vmid;
+	q->properties.vmid = allocated_vmid;
+
+	program_sh_mem_settings(dqm, qpd);
+
+	if (KFD_IS_SOC15(dqm->dev) && dqm->dev->kfd->cwsr_enabled)
+		program_trap_handler_settings(dqm, qpd);
+
+	/* qpd->page_table_base is set earlier when register_process()
+	 * is called, i.e. when the first queue is created.
+	 */
+	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev,
+			qpd->vmid,
+			qpd->page_table_base);
+	/* invalidate the VM context after pasid and vmid mapping is set up */
+	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
+
+	if (dqm->dev->kfd2kgd->set_scratch_backing_va)
+		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,
+				qpd->sh_hidden_private_base, qpd->vmid);
+
+	return 0;
+}
+
+static int flush_texture_cache_nocpsch(struct kfd_node *kdev,
+				struct qcm_process_device *qpd)
+{
+	const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf;
+	int ret;
+
+	if (!qpd->ib_kaddr)
+		return -ENOMEM;
+
+	ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
+	if (ret)
+		return ret;
+
+	return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid,
+				qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
+				pmf->release_mem_size / sizeof(uint32_t));
+}
+
+static void deallocate_vmid(struct device_queue_manager *dqm,
+				struct qcm_process_device *qpd,
+				struct queue *q)
+{
+	/* On GFX v7, CP doesn't flush TC at dequeue */
+	if (q->device->adev->asic_type == CHIP_HAWAII)
+		if (flush_texture_cache_nocpsch(q->device, qpd))
+			pr_err("Failed to flush TC\n");
+
+	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
+
+	/* Release the vmid mapping */
+	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
+	dqm->vmid_pasid[qpd->vmid] = 0;
+
+	qpd->vmid = 0;
+	q->properties.vmid = 0;
+}
+
+static int create_queue_nocpsch(struct device_queue_manager *dqm,
+				struct queue *q,
+				struct qcm_process_device *qpd,
+				const struct kfd_criu_queue_priv_data *qd,
+				const void *restore_mqd, const void *restore_ctl_stack)
+{
+	struct mqd_manager *mqd_mgr;
+	int retval;
+
+	dqm_lock(dqm);
+
+	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+		pr_warn("Can't create new usermode queue because %d queues were already created\n",
+				dqm->total_queue_count);
+		retval = -EPERM;
+		goto out_unlock;
+	}
+
+	if (list_empty(&qpd->queues_list)) {
+		retval = allocate_vmid(dqm, qpd, q);
+		if (retval)
+			goto out_unlock;
+	}
+	q->properties.vmid = qpd->vmid;
+	/*
+	 * Eviction state logic: mark all queues as evicted, even ones
+	 * not currently active. Restoring inactive queues later only
+	 * updates the is_evicted flag but is a no-op otherwise.
+	 */
+	q->properties.is_evicted = !!qpd->evicted;
+
+	q->properties.tba_addr = qpd->tba_addr;
+	q->properties.tma_addr = qpd->tma_addr;
+
+	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+			q->properties.type)];
+	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
+		retval = allocate_hqd(dqm, q);
+		if (retval)
+			goto deallocate_vmid;
+		pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
+			q->pipe, q->queue);
+	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+		retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
+		if (retval)
+			goto deallocate_vmid;
+		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
+	}
+
+	retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
+	if (retval)
+		goto out_deallocate_hqd;
+
+	/* Temporarily release dqm lock to avoid a circular lock dependency */
+	dqm_unlock(dqm);
+	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
+	dqm_lock(dqm);
+
+	if (!q->mqd_mem_obj) {
+		retval = -ENOMEM;
+		goto out_deallocate_doorbell;
+	}
+
+	if (qd)
+		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
+				     &q->properties, restore_mqd, restore_ctl_stack,
+				     qd->ctl_stack_size);
+	else
+		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
+					&q->gart_mqd_addr, &q->properties);
+
+	if (q->properties.is_active) {
+		if (!dqm->sched_running) {
+			WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
+			goto add_queue_to_list;
+		}
+
+		if (WARN(q->process->mm != current->mm,
+					"should only run in user thread"))
+			retval = -EFAULT;
+		else
+			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
+					q->queue, &q->properties, current->mm);
+		if (retval)
+			goto out_free_mqd;
+	}
+
+add_queue_to_list:
+	list_add(&q->list, &qpd->queues_list);
+	qpd->queue_count++;
+	if (q->properties.is_active)
+		increment_queue_count(dqm, qpd, q);
+
+	/*
+	 * Unconditionally increment this counter, regardless of the queue's
+	 * type or whether the queue is active.
+	 */
+	dqm->total_queue_count++;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+	goto out_unlock;
+
+out_free_mqd:
+	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+out_deallocate_doorbell:
+	deallocate_doorbell(qpd, q);
+out_deallocate_hqd:
+	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
+		deallocate_hqd(dqm, q);
+	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+		deallocate_sdma_queue(dqm, q);
+deallocate_vmid:
+	if (list_empty(&qpd->queues_list))
+		deallocate_vmid(dqm, qpd, q);
+out_unlock:
+	dqm_unlock(dqm);
+	return retval;
+}
+
+static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
+{
+	bool set;
+	int pipe, bit, i;
+
+	set = false;
+
+	for (pipe = dqm->next_pipe_to_allocate, i = 0;
+			i < get_pipes_per_mec(dqm);
+			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {
+
+		if (!is_pipe_enabled(dqm, 0, pipe))
+			continue;
+
+		if (dqm->allocated_queues[pipe] != 0) {
+			bit = ffs(dqm->allocated_queues[pipe]) - 1;
+			dqm->allocated_queues[pipe] &= ~(1 << bit);
+			q->pipe = pipe;
+			q->queue = bit;
+			set = true;
+			break;
+		}
+	}
+
+	if (!set)
+		return -EBUSY;
+
+	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
+	/* horizontal hqd allocation */
+	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);
+
+	return 0;
+}
+
+static inline void deallocate_hqd(struct device_queue_manager *dqm,
+				struct queue *q)
+{
+	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
+}
+
+#define SQ_IND_CMD_CMD_KILL		0x00000003
+#define SQ_IND_CMD_MODE_BROADCAST	0x00000001
+
+static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process *p)
+{
+	int status = 0;
+	unsigned int vmid;
+	uint16_t queried_pasid;
+	union SQ_CMD_BITS reg_sq_cmd;
+	union GRBM_GFX_INDEX_BITS reg_gfx_index;
+	struct kfd_process_device *pdd;
+	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
+	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
+	uint32_t xcc_mask = dev->xcc_mask;
+	int xcc_id;
+
+	reg_sq_cmd.u32All = 0;
+	reg_gfx_index.u32All = 0;
+
+	pr_debug("Killing all process wavefronts\n");
+
+	if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
+		pr_err("no vmid pasid mapping supported \n");
+		return -EOPNOTSUPP;
+	}
+
+	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
+	 * ATC_VMID15_PASID_MAPPING
+	 * to check which VMID the current process is mapped to.
+	 */
+
+	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
+		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
+				(dev->adev, vmid, &queried_pasid);
+
+		if (status && queried_pasid == p->pasid) {
+			pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
+					vmid, p->pasid);
+			break;
+		}
+	}
+
+	if (vmid > last_vmid_to_scan) {
+		pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
+		return -EFAULT;
+	}
+
+	/* taking the VMID for that process on the safe way using PDD */
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd)
+		return -EFAULT;
+
+	reg_gfx_index.bits.sh_broadcast_writes = 1;
+	reg_gfx_index.bits.se_broadcast_writes = 1;
+	reg_gfx_index.bits.instance_broadcast_writes = 1;
+	reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
+	reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
+	reg_sq_cmd.bits.vm_id = vmid;
+
+	for_each_inst(xcc_id, xcc_mask)
+		dev->kfd2kgd->wave_control_execute(
+			dev->adev, reg_gfx_index.u32All,
+			reg_sq_cmd.u32All, xcc_id);
+
+	return 0;
+}
+
+/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
+ * to avoid asynchronized access
+ */
+static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
+				struct qcm_process_device *qpd,
+				struct queue *q)
+{
+	int retval;
+	struct mqd_manager *mqd_mgr;
+
+	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+			q->properties.type)];
+
+	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
+		deallocate_hqd(dqm, q);
+	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+		deallocate_sdma_queue(dqm, q);
+	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+		deallocate_sdma_queue(dqm, q);
+	else {
+		pr_debug("q->properties.type %d is invalid\n",
+				q->properties.type);
+		return -EINVAL;
+	}
+	dqm->total_queue_count--;
+
+	deallocate_doorbell(qpd, q);
+
+	if (!dqm->sched_running) {
+		WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
+		return 0;
+	}
+
+	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
+				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
+				KFD_UNMAP_LATENCY_MS,
+				q->pipe, q->queue);
+	if (retval == -ETIME)
+		qpd->reset_wavefronts = true;
+
+	list_del(&q->list);
+	if (list_empty(&qpd->queues_list)) {
+		if (qpd->reset_wavefronts) {
+			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
+					dqm->dev);
+			/* dbgdev_wave_reset_wavefronts has to be called before
+			 * deallocate_vmid(), i.e. when vmid is still in use.
+			 */
+			dbgdev_wave_reset_wavefronts(dqm->dev,
+					qpd->pqm->process);
+			qpd->reset_wavefronts = false;
+		}
+
+		deallocate_vmid(dqm, qpd, q);
+	}
+	qpd->queue_count--;
+	if (q->properties.is_active)
+		decrement_queue_count(dqm, qpd, q);
+
+	return retval;
+}
+
+static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
+				struct qcm_process_device *qpd,
+				struct queue *q)
+{
+	int retval;
+	uint64_t sdma_val = 0;
+	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+	struct mqd_manager *mqd_mgr =
+		dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];
+
+	/* Get the SDMA queue stats */
+	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
+	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
+							&sdma_val);
+		if (retval)
+			pr_err("Failed to read SDMA queue counter for queue: %d\n",
+				q->properties.queue_id);
+	}
+
+	dqm_lock(dqm);
+	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
+	if (!retval)
+		pdd->sdma_past_activity_counter += sdma_val;
+	dqm_unlock(dqm);
+
+	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+
+	return retval;
+}
+
+static int update_queue(struct device_queue_manager *dqm, struct queue *q,
+			struct mqd_update_info *minfo)
+{
+	int retval = 0;
+	struct mqd_manager *mqd_mgr;
+	struct kfd_process_device *pdd;
+	bool prev_active = false;
+
+	dqm_lock(dqm);
+	pdd = kfd_get_process_device_data(q->device, q->process);
+	if (!pdd) {
+		retval = -ENODEV;
+		goto out_unlock;
+	}
+	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+			q->properties.type)];
+
+	/* Save previous activity state for counters */
+	prev_active = q->properties.is_active;
+
+	/* Make sure the queue is unmapped before updating the MQD */
+	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
+		if (!dqm->dev->kfd->shared_resources.enable_mes)
+			retval = unmap_queues_cpsch(dqm,
+						    KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
+		else if (prev_active)
+			retval = remove_queue_mes(dqm, q, &pdd->qpd);
+
+		if (retval) {
+			pr_err("unmap queue failed\n");
+			goto out_unlock;
+		}
+	} else if (prev_active &&
+		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+		    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+		    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+
+		if (!dqm->sched_running) {
+			WARN_ONCE(1, "Update non-HWS queue while stopped\n");
+			goto out_unlock;
+		}
+
+		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
+				(dqm->dev->kfd->cwsr_enabled ?
+				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
+				 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
+				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
+		if (retval) {
+			pr_err("destroy mqd failed\n");
+			goto out_unlock;
+		}
+	}
+
+	mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo);
+
+	/*
+	 * check active state vs. the previous state and modify
+	 * counter accordingly. map_queues_cpsch uses the
+	 * dqm->active_queue_count to determine whether a new runlist must be
+	 * uploaded.
+	 */
+	if (q->properties.is_active && !prev_active) {
+		increment_queue_count(dqm, &pdd->qpd, q);
+	} else if (!q->properties.is_active && prev_active) {
+		decrement_queue_count(dqm, &pdd->qpd, q);
+	} else if (q->gws && !q->properties.is_gws) {
+		if (q->properties.is_active) {
+			dqm->gws_queue_count++;
+			pdd->qpd.mapped_gws_queue = true;
+		}
+		q->properties.is_gws = true;
+	} else if (!q->gws && q->properties.is_gws) {
+		if (q->properties.is_active) {
+			dqm->gws_queue_count--;
+			pdd->qpd.mapped_gws_queue = false;
+		}
+		q->properties.is_gws = false;
+	}
+
+	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
+		if (!dqm->dev->kfd->shared_resources.enable_mes)
+			retval = map_queues_cpsch(dqm);
+		else if (q->properties.is_active)
+			retval = add_queue_mes(dqm, q, &pdd->qpd);
+	} else if (q->properties.is_active &&
+		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+		  q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+		  q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+		if (WARN(q->process->mm != current->mm,
+			 "should only run in user thread"))
+			retval = -EFAULT;
+		else
+			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
+						   q->pipe, q->queue,
+						   &q->properties, current->mm);
+	}
+
+out_unlock:
+	dqm_unlock(dqm);
+	return retval;
+}
+
+/* suspend_single_queue does not lock the dqm like the
+ * evict_process_queues_cpsch or evict_process_queues_nocpsch. You should
+ * lock the dqm before calling, and unlock after calling.
+ *
+ * The reason we don't lock the dqm is because this function may be
+ * called on multiple queues in a loop, so rather than locking/unlocking
+ * multiple times, we will just keep the dqm locked for all of the calls.
+ */
+static int suspend_single_queue(struct device_queue_manager *dqm,
+				      struct kfd_process_device *pdd,
+				      struct queue *q)
+{
+	bool is_new;
+
+	if (q->properties.is_suspended)
+		return 0;
+
+	pr_debug("Suspending PASID %u queue [%i]\n",
+			pdd->process->pasid,
+			q->properties.queue_id);
+
+	is_new = q->properties.exception_status & KFD_EC_MASK(EC_QUEUE_NEW);
+
+	if (is_new || q->properties.is_being_destroyed) {
+		pr_debug("Suspend: skip %s queue id %i\n",
+				is_new ? "new" : "destroyed",
+				q->properties.queue_id);
+		return -EBUSY;
+	}
+
+	q->properties.is_suspended = true;
+	if (q->properties.is_active) {
+		if (dqm->dev->kfd->shared_resources.enable_mes) {
+			int r = remove_queue_mes(dqm, q, &pdd->qpd);
+
+			if (r)
+				return r;
+		}
+
+		decrement_queue_count(dqm, &pdd->qpd, q);
+		q->properties.is_active = false;
+	}
+
+	return 0;
+}
+
+/* resume_single_queue does not lock the dqm like the functions
+ * restore_process_queues_cpsch or restore_process_queues_nocpsch. You should
+ * lock the dqm before calling, and unlock after calling.
+ *
+ * The reason we don't lock the dqm is because this function may be
+ * called on multiple queues in a loop, so rather than locking/unlocking
+ * multiple times, we will just keep the dqm locked for all of the calls.
+ */
+static int resume_single_queue(struct device_queue_manager *dqm,
+				      struct qcm_process_device *qpd,
+				      struct queue *q)
+{
+	struct kfd_process_device *pdd;
+
+	if (!q->properties.is_suspended)
+		return 0;
+
+	pdd = qpd_to_pdd(qpd);
+
+	pr_debug("Restoring from suspend PASID %u queue [%i]\n",
+			    pdd->process->pasid,
+			    q->properties.queue_id);
+
+	q->properties.is_suspended = false;
+
+	if (QUEUE_IS_ACTIVE(q->properties)) {
+		if (dqm->dev->kfd->shared_resources.enable_mes) {
+			int r = add_queue_mes(dqm, q, &pdd->qpd);
+
+			if (r)
+				return r;
+		}
+
+		q->properties.is_active = true;
+		increment_queue_count(dqm, qpd, q);
+	}
+
+	return 0;
+}
+
+static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
+					struct qcm_process_device *qpd)
+{
+	struct queue *q;
+	struct mqd_manager *mqd_mgr;
+	struct kfd_process_device *pdd;
+	int retval, ret = 0;
+
+	dqm_lock(dqm);
+	if (qpd->evicted++ > 0) /* already evicted, do nothing */
+		goto out;
+
+	pdd = qpd_to_pdd(qpd);
+	pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
+			    pdd->process->pasid);
+
+	pdd->last_evict_timestamp = get_jiffies_64();
+	/* Mark all queues as evicted. Deactivate all active queues on
+	 * the qpd.
+	 */
+	list_for_each_entry(q, &qpd->queues_list, list) {
+		q->properties.is_evicted = true;
+		if (!q->properties.is_active)
+			continue;
+
+		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+				q->properties.type)];
+		q->properties.is_active = false;
+		decrement_queue_count(dqm, qpd, q);
+
+		if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
+			continue;
+
+		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
+				(dqm->dev->kfd->cwsr_enabled ?
+				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
+				 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
+				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
+		if (retval && !ret)
+			/* Return the first error, but keep going to
+			 * maintain a consistent eviction state
+			 */
+			ret = retval;
+	}
+
+out:
+	dqm_unlock(dqm);
+	return ret;
+}
+
+static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
+				      struct qcm_process_device *qpd)
+{
+	struct queue *q;
+	struct kfd_process_device *pdd;
+	int retval = 0;
+
+	dqm_lock(dqm);
+	if (qpd->evicted++ > 0) /* already evicted, do nothing */
+		goto out;
+
+	pdd = qpd_to_pdd(qpd);
+
+	/* The debugger creates processes that temporarily have not acquired
+	 * all VMs for all devices and has no VMs itself.
+	 * Skip queue eviction on process eviction.
+	 */
+	if (!pdd->drm_priv)
+		goto out;
+
+	pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
+			    pdd->process->pasid);
+
+	/* Mark all queues as evicted. Deactivate all active queues on
+	 * the qpd.
+	 */
+	list_for_each_entry(q, &qpd->queues_list, list) {
+		q->properties.is_evicted = true;
+		if (!q->properties.is_active)
+			continue;
+
+		q->properties.is_active = false;
+		decrement_queue_count(dqm, qpd, q);
+
+		if (dqm->dev->kfd->shared_resources.enable_mes) {
+			retval = remove_queue_mes(dqm, q, qpd);
+			if (retval) {
+				pr_err("Failed to evict queue %d\n",
+					q->properties.queue_id);
+				goto out;
+			}
+		}
+	}
+	pdd->last_evict_timestamp = get_jiffies_64();
+	if (!dqm->dev->kfd->shared_resources.enable_mes)
+		retval = execute_queues_cpsch(dqm,
+					      qpd->is_debug ?
+					      KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
+					      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
+					      USE_DEFAULT_GRACE_PERIOD);
+
+out:
+	dqm_unlock(dqm);
+	return retval;
+}
+
+static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
+					  struct qcm_process_device *qpd)
+{
+	struct mm_struct *mm = NULL;
+	struct queue *q;
+	struct mqd_manager *mqd_mgr;
+	struct kfd_process_device *pdd;
+	uint64_t pd_base;
+	uint64_t eviction_duration;
+	int retval, ret = 0;
+
+	pdd = qpd_to_pdd(qpd);
+	/* Retrieve PD base */
+	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
+
+	dqm_lock(dqm);
+	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
+		goto out;
+	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
+		qpd->evicted--;
+		goto out;
+	}
+
+	pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
+			    pdd->process->pasid);
+
+	/* Update PD Base in QPD */
+	qpd->page_table_base = pd_base;
+	pr_debug("Updated PD address to 0x%llx\n", pd_base);
+
+	if (!list_empty(&qpd->queues_list)) {
+		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
+				dqm->dev->adev,
+				qpd->vmid,
+				qpd->page_table_base);
+		kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
+	}
+
+	/* Take a safe reference to the mm_struct, which may otherwise
+	 * disappear even while the kfd_process is still referenced.
+	 */
+	mm = get_task_mm(pdd->process->lead_thread);
+	if (!mm) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	/* Remove the eviction flags. Activate queues that are not
+	 * inactive for other reasons.
+	 */
+	list_for_each_entry(q, &qpd->queues_list, list) {
+		q->properties.is_evicted = false;
+		if (!QUEUE_IS_ACTIVE(q->properties))
+			continue;
+
+		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+				q->properties.type)];
+		q->properties.is_active = true;
+		increment_queue_count(dqm, qpd, q);
+
+		if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
+			continue;
+
+		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
+				       q->queue, &q->properties, mm);
+		if (retval && !ret)
+			/* Return the first error, but keep going to
+			 * maintain a consistent eviction state
+			 */
+			ret = retval;
+	}
+	qpd->evicted = 0;
+	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
+	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
+out:
+	if (mm)
+		mmput(mm);
+	dqm_unlock(dqm);
+	return ret;
+}
+
+static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
+					struct qcm_process_device *qpd)
+{
+	struct queue *q;
+	struct kfd_process_device *pdd;
+	uint64_t eviction_duration;
+	int retval = 0;
+
+	pdd = qpd_to_pdd(qpd);
+
+	dqm_lock(dqm);
+	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
+		goto out;
+	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
+		qpd->evicted--;
+		goto out;
+	}
+
+	/* The debugger creates processes that temporarily have not acquired
+	 * all VMs for all devices and has no VMs itself.
+	 * Skip queue restore on process restore.
+	 */
+	if (!pdd->drm_priv)
+		goto vm_not_acquired;
+
+	pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
+			    pdd->process->pasid);
+
+	/* Update PD Base in QPD */
+	qpd->page_table_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
+	pr_debug("Updated PD address to 0x%llx\n", qpd->page_table_base);
+
+	/* activate all active queues on the qpd */
+	list_for_each_entry(q, &qpd->queues_list, list) {
+		q->properties.is_evicted = false;
+		if (!QUEUE_IS_ACTIVE(q->properties))
+			continue;
+
+		q->properties.is_active = true;
+		increment_queue_count(dqm, &pdd->qpd, q);
+
+		if (dqm->dev->kfd->shared_resources.enable_mes) {
+			retval = add_queue_mes(dqm, q, qpd);
+			if (retval) {
+				pr_err("Failed to restore queue %d\n",
+					q->properties.queue_id);
+				goto out;
+			}
+		}
+	}
+	if (!dqm->dev->kfd->shared_resources.enable_mes)
+		retval = execute_queues_cpsch(dqm,
+					      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
+	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
+	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
+vm_not_acquired:
+	qpd->evicted = 0;
+out:
+	dqm_unlock(dqm);
+	return retval;
+}
+
+static int register_process(struct device_queue_manager *dqm,
+					struct qcm_process_device *qpd)
+{
+	struct device_process_node *n;
+	struct kfd_process_device *pdd;
+	uint64_t pd_base;
+	int retval;
+
+	n = kzalloc(sizeof(*n), GFP_KERNEL);
+	if (!n)
+		return -ENOMEM;
+
+	n->qpd = qpd;
+
+	pdd = qpd_to_pdd(qpd);
+	/* Retrieve PD base */
+	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
+
+	dqm_lock(dqm);
+	list_add(&n->list, &dqm->queues);
+
+	/* Update PD Base in QPD */
+	qpd->page_table_base = pd_base;
+	pr_debug("Updated PD address to 0x%llx\n", pd_base);
+
+	retval = dqm->asic_ops.update_qpd(dqm, qpd);
+
+	dqm->processes_count++;
+
+	dqm_unlock(dqm);
+
+	/* Outside the DQM lock because under the DQM lock we can't do
+	 * reclaim or take other locks that others hold while reclaiming.
+	 */
+	kfd_inc_compute_active(dqm->dev);
+
+	return retval;
+}
+
+static int unregister_process(struct device_queue_manager *dqm,
+					struct qcm_process_device *qpd)
+{
+	int retval;
+	struct device_process_node *cur, *next;
+
+	pr_debug("qpd->queues_list is %s\n",
+			list_empty(&qpd->queues_list) ? "empty" : "not empty");
+
+	retval = 0;
+	dqm_lock(dqm);
+
+	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
+		if (qpd == cur->qpd) {
+			list_del(&cur->list);
+			kfree(cur);
+			dqm->processes_count--;
+			goto out;
+		}
+	}
+	/* qpd not found in dqm list */
+	retval = 1;
+out:
+	dqm_unlock(dqm);
+
+	/* Outside the DQM lock because under the DQM lock we can't do
+	 * reclaim or take other locks that others hold while reclaiming.
+	 */
+	if (!retval)
+		kfd_dec_compute_active(dqm->dev);
+
+	return retval;
+}
+
+static int
+set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
+			unsigned int vmid)
+{
+	uint32_t xcc_mask = dqm->dev->xcc_mask;
+	int xcc_id, ret;
+
+	for_each_inst(xcc_id, xcc_mask) {
+		ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
+			dqm->dev->adev, pasid, vmid, xcc_id);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+static void init_interrupts(struct device_queue_manager *dqm)
+{
+	uint32_t xcc_mask = dqm->dev->xcc_mask;
+	unsigned int i, xcc_id;
+
+	for_each_inst(xcc_id, xcc_mask) {
+		for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) {
+			if (is_pipe_enabled(dqm, 0, i)) {
+				dqm->dev->kfd2kgd->init_interrupts(
+					dqm->dev->adev, i, xcc_id);
+			}
+		}
+	}
+}
+
+static int initialize_nocpsch(struct device_queue_manager *dqm)
+{
+	int pipe, queue;
+
+	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
+
+	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
+					sizeof(unsigned int), GFP_KERNEL);
+	if (!dqm->allocated_queues)
+		return -ENOMEM;
+
+	mutex_init(&dqm->lock_hidden);
+	INIT_LIST_HEAD(&dqm->queues);
+	dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
+	dqm->active_cp_queue_count = 0;
+	dqm->gws_queue_count = 0;
+
+	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
+		int pipe_offset = pipe * get_queues_per_pipe(dqm);
+
+		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
+			if (test_bit(pipe_offset + queue,
+				     dqm->dev->kfd->shared_resources.cp_queue_bitmap))
+				dqm->allocated_queues[pipe] |= 1 << queue;
+	}
+
+	memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
+
+	init_sdma_bitmaps(dqm);
+
+	return 0;
+}
+
+static void uninitialize(struct device_queue_manager *dqm)
+{
+	int i;
+
+	WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);
+
+	kfree(dqm->allocated_queues);
+	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
+		kfree(dqm->mqd_mgrs[i]);
+	mutex_destroy(&dqm->lock_hidden);
+}
+
+static int start_nocpsch(struct device_queue_manager *dqm)
+{
+	int r = 0;
+
+	pr_info("SW scheduler is used");
+	init_interrupts(dqm);
+
+	if (dqm->dev->adev->asic_type == CHIP_HAWAII)
+		r = pm_init(&dqm->packet_mgr, dqm);
+	if (!r)
+		dqm->sched_running = true;
+
+	return r;
+}
+
+static int stop_nocpsch(struct device_queue_manager *dqm)
+{
+	dqm_lock(dqm);
+	if (!dqm->sched_running) {
+		dqm_unlock(dqm);
+		return 0;
+	}
+
+	if (dqm->dev->adev->asic_type == CHIP_HAWAII)
+		pm_uninit(&dqm->packet_mgr, false);
+	dqm->sched_running = false;
+	dqm_unlock(dqm);
+
+	return 0;
+}
+
+static void pre_reset(struct device_queue_manager *dqm)
+{
+	dqm_lock(dqm);
+	dqm->is_resetting = true;
+	dqm_unlock(dqm);
+}
+
+static int allocate_sdma_queue(struct device_queue_manager *dqm,
+				struct queue *q, const uint32_t *restore_sdma_id)
+{
+	int bit;
+
+	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+		if (bitmap_empty(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
+			pr_err("No more SDMA queue to allocate\n");
+			return -ENOMEM;
+		}
+
+		if (restore_sdma_id) {
+			/* Re-use existing sdma_id */
+			if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) {
+				pr_err("SDMA queue already in use\n");
+				return -EBUSY;
+			}
+			clear_bit(*restore_sdma_id, dqm->sdma_bitmap);
+			q->sdma_id = *restore_sdma_id;
+		} else {
+			/* Find first available sdma_id */
+			bit = find_first_bit(dqm->sdma_bitmap,
+					     get_num_sdma_queues(dqm));
+			clear_bit(bit, dqm->sdma_bitmap);
+			q->sdma_id = bit;
+		}
+
+		q->properties.sdma_engine_id =
+			q->sdma_id % kfd_get_num_sdma_engines(dqm->dev);
+		q->properties.sdma_queue_id = q->sdma_id /
+				kfd_get_num_sdma_engines(dqm->dev);
+	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+		if (bitmap_empty(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
+			pr_err("No more XGMI SDMA queue to allocate\n");
+			return -ENOMEM;
+		}
+		if (restore_sdma_id) {
+			/* Re-use existing sdma_id */
+			if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) {
+				pr_err("SDMA queue already in use\n");
+				return -EBUSY;
+			}
+			clear_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap);
+			q->sdma_id = *restore_sdma_id;
+		} else {
+			bit = find_first_bit(dqm->xgmi_sdma_bitmap,
+					     get_num_xgmi_sdma_queues(dqm));
+			clear_bit(bit, dqm->xgmi_sdma_bitmap);
+			q->sdma_id = bit;
+		}
+		/* sdma_engine_id is sdma id including
+		 * both PCIe-optimized SDMAs and XGMI-
+		 * optimized SDMAs. The calculation below
+		 * assumes the first N engines are always
+		 * PCIe-optimized ones
+		 */
+		q->properties.sdma_engine_id =
+			kfd_get_num_sdma_engines(dqm->dev) +
+			q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
+		q->properties.sdma_queue_id = q->sdma_id /
+			kfd_get_num_xgmi_sdma_engines(dqm->dev);
+	}
+
+	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
+	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
+
+	return 0;
+}
+
+static void deallocate_sdma_queue(struct device_queue_manager *dqm,
+				struct queue *q)
+{
+	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+		if (q->sdma_id >= get_num_sdma_queues(dqm))
+			return;
+		set_bit(q->sdma_id, dqm->sdma_bitmap);
+	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+		if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
+			return;
+		set_bit(q->sdma_id, dqm->xgmi_sdma_bitmap);
+	}
+}
+
+/*
+ * Device Queue Manager implementation for cp scheduler
+ */
+
+static int set_sched_resources(struct device_queue_manager *dqm)
+{
+	int i, mec;
+	struct scheduling_resources res;
+
+	res.vmid_mask = dqm->dev->compute_vmid_bitmap;
+
+	res.queue_mask = 0;
+	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
+		mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe)
+			/ dqm->dev->kfd->shared_resources.num_pipe_per_mec;
+
+		if (!test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap))
+			continue;
+
+		/* only acquire queues from the first MEC */
+		if (mec > 0)
+			continue;
+
+		/* This situation may be hit in the future if a new HW
+		 * generation exposes more than 64 queues. If so, the
+		 * definition of res.queue_mask needs updating
+		 */
+		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
+			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
+			break;
+		}
+
+		res.queue_mask |= 1ull
+			<< amdgpu_queue_mask_bit_to_set_resource_bit(
+				dqm->dev->adev, i);
+	}
+	res.gws_mask = ~0ull;
+	res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;
+
+	pr_debug("Scheduling resources:\n"
+			"vmid mask: 0x%8X\n"
+			"queue mask: 0x%8llX\n",
+			res.vmid_mask, res.queue_mask);
+
+	return pm_send_set_resources(&dqm->packet_mgr, &res);
+}
+
+static int initialize_cpsch(struct device_queue_manager *dqm)
+{
+	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
+
+	mutex_init(&dqm->lock_hidden);
+	INIT_LIST_HEAD(&dqm->queues);
+	dqm->active_queue_count = dqm->processes_count = 0;
+	dqm->active_cp_queue_count = 0;
+	dqm->gws_queue_count = 0;
+	dqm->active_runlist = false;
+	INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
+	dqm->trap_debug_vmid = 0;
+
+	init_sdma_bitmaps(dqm);
+
+	if (dqm->dev->kfd2kgd->get_iq_wait_times)
+		dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
+					&dqm->wait_times,
+					ffs(dqm->dev->xcc_mask) - 1);
+	return 0;
+}
+
+static int start_cpsch(struct device_queue_manager *dqm)
+{
+	int retval;
+
+	retval = 0;
+
+	dqm_lock(dqm);
+
+	if (!dqm->dev->kfd->shared_resources.enable_mes) {
+		retval = pm_init(&dqm->packet_mgr, dqm);
+		if (retval)
+			goto fail_packet_manager_init;
+
+		retval = set_sched_resources(dqm);
+		if (retval)
+			goto fail_set_sched_resources;
+	}
+	pr_debug("Allocating fence memory\n");
+
+	/* allocate fence memory on the gart */
+	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
+					&dqm->fence_mem);
+
+	if (retval)
+		goto fail_allocate_vidmem;
+
+	dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr;
+	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
+
+	init_interrupts(dqm);
+
+	/* clear hang status when driver try to start the hw scheduler */
+	dqm->is_hws_hang = false;
+	dqm->is_resetting = false;
+	dqm->sched_running = true;
+
+	if (!dqm->dev->kfd->shared_resources.enable_mes)
+		execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
+
+	/* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
+	if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu &&
+	    (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))) {
+		uint32_t reg_offset = 0;
+		uint32_t grace_period = 1;
+
+		retval = pm_update_grace_period(&dqm->packet_mgr,
+						grace_period);
+		if (retval)
+			pr_err("Setting grace timeout failed\n");
+		else if (dqm->dev->kfd2kgd->build_grace_period_packet_info)
+			/* Update dqm->wait_times maintained in software */
+			dqm->dev->kfd2kgd->build_grace_period_packet_info(
+					dqm->dev->adev,	dqm->wait_times,
+					grace_period, &reg_offset,
+					&dqm->wait_times);
+	}
+
+	dqm_unlock(dqm);
+
+	return 0;
+fail_allocate_vidmem:
+fail_set_sched_resources:
+	if (!dqm->dev->kfd->shared_resources.enable_mes)
+		pm_uninit(&dqm->packet_mgr, false);
+fail_packet_manager_init:
+	dqm_unlock(dqm);
+	return retval;
+}
+
+static int stop_cpsch(struct device_queue_manager *dqm)
+{
+	bool hanging;
+
+	dqm_lock(dqm);
+	if (!dqm->sched_running) {
+		dqm_unlock(dqm);
+		return 0;
+	}
+
+	if (!dqm->is_hws_hang) {
+		if (!dqm->dev->kfd->shared_resources.enable_mes)
+			unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
+		else
+			remove_all_queues_mes(dqm);
+	}
+
+	hanging = dqm->is_hws_hang || dqm->is_resetting;
+	dqm->sched_running = false;
+
+	if (!dqm->dev->kfd->shared_resources.enable_mes)
+		pm_release_ib(&dqm->packet_mgr);
+
+	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
+	if (!dqm->dev->kfd->shared_resources.enable_mes)
+		pm_uninit(&dqm->packet_mgr, hanging);
+	dqm_unlock(dqm);
+
+	return 0;
+}
+
+static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
+					struct kernel_queue *kq,
+					struct qcm_process_device *qpd)
+{
+	dqm_lock(dqm);
+	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+		pr_warn("Can't create new kernel queue because %d queues were already created\n",
+				dqm->total_queue_count);
+		dqm_unlock(dqm);
+		return -EPERM;
+	}
+
+	/*
+	 * Unconditionally increment this counter, regardless of the queue's
+	 * type or whether the queue is active.
+	 */
+	dqm->total_queue_count++;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+
+	list_add(&kq->list, &qpd->priv_queue_list);
+	increment_queue_count(dqm, qpd, kq->queue);
+	qpd->is_debug = true;
+	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
+			USE_DEFAULT_GRACE_PERIOD);
+	dqm_unlock(dqm);
+
+	return 0;
+}
+
+static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
+					struct kernel_queue *kq,
+					struct qcm_process_device *qpd)
+{
+	dqm_lock(dqm);
+	list_del(&kq->list);
+	decrement_queue_count(dqm, qpd, kq->queue);
+	qpd->is_debug = false;
+	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
+			USE_DEFAULT_GRACE_PERIOD);
+	/*
+	 * Unconditionally decrement this counter, regardless of the queue's
+	 * type.
+	 */
+	dqm->total_queue_count--;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+	dqm_unlock(dqm);
+}
+
+static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
+			struct qcm_process_device *qpd,
+			const struct kfd_criu_queue_priv_data *qd,
+			const void *restore_mqd, const void *restore_ctl_stack)
+{
+	int retval;
+	struct mqd_manager *mqd_mgr;
+
+	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+		pr_warn("Can't create new usermode queue because %d queues were already created\n",
+				dqm->total_queue_count);
+		retval = -EPERM;
+		goto out;
+	}
+
+	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+		dqm_lock(dqm);
+		retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
+		dqm_unlock(dqm);
+		if (retval)
+			goto out;
+	}
+
+	retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
+	if (retval)
+		goto out_deallocate_sdma_queue;
+
+	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+			q->properties.type)];
+
+	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
+	q->properties.tba_addr = qpd->tba_addr;
+	q->properties.tma_addr = qpd->tma_addr;
+	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
+	if (!q->mqd_mem_obj) {
+		retval = -ENOMEM;
+		goto out_deallocate_doorbell;
+	}
+
+	dqm_lock(dqm);
+	/*
+	 * Eviction state logic: mark all queues as evicted, even ones
+	 * not currently active. Restoring inactive queues later only
+	 * updates the is_evicted flag but is a no-op otherwise.
+	 */
+	q->properties.is_evicted = !!qpd->evicted;
+	q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled &&
+				  kfd_dbg_has_cwsr_workaround(q->device);
+
+	if (qd)
+		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
+				     &q->properties, restore_mqd, restore_ctl_stack,
+				     qd->ctl_stack_size);
+	else
+		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
+					&q->gart_mqd_addr, &q->properties);
+
+	list_add(&q->list, &qpd->queues_list);
+	qpd->queue_count++;
+
+	if (q->properties.is_active) {
+		increment_queue_count(dqm, qpd, q);
+
+		if (!dqm->dev->kfd->shared_resources.enable_mes)
+			retval = execute_queues_cpsch(dqm,
+					KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
+		else
+			retval = add_queue_mes(dqm, q, qpd);
+		if (retval)
+			goto cleanup_queue;
+	}
+
+	/*
+	 * Unconditionally increment this counter, regardless of the queue's
+	 * type or whether the queue is active.
+	 */
+	dqm->total_queue_count++;
+
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+
+	dqm_unlock(dqm);
+	return retval;
+
+cleanup_queue:
+	qpd->queue_count--;
+	list_del(&q->list);
+	if (q->properties.is_active)
+		decrement_queue_count(dqm, qpd, q);
+	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+	dqm_unlock(dqm);
+out_deallocate_doorbell:
+	deallocate_doorbell(qpd, q);
+out_deallocate_sdma_queue:
+	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+		dqm_lock(dqm);
+		deallocate_sdma_queue(dqm, q);
+		dqm_unlock(dqm);
+	}
+out:
+	return retval;
+}
+
+int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
+				uint64_t fence_value,
+				unsigned int timeout_ms)
+{
+	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
+
+	while (*fence_addr != fence_value) {
+		if (time_after(jiffies, end_jiffies)) {
+			pr_err("qcm fence wait loop timeout expired\n");
+			/* In HWS case, this is used to halt the driver thread
+			 * in order not to mess up CP states before doing
+			 * scandumps for FW debugging.
+			 */
+			while (halt_if_hws_hang)
+				schedule();
+
+			return -ETIME;
+		}
+		schedule();
+	}
+
+	return 0;
+}
+
+/* dqm->lock mutex has to be locked before calling this function */
+static int map_queues_cpsch(struct device_queue_manager *dqm)
+{
+	int retval;
+
+	if (!dqm->sched_running)
+		return 0;
+	if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0)
+		return 0;
+	if (dqm->active_runlist)
+		return 0;
+
+	retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues);
+	pr_debug("%s sent runlist\n", __func__);
+	if (retval) {
+		pr_err("failed to execute runlist\n");
+		return retval;
+	}
+	dqm->active_runlist = true;
+
+	return retval;
+}
+
+/* dqm->lock mutex has to be locked before calling this function */
+static int unmap_queues_cpsch(struct device_queue_manager *dqm,
+				enum kfd_unmap_queues_filter filter,
+				uint32_t filter_param,
+				uint32_t grace_period,
+				bool reset)
+{
+	int retval = 0;
+	struct mqd_manager *mqd_mgr;
+
+	if (!dqm->sched_running)
+		return 0;
+	if (dqm->is_hws_hang || dqm->is_resetting)
+		return -EIO;
+	if (!dqm->active_runlist)
+		return retval;
+
+	if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
+		retval = pm_update_grace_period(&dqm->packet_mgr, grace_period);
+		if (retval)
+			return retval;
+	}
+
+	retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
+	if (retval)
+		return retval;
+
+	*dqm->fence_addr = KFD_FENCE_INIT;
+	pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
+				KFD_FENCE_COMPLETED);
+	/* should be timed out */
+	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
+				queue_preemption_timeout_ms);
+	if (retval) {
+		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
+		kfd_hws_hang(dqm);
+		return retval;
+	}
+
+	/* In the current MEC firmware implementation, if compute queue
+	 * doesn't response to the preemption request in time, HIQ will
+	 * abandon the unmap request without returning any timeout error
+	 * to driver. Instead, MEC firmware will log the doorbell of the
+	 * unresponding compute queue to HIQ.MQD.queue_doorbell_id fields.
+	 * To make sure the queue unmap was successful, driver need to
+	 * check those fields
+	 */
+	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
+	if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) {
+		pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
+		while (halt_if_hws_hang)
+			schedule();
+		return -ETIME;
+	}
+
+	/* We need to reset the grace period value for this device */
+	if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
+		if (pm_update_grace_period(&dqm->packet_mgr,
+					USE_DEFAULT_GRACE_PERIOD))
+			pr_err("Failed to reset grace period\n");
+	}
+
+	pm_release_ib(&dqm->packet_mgr);
+	dqm->active_runlist = false;
+
+	return retval;
+}
+
+/* only for compute queue */
+static int reset_queues_cpsch(struct device_queue_manager *dqm,
+			uint16_t pasid)
+{
+	int retval;
+
+	dqm_lock(dqm);
+
+	retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
+			pasid, USE_DEFAULT_GRACE_PERIOD, true);
+
+	dqm_unlock(dqm);
+	return retval;
+}
+
+/* dqm->lock mutex has to be locked before calling this function */
+static int execute_queues_cpsch(struct device_queue_manager *dqm,
+				enum kfd_unmap_queues_filter filter,
+				uint32_t filter_param,
+				uint32_t grace_period)
+{
+	int retval;
+
+	if (dqm->is_hws_hang)
+		return -EIO;
+	retval = unmap_queues_cpsch(dqm, filter, filter_param, grace_period, false);
+	if (retval)
+		return retval;
+
+	return map_queues_cpsch(dqm);
+}
+
+static int wait_on_destroy_queue(struct device_queue_manager *dqm,
+				 struct queue *q)
+{
+	struct kfd_process_device *pdd = kfd_get_process_device_data(q->device,
+								q->process);
+	int ret = 0;
+
+	if (pdd->qpd.is_debug)
+		return ret;
+
+	q->properties.is_being_destroyed = true;
+
+	if (pdd->process->debug_trap_enabled && q->properties.is_suspended) {
+		dqm_unlock(dqm);
+		mutex_unlock(&q->process->mutex);
+		ret = wait_event_interruptible(dqm->destroy_wait,
+						!q->properties.is_suspended);
+
+		mutex_lock(&q->process->mutex);
+		dqm_lock(dqm);
+	}
+
+	return ret;
+}
+
+static int destroy_queue_cpsch(struct device_queue_manager *dqm,
+				struct qcm_process_device *qpd,
+				struct queue *q)
+{
+	int retval;
+	struct mqd_manager *mqd_mgr;
+	uint64_t sdma_val = 0;
+	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
+
+	/* Get the SDMA queue stats */
+	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
+	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
+							&sdma_val);
+		if (retval)
+			pr_err("Failed to read SDMA queue counter for queue: %d\n",
+				q->properties.queue_id);
+	}
+
+	/* remove queue from list to prevent rescheduling after preemption */
+	dqm_lock(dqm);
+
+	retval = wait_on_destroy_queue(dqm, q);
+
+	if (retval) {
+		dqm_unlock(dqm);
+		return retval;
+	}
+
+	if (qpd->is_debug) {
+		/*
+		 * error, currently we do not allow to destroy a queue
+		 * of a currently debugged process
+		 */
+		retval = -EBUSY;
+		goto failed_try_destroy_debugged_queue;
+
+	}
+
+	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+			q->properties.type)];
+
+	deallocate_doorbell(qpd, q);
+
+	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
+	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+		deallocate_sdma_queue(dqm, q);
+		pdd->sdma_past_activity_counter += sdma_val;
+	}
+
+	list_del(&q->list);
+	qpd->queue_count--;
+	if (q->properties.is_active) {
+		decrement_queue_count(dqm, qpd, q);
+		if (!dqm->dev->kfd->shared_resources.enable_mes) {
+			retval = execute_queues_cpsch(dqm,
+						      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
+						      USE_DEFAULT_GRACE_PERIOD);
+			if (retval == -ETIME)
+				qpd->reset_wavefronts = true;
+		} else {
+			retval = remove_queue_mes(dqm, q, qpd);
+		}
+	}
+
+	/*
+	 * Unconditionally decrement this counter, regardless of the queue's
+	 * type
+	 */
+	dqm->total_queue_count--;
+	pr_debug("Total of %d queues are accountable so far\n",
+			dqm->total_queue_count);
+
+	dqm_unlock(dqm);
+
+	/*
+	 * Do free_mqd and raise delete event after dqm_unlock(dqm) to avoid
+	 * circular locking
+	 */
+	kfd_dbg_ev_raise(KFD_EC_MASK(EC_DEVICE_QUEUE_DELETE),
+				qpd->pqm->process, q->device,
+				-1, false, NULL, 0);
+
+	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+
+	return retval;
+
+failed_try_destroy_debugged_queue:
+
+	dqm_unlock(dqm);
+	return retval;
+}
+
+/*
+ * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
+ * stay in user mode.
+ */
+#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
+/* APE1 limit is inclusive and 64K aligned. */
+#define APE1_LIMIT_ALIGNMENT 0xFFFF
+
+static bool set_cache_memory_policy(struct device_queue_manager *dqm,
+				   struct qcm_process_device *qpd,
+				   enum cache_policy default_policy,
+				   enum cache_policy alternate_policy,
+				   void __user *alternate_aperture_base,
+				   uint64_t alternate_aperture_size)
+{
+	bool retval = true;
+
+	if (!dqm->asic_ops.set_cache_memory_policy)
+		return retval;
+
+	dqm_lock(dqm);
+
+	if (alternate_aperture_size == 0) {
+		/* base > limit disables APE1 */
+		qpd->sh_mem_ape1_base = 1;
+		qpd->sh_mem_ape1_limit = 0;
+	} else {
+		/*
+		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
+		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
+		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
+		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
+		 * Verify that the base and size parameters can be
+		 * represented in this format and convert them.
+		 * Additionally restrict APE1 to user-mode addresses.
+		 */
+
+		uint64_t base = (uintptr_t)alternate_aperture_base;
+		uint64_t limit = base + alternate_aperture_size - 1;
+
+		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
+		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
+			retval = false;
+			goto out;
+		}
+
+		qpd->sh_mem_ape1_base = base >> 16;
+		qpd->sh_mem_ape1_limit = limit >> 16;
+	}
+
+	retval = dqm->asic_ops.set_cache_memory_policy(
+			dqm,
+			qpd,
+			default_policy,
+			alternate_policy,
+			alternate_aperture_base,
+			alternate_aperture_size);
+
+	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
+		program_sh_mem_settings(dqm, qpd);
+
+	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
+		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
+		qpd->sh_mem_ape1_limit);
+
+out:
+	dqm_unlock(dqm);
+	return retval;
+}
+
+static int process_termination_nocpsch(struct device_queue_manager *dqm,
+		struct qcm_process_device *qpd)
+{
+	struct queue *q;
+	struct device_process_node *cur, *next_dpn;
+	int retval = 0;
+	bool found = false;
+
+	dqm_lock(dqm);
+
+	/* Clear all user mode queues */
+	while (!list_empty(&qpd->queues_list)) {
+		struct mqd_manager *mqd_mgr;
+		int ret;
+
+		q = list_first_entry(&qpd->queues_list, struct queue, list);
+		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+				q->properties.type)];
+		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
+		if (ret)
+			retval = ret;
+		dqm_unlock(dqm);
+		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+		dqm_lock(dqm);
+	}
+
+	/* Unregister process */
+	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
+		if (qpd == cur->qpd) {
+			list_del(&cur->list);
+			kfree(cur);
+			dqm->processes_count--;
+			found = true;
+			break;
+		}
+	}
+
+	dqm_unlock(dqm);
+
+	/* Outside the DQM lock because under the DQM lock we can't do
+	 * reclaim or take other locks that others hold while reclaiming.
+	 */
+	if (found)
+		kfd_dec_compute_active(dqm->dev);
+
+	return retval;
+}
+
+static int get_wave_state(struct device_queue_manager *dqm,
+			  struct queue *q,
+			  void __user *ctl_stack,
+			  u32 *ctl_stack_used_size,
+			  u32 *save_area_used_size)
+{
+	struct mqd_manager *mqd_mgr;
+
+	dqm_lock(dqm);
+
+	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
+
+	if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
+	    q->properties.is_active || !q->device->kfd->cwsr_enabled ||
+	    !mqd_mgr->get_wave_state) {
+		dqm_unlock(dqm);
+		return -EINVAL;
+	}
+
+	dqm_unlock(dqm);
+
+	/*
+	 * get_wave_state is outside the dqm lock to prevent circular locking
+	 * and the queue should be protected against destruction by the process
+	 * lock.
+	 */
+	return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, &q->properties,
+			ctl_stack, ctl_stack_used_size, save_area_used_size);
+}
+
+static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
+			const struct queue *q,
+			u32 *mqd_size,
+			u32 *ctl_stack_size)
+{
+	struct mqd_manager *mqd_mgr;
+	enum KFD_MQD_TYPE mqd_type =
+			get_mqd_type_from_queue_type(q->properties.type);
+
+	dqm_lock(dqm);
+	mqd_mgr = dqm->mqd_mgrs[mqd_type];
+	*mqd_size = mqd_mgr->mqd_size;
+	*ctl_stack_size = 0;
+
+	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info)
+		mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);
+
+	dqm_unlock(dqm);
+}
+
+static int checkpoint_mqd(struct device_queue_manager *dqm,
+			  const struct queue *q,
+			  void *mqd,
+			  void *ctl_stack)
+{
+	struct mqd_manager *mqd_mgr;
+	int r = 0;
+	enum KFD_MQD_TYPE mqd_type =
+			get_mqd_type_from_queue_type(q->properties.type);
+
+	dqm_lock(dqm);
+
+	if (q->properties.is_active || !q->device->kfd->cwsr_enabled) {
+		r = -EINVAL;
+		goto dqm_unlock;
+	}
+
+	mqd_mgr = dqm->mqd_mgrs[mqd_type];
+	if (!mqd_mgr->checkpoint_mqd) {
+		r = -EOPNOTSUPP;
+		goto dqm_unlock;
+	}
+
+	mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack);
+
+dqm_unlock:
+	dqm_unlock(dqm);
+	return r;
+}
+
+static int process_termination_cpsch(struct device_queue_manager *dqm,
+		struct qcm_process_device *qpd)
+{
+	int retval;
+	struct queue *q;
+	struct kernel_queue *kq, *kq_next;
+	struct mqd_manager *mqd_mgr;
+	struct device_process_node *cur, *next_dpn;
+	enum kfd_unmap_queues_filter filter =
+		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
+	bool found = false;
+
+	retval = 0;
+
+	dqm_lock(dqm);
+
+	/* Clean all kernel queues */
+	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
+		list_del(&kq->list);
+		decrement_queue_count(dqm, qpd, kq->queue);
+		qpd->is_debug = false;
+		dqm->total_queue_count--;
+		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
+	}
+
+	/* Clear all user mode queues */
+	list_for_each_entry(q, &qpd->queues_list, list) {
+		if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+			deallocate_sdma_queue(dqm, q);
+		else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+			deallocate_sdma_queue(dqm, q);
+
+		if (q->properties.is_active) {
+			decrement_queue_count(dqm, qpd, q);
+
+			if (dqm->dev->kfd->shared_resources.enable_mes) {
+				retval = remove_queue_mes(dqm, q, qpd);
+				if (retval)
+					pr_err("Failed to remove queue %d\n",
+						q->properties.queue_id);
+			}
+		}
+
+		dqm->total_queue_count--;
+	}
+
+	/* Unregister process */
+	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
+		if (qpd == cur->qpd) {
+			list_del(&cur->list);
+			kfree(cur);
+			dqm->processes_count--;
+			found = true;
+			break;
+		}
+	}
+
+	if (!dqm->dev->kfd->shared_resources.enable_mes)
+		retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD);
+
+	if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
+		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
+		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
+		qpd->reset_wavefronts = false;
+	}
+
+	/* Lastly, free mqd resources.
+	 * Do free_mqd() after dqm_unlock to avoid circular locking.
+	 */
+	while (!list_empty(&qpd->queues_list)) {
+		q = list_first_entry(&qpd->queues_list, struct queue, list);
+		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+				q->properties.type)];
+		list_del(&q->list);
+		qpd->queue_count--;
+		dqm_unlock(dqm);
+		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+		dqm_lock(dqm);
+	}
+	dqm_unlock(dqm);
+
+	/* Outside the DQM lock because under the DQM lock we can't do
+	 * reclaim or take other locks that others hold while reclaiming.
+	 */
+	if (found)
+		kfd_dec_compute_active(dqm->dev);
+
+	return retval;
+}
+
+static int init_mqd_managers(struct device_queue_manager *dqm)
+{
+	int i, j;
+	struct mqd_manager *mqd_mgr;
+
+	for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
+		mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
+		if (!mqd_mgr) {
+			pr_err("mqd manager [%d] initialization failed\n", i);
+			goto out_free;
+		}
+		dqm->mqd_mgrs[i] = mqd_mgr;
+	}
+
+	return 0;
+
+out_free:
+	for (j = 0; j < i; j++) {
+		kfree(dqm->mqd_mgrs[j]);
+		dqm->mqd_mgrs[j] = NULL;
+	}
+
+	return -ENOMEM;
+}
+
+/* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/
+static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
+{
+	int retval;
+	struct kfd_node *dev = dqm->dev;
+	struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
+	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
+		get_num_all_sdma_engines(dqm) *
+		dev->kfd->device_info.num_sdma_queues_per_engine +
+		(dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
+		NUM_XCC(dqm->dev->xcc_mask));
+
+	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
+		&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
+		(void *)&(mem_obj->cpu_ptr), false);
+
+	return retval;
+}
+
+struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
+{
+	struct device_queue_manager *dqm;
+
+	pr_debug("Loading device queue manager\n");
+
+	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
+	if (!dqm)
+		return NULL;
+
+	switch (dev->adev->asic_type) {
+	/* HWS is not available on Hawaii. */
+	case CHIP_HAWAII:
+	/* HWS depends on CWSR for timely dequeue. CWSR is not
+	 * available on Tonga.
+	 *
+	 * FIXME: This argument also applies to Kaveri.
+	 */
+	case CHIP_TONGA:
+		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
+		break;
+	default:
+		dqm->sched_policy = sched_policy;
+		break;
+	}
+
+	dqm->dev = dev;
+	switch (dqm->sched_policy) {
+	case KFD_SCHED_POLICY_HWS:
+	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
+		/* initialize dqm for cp scheduling */
+		dqm->ops.create_queue = create_queue_cpsch;
+		dqm->ops.initialize = initialize_cpsch;
+		dqm->ops.start = start_cpsch;
+		dqm->ops.stop = stop_cpsch;
+		dqm->ops.pre_reset = pre_reset;
+		dqm->ops.destroy_queue = destroy_queue_cpsch;
+		dqm->ops.update_queue = update_queue;
+		dqm->ops.register_process = register_process;
+		dqm->ops.unregister_process = unregister_process;
+		dqm->ops.uninitialize = uninitialize;
+		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
+		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
+		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
+		dqm->ops.process_termination = process_termination_cpsch;
+		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
+		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
+		dqm->ops.get_wave_state = get_wave_state;
+		dqm->ops.reset_queues = reset_queues_cpsch;
+		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
+		dqm->ops.checkpoint_mqd = checkpoint_mqd;
+		break;
+	case KFD_SCHED_POLICY_NO_HWS:
+		/* initialize dqm for no cp scheduling */
+		dqm->ops.start = start_nocpsch;
+		dqm->ops.stop = stop_nocpsch;
+		dqm->ops.pre_reset = pre_reset;
+		dqm->ops.create_queue = create_queue_nocpsch;
+		dqm->ops.destroy_queue = destroy_queue_nocpsch;
+		dqm->ops.update_queue = update_queue;
+		dqm->ops.register_process = register_process;
+		dqm->ops.unregister_process = unregister_process;
+		dqm->ops.initialize = initialize_nocpsch;
+		dqm->ops.uninitialize = uninitialize;
+		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
+		dqm->ops.process_termination = process_termination_nocpsch;
+		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
+		dqm->ops.restore_process_queues =
+			restore_process_queues_nocpsch;
+		dqm->ops.get_wave_state = get_wave_state;
+		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
+		dqm->ops.checkpoint_mqd = checkpoint_mqd;
+		break;
+	default:
+		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
+		goto out_free;
+	}
+
+	switch (dev->adev->asic_type) {
+	case CHIP_KAVERI:
+	case CHIP_HAWAII:
+		device_queue_manager_init_cik(&dqm->asic_ops);
+		break;
+
+	case CHIP_CARRIZO:
+	case CHIP_TONGA:
+	case CHIP_FIJI:
+	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
+		device_queue_manager_init_vi(&dqm->asic_ops);
+		break;
+
+	default:
+		if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
+			device_queue_manager_init_v11(&dqm->asic_ops);
+		else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
+			device_queue_manager_init_v10(&dqm->asic_ops);
+		else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
+			device_queue_manager_init_v9(&dqm->asic_ops);
+		else {
+			WARN(1, "Unexpected ASIC family %u",
+			     dev->adev->asic_type);
+			goto out_free;
+		}
+	}
+
+	if (init_mqd_managers(dqm))
+		goto out_free;
+
+	if (!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) {
+		pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
+		goto out_free;
+	}
+
+	if (!dqm->ops.initialize(dqm)) {
+		init_waitqueue_head(&dqm->destroy_wait);
+		return dqm;
+	}
+
+out_free:
+	kfree(dqm);
+	return NULL;
+}
+
+static void deallocate_hiq_sdma_mqd(struct kfd_node *dev,
+				    struct kfd_mem_obj *mqd)
+{
+	WARN(!mqd, "No hiq sdma mqd trunk to free");
+
+	amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem);
+}
+
+void device_queue_manager_uninit(struct device_queue_manager *dqm)
+{
+	dqm->ops.stop(dqm);
+	dqm->ops.uninitialize(dqm);
+	if (!dqm->dev->kfd->shared_resources.enable_mes)
+		deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
+	kfree(dqm);
+}
+
+int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid)
+{
+	struct kfd_process_device *pdd;
+	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+	int ret = 0;
+
+	if (!p)
+		return -EINVAL;
+	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
+	pdd = kfd_get_process_device_data(dqm->dev, p);
+	if (pdd)
+		ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
+	kfd_unref_process(p);
+
+	return ret;
+}
+
+static void kfd_process_hw_exception(struct work_struct *work)
+{
+	struct device_queue_manager *dqm = container_of(work,
+			struct device_queue_manager, hw_exception_work);
+	amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
+}
+
+int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
+				struct qcm_process_device *qpd)
+{
+	int r;
+	int updated_vmid_mask;
+
+	if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+		pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
+		return -EINVAL;
+	}
+
+	dqm_lock(dqm);
+
+	if (dqm->trap_debug_vmid != 0) {
+		pr_err("Trap debug id already reserved\n");
+		r = -EBUSY;
+		goto out_unlock;
+	}
+
+	r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
+			USE_DEFAULT_GRACE_PERIOD, false);
+	if (r)
+		goto out_unlock;
+
+	updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap;
+	updated_vmid_mask &= ~(1 << dqm->dev->vm_info.last_vmid_kfd);
+
+	dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask;
+	dqm->trap_debug_vmid = dqm->dev->vm_info.last_vmid_kfd;
+	r = set_sched_resources(dqm);
+	if (r)
+		goto out_unlock;
+
+	r = map_queues_cpsch(dqm);
+	if (r)
+		goto out_unlock;
+
+	pr_debug("Reserved VMID for trap debug: %i\n", dqm->trap_debug_vmid);
+
+out_unlock:
+	dqm_unlock(dqm);
+	return r;
+}
+
+/*
+ * Releases vmid for the trap debugger
+ */
+int release_debug_trap_vmid(struct device_queue_manager *dqm,
+			struct qcm_process_device *qpd)
+{
+	int r;
+	int updated_vmid_mask;
+	uint32_t trap_debug_vmid;
+
+	if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+		pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
+		return -EINVAL;
+	}
+
+	dqm_lock(dqm);
+	trap_debug_vmid = dqm->trap_debug_vmid;
+	if (dqm->trap_debug_vmid == 0) {
+		pr_err("Trap debug id is not reserved\n");
+		r = -EINVAL;
+		goto out_unlock;
+	}
+
+	r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
+			USE_DEFAULT_GRACE_PERIOD, false);
+	if (r)
+		goto out_unlock;
+
+	updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap;
+	updated_vmid_mask |= (1 << dqm->dev->vm_info.last_vmid_kfd);
+
+	dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask;
+	dqm->trap_debug_vmid = 0;
+	r = set_sched_resources(dqm);
+	if (r)
+		goto out_unlock;
+
+	r = map_queues_cpsch(dqm);
+	if (r)
+		goto out_unlock;
+
+	pr_debug("Released VMID for trap debug: %i\n", trap_debug_vmid);
+
+out_unlock:
+	dqm_unlock(dqm);
+	return r;
+}
+
+#define QUEUE_NOT_FOUND		-1
+/* invalidate queue operation in array */
+static void q_array_invalidate(uint32_t num_queues, uint32_t *queue_ids)
+{
+	int i;
+
+	for (i = 0; i < num_queues; i++)
+		queue_ids[i] |= KFD_DBG_QUEUE_INVALID_MASK;
+}
+
+/* find queue index in array */
+static int q_array_get_index(unsigned int queue_id,
+		uint32_t num_queues,
+		uint32_t *queue_ids)
+{
+	int i;
+
+	for (i = 0; i < num_queues; i++)
+		if (queue_id == (queue_ids[i] & ~KFD_DBG_QUEUE_INVALID_MASK))
+			return i;
+
+	return QUEUE_NOT_FOUND;
+}
+
+struct copy_context_work_handler_workarea {
+	struct work_struct copy_context_work;
+	struct kfd_process *p;
+};
+
+static void copy_context_work_handler (struct work_struct *work)
+{
+	struct copy_context_work_handler_workarea *workarea;
+	struct mqd_manager *mqd_mgr;
+	struct queue *q;
+	struct mm_struct *mm;
+	struct kfd_process *p;
+	uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size;
+	int i;
+
+	workarea = container_of(work,
+			struct copy_context_work_handler_workarea,
+			copy_context_work);
+
+	p = workarea->p;
+	mm = get_task_mm(p->lead_thread);
+
+	if (!mm)
+		return;
+
+	kthread_use_mm(mm);
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+		struct device_queue_manager *dqm = pdd->dev->dqm;
+		struct qcm_process_device *qpd = &pdd->qpd;
+
+		list_for_each_entry(q, &qpd->queues_list, list) {
+			mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
+
+			/* We ignore the return value from get_wave_state
+			 * because
+			 * i) right now, it always returns 0, and
+			 * ii) if we hit an error, we would continue to the
+			 *      next queue anyway.
+			 */
+			mqd_mgr->get_wave_state(mqd_mgr,
+					q->mqd,
+					&q->properties,
+					(void __user *)	q->properties.ctx_save_restore_area_address,
+					&tmp_ctl_stack_used_size,
+					&tmp_save_area_used_size);
+		}
+	}
+	kthread_unuse_mm(mm);
+	mmput(mm);
+}
+
+static uint32_t *get_queue_ids(uint32_t num_queues, uint32_t *usr_queue_id_array)
+{
+	size_t array_size = num_queues * sizeof(uint32_t);
+
+	if (!usr_queue_id_array)
+		return NULL;
+
+	return memdup_user(usr_queue_id_array, array_size);
+}
+
+int resume_queues(struct kfd_process *p,
+		uint32_t num_queues,
+		uint32_t *usr_queue_id_array)
+{
+	uint32_t *queue_ids = NULL;
+	int total_resumed = 0;
+	int i;
+
+	if (usr_queue_id_array) {
+		queue_ids = get_queue_ids(num_queues, usr_queue_id_array);
+
+		if (IS_ERR(queue_ids))
+			return PTR_ERR(queue_ids);
+
+		/* mask all queues as invalid.  unmask per successful request */
+		q_array_invalidate(num_queues, queue_ids);
+	}
+
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+		struct device_queue_manager *dqm = pdd->dev->dqm;
+		struct qcm_process_device *qpd = &pdd->qpd;
+		struct queue *q;
+		int r, per_device_resumed = 0;
+
+		dqm_lock(dqm);
+
+		/* unmask queues that resume or already resumed as valid */
+		list_for_each_entry(q, &qpd->queues_list, list) {
+			int q_idx = QUEUE_NOT_FOUND;
+
+			if (queue_ids)
+				q_idx = q_array_get_index(
+						q->properties.queue_id,
+						num_queues,
+						queue_ids);
+
+			if (!queue_ids || q_idx != QUEUE_NOT_FOUND) {
+				int err = resume_single_queue(dqm, &pdd->qpd, q);
+
+				if (queue_ids) {
+					if (!err) {
+						queue_ids[q_idx] &=
+							~KFD_DBG_QUEUE_INVALID_MASK;
+					} else {
+						queue_ids[q_idx] |=
+							KFD_DBG_QUEUE_ERROR_MASK;
+						break;
+					}
+				}
+
+				if (dqm->dev->kfd->shared_resources.enable_mes) {
+					wake_up_all(&dqm->destroy_wait);
+					if (!err)
+						total_resumed++;
+				} else {
+					per_device_resumed++;
+				}
+			}
+		}
+
+		if (!per_device_resumed) {
+			dqm_unlock(dqm);
+			continue;
+		}
+
+		r = execute_queues_cpsch(dqm,
+					KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
+					0,
+					USE_DEFAULT_GRACE_PERIOD);
+		if (r) {
+			pr_err("Failed to resume process queues\n");
+			if (queue_ids) {
+				list_for_each_entry(q, &qpd->queues_list, list) {
+					int q_idx = q_array_get_index(
+							q->properties.queue_id,
+							num_queues,
+							queue_ids);
+
+					/* mask queue as error on resume fail */
+					if (q_idx != QUEUE_NOT_FOUND)
+						queue_ids[q_idx] |=
+							KFD_DBG_QUEUE_ERROR_MASK;
+				}
+			}
+		} else {
+			wake_up_all(&dqm->destroy_wait);
+			total_resumed += per_device_resumed;
+		}
+
+		dqm_unlock(dqm);
+	}
+
+	if (queue_ids) {
+		if (copy_to_user((void __user *)usr_queue_id_array, queue_ids,
+				num_queues * sizeof(uint32_t)))
+			pr_err("copy_to_user failed on queue resume\n");
+
+		kfree(queue_ids);
+	}
+
+	return total_resumed;
+}
+
+int suspend_queues(struct kfd_process *p,
+			uint32_t num_queues,
+			uint32_t grace_period,
+			uint64_t exception_clear_mask,
+			uint32_t *usr_queue_id_array)
+{
+	uint32_t *queue_ids = get_queue_ids(num_queues, usr_queue_id_array);
+	int total_suspended = 0;
+	int i;
+
+	if (IS_ERR(queue_ids))
+		return PTR_ERR(queue_ids);
+
+	/* mask all queues as invalid.  umask on successful request */
+	q_array_invalidate(num_queues, queue_ids);
+
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+		struct device_queue_manager *dqm = pdd->dev->dqm;
+		struct qcm_process_device *qpd = &pdd->qpd;
+		struct queue *q;
+		int r, per_device_suspended = 0;
+
+		mutex_lock(&p->event_mutex);
+		dqm_lock(dqm);
+
+		/* unmask queues that suspend or already suspended */
+		list_for_each_entry(q, &qpd->queues_list, list) {
+			int q_idx = q_array_get_index(q->properties.queue_id,
+							num_queues,
+							queue_ids);
+
+			if (q_idx != QUEUE_NOT_FOUND) {
+				int err = suspend_single_queue(dqm, pdd, q);
+				bool is_mes = dqm->dev->kfd->shared_resources.enable_mes;
+
+				if (!err) {
+					queue_ids[q_idx] &= ~KFD_DBG_QUEUE_INVALID_MASK;
+					if (exception_clear_mask && is_mes)
+						q->properties.exception_status &=
+							~exception_clear_mask;
+
+					if (is_mes)
+						total_suspended++;
+					else
+						per_device_suspended++;
+				} else if (err != -EBUSY) {
+					r = err;
+					queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK;
+					break;
+				}
+			}
+		}
+
+		if (!per_device_suspended) {
+			dqm_unlock(dqm);
+			mutex_unlock(&p->event_mutex);
+			if (total_suspended)
+				amdgpu_amdkfd_debug_mem_fence(dqm->dev->adev);
+			continue;
+		}
+
+		r = execute_queues_cpsch(dqm,
+			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
+			grace_period);
+
+		if (r)
+			pr_err("Failed to suspend process queues.\n");
+		else
+			total_suspended += per_device_suspended;
+
+		list_for_each_entry(q, &qpd->queues_list, list) {
+			int q_idx = q_array_get_index(q->properties.queue_id,
+						num_queues, queue_ids);
+
+			if (q_idx == QUEUE_NOT_FOUND)
+				continue;
+
+			/* mask queue as error on suspend fail */
+			if (r)
+				queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK;
+			else if (exception_clear_mask)
+				q->properties.exception_status &=
+							~exception_clear_mask;
+		}
+
+		dqm_unlock(dqm);
+		mutex_unlock(&p->event_mutex);
+		amdgpu_device_flush_hdp(dqm->dev->adev, NULL);
+	}
+
+	if (total_suspended) {
+		struct copy_context_work_handler_workarea copy_context_worker;
+
+		INIT_WORK_ONSTACK(
+				&copy_context_worker.copy_context_work,
+				copy_context_work_handler);
+
+		copy_context_worker.p = p;
+
+		schedule_work(&copy_context_worker.copy_context_work);
+
+
+		flush_work(&copy_context_worker.copy_context_work);
+		destroy_work_on_stack(&copy_context_worker.copy_context_work);
+	}
+
+	if (copy_to_user((void __user *)usr_queue_id_array, queue_ids,
+			num_queues * sizeof(uint32_t)))
+		pr_err("copy_to_user failed on queue suspend\n");
+
+	kfree(queue_ids);
+
+	return total_suspended;
+}
+
+static uint32_t set_queue_type_for_user(struct queue_properties *q_props)
+{
+	switch (q_props->type) {
+	case KFD_QUEUE_TYPE_COMPUTE:
+		return q_props->format == KFD_QUEUE_FORMAT_PM4
+					? KFD_IOC_QUEUE_TYPE_COMPUTE
+					: KFD_IOC_QUEUE_TYPE_COMPUTE_AQL;
+	case KFD_QUEUE_TYPE_SDMA:
+		return KFD_IOC_QUEUE_TYPE_SDMA;
+	case KFD_QUEUE_TYPE_SDMA_XGMI:
+		return KFD_IOC_QUEUE_TYPE_SDMA_XGMI;
+	default:
+		WARN_ONCE(true, "queue type not recognized!");
+		return 0xffffffff;
+	};
+}
+
+void set_queue_snapshot_entry(struct queue *q,
+			      uint64_t exception_clear_mask,
+			      struct kfd_queue_snapshot_entry *qss_entry)
+{
+	qss_entry->ring_base_address = q->properties.queue_address;
+	qss_entry->write_pointer_address = (uint64_t)q->properties.write_ptr;
+	qss_entry->read_pointer_address = (uint64_t)q->properties.read_ptr;
+	qss_entry->ctx_save_restore_address =
+				q->properties.ctx_save_restore_area_address;
+	qss_entry->ctx_save_restore_area_size =
+				q->properties.ctx_save_restore_area_size;
+	qss_entry->exception_status = q->properties.exception_status;
+	qss_entry->queue_id = q->properties.queue_id;
+	qss_entry->gpu_id = q->device->id;
+	qss_entry->ring_size = (uint32_t)q->properties.queue_size;
+	qss_entry->queue_type = set_queue_type_for_user(&q->properties);
+	q->properties.exception_status &= ~exception_clear_mask;
+}
+
+int debug_lock_and_unmap(struct device_queue_manager *dqm)
+{
+	int r;
+
+	if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+		pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
+		return -EINVAL;
+	}
+
+	if (!kfd_dbg_is_per_vmid_supported(dqm->dev))
+		return 0;
+
+	dqm_lock(dqm);
+
+	r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 0, false);
+	if (r)
+		dqm_unlock(dqm);
+
+	return r;
+}
+
+int debug_map_and_unlock(struct device_queue_manager *dqm)
+{
+	int r;
+
+	if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+		pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
+		return -EINVAL;
+	}
+
+	if (!kfd_dbg_is_per_vmid_supported(dqm->dev))
+		return 0;
+
+	r = map_queues_cpsch(dqm);
+
+	dqm_unlock(dqm);
+
+	return r;
+}
+
+int debug_refresh_runlist(struct device_queue_manager *dqm)
+{
+	int r = debug_lock_and_unmap(dqm);
+
+	if (r)
+		return r;
+
+	return debug_map_and_unlock(dqm);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static void seq_reg_dump(struct seq_file *m,
+			 uint32_t (*dump)[2], uint32_t n_regs)
+{
+	uint32_t i, count;
+
+	for (i = 0, count = 0; i < n_regs; i++) {
+		if (count == 0 ||
+		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
+			seq_printf(m, "%s    %08x: %08x",
+				   i ? "\n" : "",
+				   dump[i][0], dump[i][1]);
+			count = 7;
+		} else {
+			seq_printf(m, " %08x", dump[i][1]);
+			count--;
+		}
+	}
+
+	seq_puts(m, "\n");
+}
+
+int dqm_debugfs_hqds(struct seq_file *m, void *data)
+{
+	struct device_queue_manager *dqm = data;
+	uint32_t xcc_mask = dqm->dev->xcc_mask;
+	uint32_t (*dump)[2], n_regs;
+	int pipe, queue;
+	int r = 0, xcc_id;
+	uint32_t sdma_engine_start;
+
+	if (!dqm->sched_running) {
+		seq_puts(m, " Device is stopped\n");
+		return 0;
+	}
+
+	for_each_inst(xcc_id, xcc_mask) {
+		r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
+						KFD_CIK_HIQ_PIPE,
+						KFD_CIK_HIQ_QUEUE, &dump,
+						&n_regs, xcc_id);
+		if (!r) {
+			seq_printf(
+				m,
+				"   Inst %d, HIQ on MEC %d Pipe %d Queue %d\n",
+				xcc_id,
+				KFD_CIK_HIQ_PIPE / get_pipes_per_mec(dqm) + 1,
+				KFD_CIK_HIQ_PIPE % get_pipes_per_mec(dqm),
+				KFD_CIK_HIQ_QUEUE);
+			seq_reg_dump(m, dump, n_regs);
+
+			kfree(dump);
+		}
+
+		for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
+			int pipe_offset = pipe * get_queues_per_pipe(dqm);
+
+			for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
+				if (!test_bit(pipe_offset + queue,
+				      dqm->dev->kfd->shared_resources.cp_queue_bitmap))
+					continue;
+
+				r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
+								pipe, queue,
+								&dump, &n_regs,
+								xcc_id);
+				if (r)
+					break;
+
+				seq_printf(m,
+					   " Inst %d,  CP Pipe %d, Queue %d\n",
+					   xcc_id, pipe, queue);
+				seq_reg_dump(m, dump, n_regs);
+
+				kfree(dump);
+			}
+		}
+	}
+
+	sdma_engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm);
+	for (pipe = sdma_engine_start;
+	     pipe < (sdma_engine_start + get_num_all_sdma_engines(dqm));
+	     pipe++) {
+		for (queue = 0;
+		     queue < dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
+		     queue++) {
+			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
+				dqm->dev->adev, pipe, queue, &dump, &n_regs);
+			if (r)
+				break;
+
+			seq_printf(m, "  SDMA Engine %d, RLC %d\n",
+				  pipe, queue);
+			seq_reg_dump(m, dump, n_regs);
+
+			kfree(dump);
+		}
+	}
+
+	return r;
+}
+
+int dqm_debugfs_hang_hws(struct device_queue_manager *dqm)
+{
+	int r = 0;
+
+	dqm_lock(dqm);
+	r = pm_debugfs_hang_hws(&dqm->packet_mgr);
+	if (r) {
+		dqm_unlock(dqm);
+		return r;
+	}
+	dqm->active_runlist = true;
+	r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES,
+				0, USE_DEFAULT_GRACE_PERIOD);
+	dqm_unlock(dqm);
+
+	return r;
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
new file mode 100644
index 000000000..cf7e18258
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -0,0 +1,337 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_DEVICE_QUEUE_MANAGER_H_
+#define KFD_DEVICE_QUEUE_MANAGER_H_
+
+#include <linux/rwsem.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/sched/mm.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+
+
+#define VMID_NUM 16
+
+#define KFD_MES_PROCESS_QUANTUM		100000
+#define KFD_MES_GANG_QUANTUM		10000
+#define USE_DEFAULT_GRACE_PERIOD 0xffffffff
+
+struct device_process_node {
+	struct qcm_process_device *qpd;
+	struct list_head list;
+};
+
+union SQ_CMD_BITS {
+	struct {
+		uint32_t cmd:3;
+		uint32_t:1;
+		uint32_t mode:3;
+		uint32_t check_vmid:1;
+		uint32_t trap_id:3;
+		uint32_t:5;
+		uint32_t wave_id:4;
+		uint32_t simd_id:2;
+		uint32_t:2;
+		uint32_t queue_id:3;
+		uint32_t:1;
+		uint32_t vm_id:4;
+	} bitfields, bits;
+	uint32_t u32All;
+	signed int i32All;
+	float f32All;
+};
+
+union GRBM_GFX_INDEX_BITS {
+	struct {
+		uint32_t instance_index:8;
+		uint32_t sh_index:8;
+		uint32_t se_index:8;
+		uint32_t:5;
+		uint32_t sh_broadcast_writes:1;
+		uint32_t instance_broadcast_writes:1;
+		uint32_t se_broadcast_writes:1;
+	} bitfields, bits;
+	uint32_t u32All;
+	signed int i32All;
+	float f32All;
+};
+
+/**
+ * struct device_queue_manager_ops
+ *
+ * @create_queue: Queue creation routine.
+ *
+ * @destroy_queue: Queue destruction routine.
+ *
+ * @update_queue: Queue update routine.
+ *
+ * @exeute_queues: Dispatches the queues list to the H/W.
+ *
+ * @register_process: This routine associates a specific process with device.
+ *
+ * @unregister_process: destroys the associations between process to device.
+ *
+ * @initialize: Initializes the pipelines and memory module for that device.
+ *
+ * @start: Initializes the resources/modules the device needs for queues
+ * execution. This function is called on device initialization and after the
+ * system woke up after suspension.
+ *
+ * @stop: This routine stops execution of all the active queue running on the
+ * H/W and basically this function called on system suspend.
+ *
+ * @uninitialize: Destroys all the device queue manager resources allocated in
+ * initialize routine.
+ *
+ * @create_kernel_queue: Creates kernel queue. Used for debug queue.
+ *
+ * @destroy_kernel_queue: Destroys kernel queue. Used for debug queue.
+ *
+ * @set_cache_memory_policy: Sets memory policy (cached/ non cached) for the
+ * memory apertures.
+ *
+ * @process_termination: Clears all process queues belongs to that device.
+ *
+ * @evict_process_queues: Evict all active queues of a process
+ *
+ * @restore_process_queues: Restore all evicted queues of a process
+ *
+ * @get_wave_state: Retrieves context save state and optionally copies the
+ * control stack, if kept in the MQD, to the given userspace address.
+ *
+ * @reset_queues: reset queues which consume RAS poison
+ * @get_queue_checkpoint_info: Retrieves queue size information for CRIU checkpoint.
+ *
+ * @checkpoint_mqd: checkpoint queue MQD contents for CRIU.
+ */
+
+struct device_queue_manager_ops {
+	int	(*create_queue)(struct device_queue_manager *dqm,
+				struct queue *q,
+				struct qcm_process_device *qpd,
+				const struct kfd_criu_queue_priv_data *qd,
+				const void *restore_mqd,
+				const void *restore_ctl_stack);
+
+	int	(*destroy_queue)(struct device_queue_manager *dqm,
+				struct qcm_process_device *qpd,
+				struct queue *q);
+
+	int	(*update_queue)(struct device_queue_manager *dqm,
+				struct queue *q, struct mqd_update_info *minfo);
+
+	int	(*register_process)(struct device_queue_manager *dqm,
+					struct qcm_process_device *qpd);
+
+	int	(*unregister_process)(struct device_queue_manager *dqm,
+					struct qcm_process_device *qpd);
+
+	int	(*initialize)(struct device_queue_manager *dqm);
+	int	(*start)(struct device_queue_manager *dqm);
+	int	(*stop)(struct device_queue_manager *dqm);
+	void	(*pre_reset)(struct device_queue_manager *dqm);
+	void	(*uninitialize)(struct device_queue_manager *dqm);
+	int	(*create_kernel_queue)(struct device_queue_manager *dqm,
+					struct kernel_queue *kq,
+					struct qcm_process_device *qpd);
+
+	void	(*destroy_kernel_queue)(struct device_queue_manager *dqm,
+					struct kernel_queue *kq,
+					struct qcm_process_device *qpd);
+
+	bool	(*set_cache_memory_policy)(struct device_queue_manager *dqm,
+					   struct qcm_process_device *qpd,
+					   enum cache_policy default_policy,
+					   enum cache_policy alternate_policy,
+					   void __user *alternate_aperture_base,
+					   uint64_t alternate_aperture_size);
+
+	int (*process_termination)(struct device_queue_manager *dqm,
+			struct qcm_process_device *qpd);
+
+	int (*evict_process_queues)(struct device_queue_manager *dqm,
+				    struct qcm_process_device *qpd);
+	int (*restore_process_queues)(struct device_queue_manager *dqm,
+				      struct qcm_process_device *qpd);
+
+	int	(*get_wave_state)(struct device_queue_manager *dqm,
+				  struct queue *q,
+				  void __user *ctl_stack,
+				  u32 *ctl_stack_used_size,
+				  u32 *save_area_used_size);
+
+	int (*reset_queues)(struct device_queue_manager *dqm,
+					uint16_t pasid);
+	void	(*get_queue_checkpoint_info)(struct device_queue_manager *dqm,
+				  const struct queue *q, u32 *mqd_size,
+				  u32 *ctl_stack_size);
+
+	int	(*checkpoint_mqd)(struct device_queue_manager *dqm,
+				  const struct queue *q,
+				  void *mqd,
+				  void *ctl_stack);
+};
+
+struct device_queue_manager_asic_ops {
+	int	(*update_qpd)(struct device_queue_manager *dqm,
+					struct qcm_process_device *qpd);
+	bool	(*set_cache_memory_policy)(struct device_queue_manager *dqm,
+					   struct qcm_process_device *qpd,
+					   enum cache_policy default_policy,
+					   enum cache_policy alternate_policy,
+					   void __user *alternate_aperture_base,
+					   uint64_t alternate_aperture_size);
+	void	(*init_sdma_vm)(struct device_queue_manager *dqm,
+				struct queue *q,
+				struct qcm_process_device *qpd);
+	struct mqd_manager *	(*mqd_manager_init)(enum KFD_MQD_TYPE type,
+				 struct kfd_node *dev);
+};
+
+/**
+ * struct device_queue_manager
+ *
+ * This struct is a base class for the kfd queues scheduler in the
+ * device level. The device base class should expose the basic operations
+ * for queue creation and queue destruction. This base class hides the
+ * scheduling mode of the driver and the specific implementation of the
+ * concrete device. This class is the only class in the queues scheduler
+ * that configures the H/W.
+ *
+ */
+
+struct device_queue_manager {
+	struct device_queue_manager_ops ops;
+	struct device_queue_manager_asic_ops asic_ops;
+
+	struct mqd_manager	*mqd_mgrs[KFD_MQD_TYPE_MAX];
+	struct packet_manager	packet_mgr;
+	struct kfd_node		*dev;
+	struct mutex		lock_hidden; /* use dqm_lock/unlock(dqm) */
+	struct list_head	queues;
+	unsigned int		saved_flags;
+	unsigned int		processes_count;
+	unsigned int		active_queue_count;
+	unsigned int		active_cp_queue_count;
+	unsigned int		gws_queue_count;
+	unsigned int		total_queue_count;
+	unsigned int		next_pipe_to_allocate;
+	unsigned int		*allocated_queues;
+	DECLARE_BITMAP(sdma_bitmap, KFD_MAX_SDMA_QUEUES);
+	DECLARE_BITMAP(xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES);
+	/* the pasid mapping for each kfd vmid */
+	uint16_t		vmid_pasid[VMID_NUM];
+	uint64_t		pipelines_addr;
+	uint64_t		fence_gpu_addr;
+	uint64_t		*fence_addr;
+	struct kfd_mem_obj	*fence_mem;
+	bool			active_runlist;
+	int			sched_policy;
+	uint32_t		trap_debug_vmid;
+
+	/* hw exception  */
+	bool			is_hws_hang;
+	bool			is_resetting;
+	struct work_struct	hw_exception_work;
+	struct kfd_mem_obj	hiq_sdma_mqd;
+	bool			sched_running;
+
+	/* used for GFX 9.4.3 only */
+	uint32_t		current_logical_xcc_start;
+
+	uint32_t		wait_times;
+
+	wait_queue_head_t	destroy_wait;
+};
+
+void device_queue_manager_init_cik(
+		struct device_queue_manager_asic_ops *asic_ops);
+void device_queue_manager_init_vi(
+		struct device_queue_manager_asic_ops *asic_ops);
+void device_queue_manager_init_v9(
+		struct device_queue_manager_asic_ops *asic_ops);
+void device_queue_manager_init_v10(
+		struct device_queue_manager_asic_ops *asic_ops);
+void device_queue_manager_init_v11(
+		struct device_queue_manager_asic_ops *asic_ops);
+void program_sh_mem_settings(struct device_queue_manager *dqm,
+					struct qcm_process_device *qpd);
+unsigned int get_cp_queues_num(struct device_queue_manager *dqm);
+unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
+unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
+unsigned int get_num_sdma_queues(struct device_queue_manager *dqm);
+unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm);
+int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
+			struct qcm_process_device *qpd);
+int release_debug_trap_vmid(struct device_queue_manager *dqm,
+			struct qcm_process_device *qpd);
+int suspend_queues(struct kfd_process *p,
+			uint32_t num_queues,
+			uint32_t grace_period,
+			uint64_t exception_clear_mask,
+			uint32_t *usr_queue_id_array);
+int resume_queues(struct kfd_process *p,
+		uint32_t num_queues,
+		uint32_t *usr_queue_id_array);
+void set_queue_snapshot_entry(struct queue *q,
+			      uint64_t exception_clear_mask,
+			      struct kfd_queue_snapshot_entry *qss_entry);
+int debug_lock_and_unmap(struct device_queue_manager *dqm);
+int debug_map_and_unlock(struct device_queue_manager *dqm);
+int debug_refresh_runlist(struct device_queue_manager *dqm);
+
+static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
+{
+	return (pdd->lds_base >> 16) & 0xFF;
+}
+
+static inline unsigned int
+get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd)
+{
+	return (pdd->lds_base >> 60) & 0x0E;
+}
+
+/* The DQM lock can be taken in MMU notifiers. Make sure no reclaim-FS
+ * happens while holding this lock anywhere to prevent deadlocks when
+ * an MMU notifier runs in reclaim-FS context.
+ */
+static inline void dqm_lock(struct device_queue_manager *dqm)
+{
+	mutex_lock(&dqm->lock_hidden);
+	dqm->saved_flags = memalloc_noreclaim_save();
+}
+static inline void dqm_unlock(struct device_queue_manager *dqm)
+{
+	memalloc_noreclaim_restore(dqm->saved_flags);
+	mutex_unlock(&dqm->lock_hidden);
+}
+
+static inline int read_sdma_queue_counter(uint64_t __user *q_rptr, uint64_t *val)
+{
+	/* SDMA activity counter is stored at queue's RPTR + 0x8 location. */
+	return get_user(*val, q_rptr + 1);
+}
+#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
new file mode 100644
index 000000000..d4d95c7f2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_device_queue_manager.h"
+#include "cik_regs.h"
+#include "oss/oss_2_4_sh_mask.h"
+#include "gca/gfx_7_2_sh_mask.h"
+
+static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
+				   struct qcm_process_device *qpd,
+				   enum cache_policy default_policy,
+				   enum cache_policy alternate_policy,
+				   void __user *alternate_aperture_base,
+				   uint64_t alternate_aperture_size);
+static int update_qpd_cik(struct device_queue_manager *dqm,
+			  struct qcm_process_device *qpd);
+static void init_sdma_vm(struct device_queue_manager *dqm,
+			 struct queue *q,
+			 struct qcm_process_device *qpd);
+
+void device_queue_manager_init_cik(
+	struct device_queue_manager_asic_ops *asic_ops)
+{
+	asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
+	asic_ops->update_qpd = update_qpd_cik;
+	asic_ops->init_sdma_vm = init_sdma_vm;
+	asic_ops->mqd_manager_init = mqd_manager_init_cik;
+}
+
+static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
+{
+	/* In 64-bit mode, we can only control the top 3 bits of the LDS,
+	 * scratch and GPUVM apertures.
+	 * The hardware fills in the remaining 59 bits according to the
+	 * following pattern:
+	 * LDS:		X0000000'00000000 - X0000001'00000000 (4GB)
+	 * Scratch:	X0000001'00000000 - X0000002'00000000 (4GB)
+	 * GPUVM:	Y0010000'00000000 - Y0020000'00000000 (1TB)
+	 *
+	 * (where X/Y is the configurable nybble with the low-bit 0)
+	 *
+	 * LDS and scratch will have the same top nybble programmed in the
+	 * top 3 bits of SH_MEM_BASES.PRIVATE_BASE.
+	 * GPUVM can have a different top nybble programmed in the
+	 * top 3 bits of SH_MEM_BASES.SHARED_BASE.
+	 * We don't bother to support different top nybbles
+	 * for LDS/Scratch and GPUVM.
+	 */
+
+	WARN_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
+		top_address_nybble == 0);
+
+	return PRIVATE_BASE(top_address_nybble << 12) |
+			SHARED_BASE(top_address_nybble << 12);
+}
+
+static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
+				   struct qcm_process_device *qpd,
+				   enum cache_policy default_policy,
+				   enum cache_policy alternate_policy,
+				   void __user *alternate_aperture_base,
+				   uint64_t alternate_aperture_size)
+{
+	uint32_t default_mtype;
+	uint32_t ape1_mtype;
+
+	default_mtype = (default_policy == cache_policy_coherent) ?
+			MTYPE_NONCACHED :
+			MTYPE_CACHED;
+
+	ape1_mtype = (alternate_policy == cache_policy_coherent) ?
+			MTYPE_NONCACHED :
+			MTYPE_CACHED;
+
+	qpd->sh_mem_config = (qpd->sh_mem_config & PTR32)
+			| ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
+			| DEFAULT_MTYPE(default_mtype)
+			| APE1_MTYPE(ape1_mtype);
+
+	return true;
+}
+
+static int update_qpd_cik(struct device_queue_manager *dqm,
+			  struct qcm_process_device *qpd)
+{
+	struct kfd_process_device *pdd;
+	unsigned int temp;
+
+	pdd = qpd_to_pdd(qpd);
+
+	/* check if sh_mem_config register already configured */
+	if (qpd->sh_mem_config == 0) {
+		qpd->sh_mem_config =
+			ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
+			DEFAULT_MTYPE(MTYPE_NONCACHED) |
+			APE1_MTYPE(MTYPE_NONCACHED);
+		qpd->sh_mem_ape1_limit = 0;
+		qpd->sh_mem_ape1_base = 0;
+	}
+
+	/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
+	 * aperture addresses.
+	 */
+	temp = get_sh_mem_bases_nybble_64(pdd);
+	qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
+
+	pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
+		qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
+
+	return 0;
+}
+
+static void init_sdma_vm(struct device_queue_manager *dqm,
+			 struct queue *q,
+			 struct qcm_process_device *qpd)
+{
+	/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
+	 * aperture addresses.
+	 */
+	q->properties.sdma_vm_addr =
+		((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
+		 SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
+		SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
new file mode 100644
index 000000000..245a90dfc
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2018-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_device_queue_manager.h"
+#include "navi10_enum.h"
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+
+static int update_qpd_v10(struct device_queue_manager *dqm,
+			 struct qcm_process_device *qpd);
+static void init_sdma_vm_v10(struct device_queue_manager *dqm, struct queue *q,
+			    struct qcm_process_device *qpd);
+
+void device_queue_manager_init_v10(
+	struct device_queue_manager_asic_ops *asic_ops)
+{
+	asic_ops->update_qpd = update_qpd_v10;
+	asic_ops->init_sdma_vm = init_sdma_vm_v10;
+	asic_ops->mqd_manager_init = mqd_manager_init_v10;
+}
+
+static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
+{
+	uint32_t shared_base = pdd->lds_base >> 48;
+	uint32_t private_base = pdd->scratch_base >> 48;
+
+	return (shared_base << SH_MEM_BASES__SHARED_BASE__SHIFT) |
+		private_base;
+}
+
+static int update_qpd_v10(struct device_queue_manager *dqm,
+			 struct qcm_process_device *qpd)
+{
+	struct kfd_process_device *pdd;
+
+	pdd = qpd_to_pdd(qpd);
+
+	/* check if sh_mem_config register already configured */
+	if (qpd->sh_mem_config == 0) {
+		qpd->sh_mem_config =
+			(SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+				SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+			(3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
+		qpd->sh_mem_ape1_limit = 0;
+		qpd->sh_mem_ape1_base = 0;
+	}
+
+	qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+
+	pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+
+	return 0;
+}
+
+static void init_sdma_vm_v10(struct device_queue_manager *dqm, struct queue *q,
+			    struct qcm_process_device *qpd)
+{
+	/* Not needed on SDMAv4 onwards any more */
+	q->properties.sdma_vm_addr = 0;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
new file mode 100644
index 000000000..2e129da7a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v11.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_device_queue_manager.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "soc21_enum.h"
+
+static int update_qpd_v11(struct device_queue_manager *dqm,
+			 struct qcm_process_device *qpd);
+static void init_sdma_vm_v11(struct device_queue_manager *dqm, struct queue *q,
+			    struct qcm_process_device *qpd);
+
+void device_queue_manager_init_v11(
+	struct device_queue_manager_asic_ops *asic_ops)
+{
+	asic_ops->update_qpd = update_qpd_v11;
+	asic_ops->init_sdma_vm = init_sdma_vm_v11;
+	asic_ops->mqd_manager_init = mqd_manager_init_v11;
+}
+
+static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
+{
+	uint32_t shared_base = pdd->lds_base >> 48;
+	uint32_t private_base = pdd->scratch_base >> 48;
+
+	return (shared_base << SH_MEM_BASES__SHARED_BASE__SHIFT) |
+		private_base;
+}
+
+static int update_qpd_v11(struct device_queue_manager *dqm,
+			 struct qcm_process_device *qpd)
+{
+	struct kfd_process_device *pdd;
+
+	pdd = qpd_to_pdd(qpd);
+
+	/* check if sh_mem_config register already configured */
+	if (qpd->sh_mem_config == 0) {
+		qpd->sh_mem_config =
+			(SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+				SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
+			(3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT);
+
+		qpd->sh_mem_ape1_limit = 0;
+		qpd->sh_mem_ape1_base = 0;
+	}
+
+	qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+
+	pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+
+	return 0;
+}
+
+static void init_sdma_vm_v11(struct device_queue_manager *dqm, struct queue *q,
+			    struct qcm_process_device *qpd)
+{
+	/* Not needed on SDMAv4 onwards any more */
+	q->properties.sdma_vm_addr = 0;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
new file mode 100644
index 000000000..54eb1bff9
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_device_queue_manager.h"
+#include "vega10_enum.h"
+#include "gc/gc_9_4_3_sh_mask.h"
+
+static int update_qpd_v9(struct device_queue_manager *dqm,
+			 struct qcm_process_device *qpd);
+static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q,
+			    struct qcm_process_device *qpd);
+
+void device_queue_manager_init_v9(
+	struct device_queue_manager_asic_ops *asic_ops)
+{
+	asic_ops->update_qpd = update_qpd_v9;
+	asic_ops->init_sdma_vm = init_sdma_vm_v9;
+	asic_ops->mqd_manager_init = mqd_manager_init_v9;
+}
+
+static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
+{
+	uint32_t shared_base = pdd->lds_base >> 48;
+	uint32_t private_base = pdd->scratch_base >> 48;
+
+	return (shared_base << SH_MEM_BASES__SHARED_BASE__SHIFT) |
+		private_base;
+}
+
+static int update_qpd_v9(struct device_queue_manager *dqm,
+			 struct qcm_process_device *qpd)
+{
+	struct kfd_process_device *pdd;
+
+	pdd = qpd_to_pdd(qpd);
+
+	/* check if sh_mem_config register already configured */
+	if (qpd->sh_mem_config == 0) {
+		qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+					SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
+
+		if (dqm->dev->kfd->noretry)
+			qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
+
+		if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3))
+			qpd->sh_mem_config |=
+				(1 << SH_MEM_CONFIG__F8_MODE__SHIFT);
+
+		qpd->sh_mem_ape1_limit = 0;
+		qpd->sh_mem_ape1_base = 0;
+	}
+
+	if (KFD_SUPPORT_XNACK_PER_PROCESS(dqm->dev)) {
+		if (!pdd->process->xnack_enabled)
+			qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
+		else
+			qpd->sh_mem_config &= ~(1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT);
+	}
+
+	qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+
+	pr_debug("sh_mem_bases 0x%X sh_mem_config 0x%X\n", qpd->sh_mem_bases,
+		 qpd->sh_mem_config);
+
+	return 0;
+}
+
+static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q,
+			    struct qcm_process_device *qpd)
+{
+	/* Not needed on SDMAv4 any more */
+	q->properties.sdma_vm_addr = 0;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
new file mode 100644
index 000000000..b291ee0fa
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_device_queue_manager.h"
+#include "gca/gfx_8_0_enum.h"
+#include "gca/gfx_8_0_sh_mask.h"
+#include "oss/oss_3_0_sh_mask.h"
+
+static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
+				       struct qcm_process_device *qpd,
+				       enum cache_policy default_policy,
+				       enum cache_policy alternate_policy,
+				       void __user *alternate_aperture_base,
+				       uint64_t alternate_aperture_size);
+static int update_qpd_vi(struct device_queue_manager *dqm,
+			 struct qcm_process_device *qpd);
+static void init_sdma_vm(struct device_queue_manager *dqm,
+			 struct queue *q,
+			 struct qcm_process_device *qpd);
+
+void device_queue_manager_init_vi(
+	struct device_queue_manager_asic_ops *asic_ops)
+{
+	asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi;
+	asic_ops->update_qpd = update_qpd_vi;
+	asic_ops->init_sdma_vm = init_sdma_vm;
+	asic_ops->mqd_manager_init = mqd_manager_init_vi;
+}
+
+static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
+{
+	/* In 64-bit mode, we can only control the top 3 bits of the LDS,
+	 * scratch and GPUVM apertures.
+	 * The hardware fills in the remaining 59 bits according to the
+	 * following pattern:
+	 * LDS:		X0000000'00000000 - X0000001'00000000 (4GB)
+	 * Scratch:	X0000001'00000000 - X0000002'00000000 (4GB)
+	 * GPUVM:	Y0010000'00000000 - Y0020000'00000000 (1TB)
+	 *
+	 * (where X/Y is the configurable nybble with the low-bit 0)
+	 *
+	 * LDS and scratch will have the same top nybble programmed in the
+	 * top 3 bits of SH_MEM_BASES.PRIVATE_BASE.
+	 * GPUVM can have a different top nybble programmed in the
+	 * top 3 bits of SH_MEM_BASES.SHARED_BASE.
+	 * We don't bother to support different top nybbles
+	 * for LDS/Scratch and GPUVM.
+	 */
+
+	WARN_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
+		top_address_nybble == 0);
+
+	return top_address_nybble << 12 |
+			(top_address_nybble << 12) <<
+			SH_MEM_BASES__SHARED_BASE__SHIFT;
+}
+
+static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
+		struct qcm_process_device *qpd,
+		enum cache_policy default_policy,
+		enum cache_policy alternate_policy,
+		void __user *alternate_aperture_base,
+		uint64_t alternate_aperture_size)
+{
+	uint32_t default_mtype;
+	uint32_t ape1_mtype;
+
+	default_mtype = (default_policy == cache_policy_coherent) ?
+			MTYPE_UC :
+			MTYPE_NC;
+
+	ape1_mtype = (alternate_policy == cache_policy_coherent) ?
+			MTYPE_UC :
+			MTYPE_NC;
+
+	qpd->sh_mem_config =
+			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+				   SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
+			default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
+			ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
+
+	return true;
+}
+
+static int update_qpd_vi(struct device_queue_manager *dqm,
+			 struct qcm_process_device *qpd)
+{
+	struct kfd_process_device *pdd;
+	unsigned int temp;
+
+	pdd = qpd_to_pdd(qpd);
+
+	/* check if sh_mem_config register already configured */
+	if (qpd->sh_mem_config == 0) {
+		qpd->sh_mem_config =
+				SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+					SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
+				MTYPE_UC <<
+					SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
+				MTYPE_UC <<
+					SH_MEM_CONFIG__APE1_MTYPE__SHIFT;
+
+		qpd->sh_mem_ape1_limit = 0;
+		qpd->sh_mem_ape1_base = 0;
+	}
+
+	/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
+	 * aperture addresses.
+	 */
+	temp = get_sh_mem_bases_nybble_64(pdd);
+	qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
+
+	pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n",
+		temp, qpd->sh_mem_bases);
+
+	return 0;
+}
+
+static void init_sdma_vm(struct device_queue_manager *dqm,
+			 struct queue *q,
+			 struct qcm_process_device *qpd)
+{
+	/* On dGPU we're always in GPUVM64 addressing mode with 64-bit
+	 * aperture addresses.
+	 */
+	q->properties.sdma_vm_addr =
+		((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
+		 SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
+		SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
new file mode 100644
index 000000000..05c74887f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "kfd_priv.h"
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/idr.h>
+
+/*
+ * This extension supports a kernel level doorbells management for the
+ * kernel queues using the first doorbell page reserved for the kernel.
+ */
+
+/*
+ * Each device exposes a doorbell aperture, a PCI MMIO aperture that
+ * receives 32-bit writes that are passed to queues as wptr values.
+ * The doorbells are intended to be written by applications as part
+ * of queueing work on user-mode queues.
+ * We assign doorbells to applications in PAGE_SIZE-sized and aligned chunks.
+ * We map the doorbell address space into user-mode when a process creates
+ * its first queue on each device.
+ * Although the mapping is done by KFD, it is equivalent to an mmap of
+ * the /dev/kfd with the particular device encoded in the mmap offset.
+ * There will be other uses for mmap of /dev/kfd, so only a range of
+ * offsets (KFD_MMAP_DOORBELL_START-END) is used for doorbells.
+ */
+
+/* # of doorbell bytes allocated for each process. */
+size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
+{
+	if (!kfd->shared_resources.enable_mes)
+		return roundup(kfd->device_info.doorbell_size *
+				KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+				PAGE_SIZE);
+	else
+		return amdgpu_mes_doorbell_process_slice(
+					(struct amdgpu_device *)kfd->adev);
+}
+
+/* Doorbell calculations for device init. */
+int kfd_doorbell_init(struct kfd_dev *kfd)
+{
+	int size = PAGE_SIZE;
+	int r;
+
+	/*
+	 * Todo: KFD kernel level operations need only one doorbell for
+	 * ring test/HWS. So instead of reserving a whole page here for
+	 * kernel, reserve and consume a doorbell from existing KGD kernel
+	 * doorbell page.
+	 */
+
+	/* Bitmap to dynamically allocate doorbells from kernel page */
+	kfd->doorbell_bitmap = bitmap_zalloc(size / sizeof(u32), GFP_KERNEL);
+	if (!kfd->doorbell_bitmap) {
+		DRM_ERROR("Failed to allocate kernel doorbell bitmap\n");
+		return -ENOMEM;
+	}
+
+	/* Alloc a doorbell page for KFD kernel usages */
+	r = amdgpu_bo_create_kernel(kfd->adev,
+				    size,
+				    PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_DOORBELL,
+				    &kfd->doorbells,
+				    NULL,
+				    (void **)&kfd->doorbell_kernel_ptr);
+	if (r) {
+		pr_err("failed to allocate kernel doorbells\n");
+		bitmap_free(kfd->doorbell_bitmap);
+		return r;
+	}
+
+	pr_debug("Doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
+	return 0;
+}
+
+void kfd_doorbell_fini(struct kfd_dev *kfd)
+{
+	bitmap_free(kfd->doorbell_bitmap);
+	amdgpu_bo_free_kernel(&kfd->doorbells, NULL,
+			     (void **)&kfd->doorbell_kernel_ptr);
+}
+
+int kfd_doorbell_mmap(struct kfd_node *dev, struct kfd_process *process,
+		      struct vm_area_struct *vma)
+{
+	phys_addr_t address;
+	struct kfd_process_device *pdd;
+
+	/*
+	 * For simplicitly we only allow mapping of the entire doorbell
+	 * allocation of a single device & process.
+	 */
+	if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev->kfd))
+		return -EINVAL;
+
+	pdd = kfd_get_process_device_data(dev, process);
+	if (!pdd)
+		return -EINVAL;
+
+	/* Calculate physical address of doorbell */
+	address = kfd_get_process_doorbells(pdd);
+	if (!address)
+		return -ENOMEM;
+	vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
+				VM_DONTDUMP | VM_PFNMAP);
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	pr_debug("Mapping doorbell page\n"
+		 "     target user address == 0x%08llX\n"
+		 "     physical address    == 0x%08llX\n"
+		 "     vm_flags            == 0x%04lX\n"
+		 "     size                == 0x%04lX\n",
+		 (unsigned long long) vma->vm_start, address, vma->vm_flags,
+		 kfd_doorbell_process_slice(dev->kfd));
+
+
+	return io_remap_pfn_range(vma,
+				vma->vm_start,
+				address >> PAGE_SHIFT,
+				kfd_doorbell_process_slice(dev->kfd),
+				vma->vm_page_prot);
+}
+
+
+/* get kernel iomem pointer for a doorbell */
+void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
+					unsigned int *doorbell_off)
+{
+	u32 inx;
+
+	mutex_lock(&kfd->doorbell_mutex);
+	inx = find_first_zero_bit(kfd->doorbell_bitmap, PAGE_SIZE / sizeof(u32));
+
+	__set_bit(inx, kfd->doorbell_bitmap);
+	mutex_unlock(&kfd->doorbell_mutex);
+
+	if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
+		return NULL;
+
+	*doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev,
+						     kfd->doorbells,
+						     inx,
+						     kfd->device_info.doorbell_size);
+	inx *= 2;
+
+	pr_debug("Get kernel queue doorbell\n"
+			"     doorbell offset   == 0x%08X\n"
+			"     doorbell index    == 0x%x\n",
+		*doorbell_off, inx);
+
+	return kfd->doorbell_kernel_ptr + inx;
+}
+
+void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
+{
+	unsigned int inx;
+
+	inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
+	inx /= 2;
+
+	mutex_lock(&kfd->doorbell_mutex);
+	__clear_bit(inx, kfd->doorbell_bitmap);
+	mutex_unlock(&kfd->doorbell_mutex);
+}
+
+void write_kernel_doorbell(void __iomem *db, u32 value)
+{
+	if (db) {
+		writel(value, db);
+		pr_debug("Writing %d to doorbell address %p\n", value, db);
+	}
+}
+
+void write_kernel_doorbell64(void __iomem *db, u64 value)
+{
+	if (db) {
+		WARN(((unsigned long)db & 7) != 0,
+		     "Unaligned 64-bit doorbell");
+		writeq(value, (u64 __iomem *)db);
+		pr_debug("writing %llu to doorbell address %p\n", value, db);
+	}
+}
+
+static int init_doorbell_bitmap(struct qcm_process_device *qpd,
+				struct kfd_dev *dev)
+{
+	unsigned int i;
+	int range_start = dev->shared_resources.non_cp_doorbells_start;
+	int range_end = dev->shared_resources.non_cp_doorbells_end;
+
+	if (!KFD_IS_SOC15(dev))
+		return 0;
+
+	/* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
+	pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
+	pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
+			range_start + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
+			range_end + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
+
+	for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
+		if (i >= range_start && i <= range_end) {
+			__set_bit(i, qpd->doorbell_bitmap);
+			__set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
+				  qpd->doorbell_bitmap);
+		}
+	}
+
+	return 0;
+}
+
+phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
+{
+	struct amdgpu_device *adev = pdd->dev->adev;
+	uint32_t first_db_index;
+
+	if (!pdd->qpd.proc_doorbells) {
+		if (kfd_alloc_process_doorbells(pdd->dev->kfd, pdd))
+			/* phys_addr_t 0 is error */
+			return 0;
+	}
+
+	first_db_index = amdgpu_doorbell_index_on_bar(adev,
+						      pdd->qpd.proc_doorbells,
+						      0,
+						      pdd->dev->kfd->device_info.doorbell_size);
+	return adev->doorbell.base + first_db_index * sizeof(uint32_t);
+}
+
+int kfd_alloc_process_doorbells(struct kfd_dev *kfd, struct kfd_process_device *pdd)
+{
+	int r;
+	struct qcm_process_device *qpd = &pdd->qpd;
+
+	/* Allocate bitmap for dynamic doorbell allocation */
+	qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+					     GFP_KERNEL);
+	if (!qpd->doorbell_bitmap) {
+		DRM_ERROR("Failed to allocate process doorbell bitmap\n");
+		return -ENOMEM;
+	}
+
+	r = init_doorbell_bitmap(&pdd->qpd, kfd);
+	if (r) {
+		DRM_ERROR("Failed to initialize process doorbells\n");
+		r = -ENOMEM;
+		goto err;
+	}
+
+	/* Allocate doorbells for this process */
+	r = amdgpu_bo_create_kernel(kfd->adev,
+				    kfd_doorbell_process_slice(kfd),
+				    PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_DOORBELL,
+				    &qpd->proc_doorbells,
+				    NULL,
+				    NULL);
+	if (r) {
+		DRM_ERROR("Failed to allocate process doorbells\n");
+		goto err;
+	}
+	return 0;
+
+err:
+	bitmap_free(qpd->doorbell_bitmap);
+	qpd->doorbell_bitmap = NULL;
+	return r;
+}
+
+void kfd_free_process_doorbells(struct kfd_dev *kfd, struct kfd_process_device *pdd)
+{
+	struct qcm_process_device *qpd = &pdd->qpd;
+
+	if (qpd->doorbell_bitmap) {
+		bitmap_free(qpd->doorbell_bitmap);
+		qpd->doorbell_bitmap = NULL;
+	}
+
+	amdgpu_bo_free_kernel(&qpd->proc_doorbells, NULL, NULL);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
new file mode 100644
index 000000000..0f58be651
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -0,0 +1,1327 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/mm_types.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
+#include <linux/uaccess.h>
+#include <linux/mman.h>
+#include <linux/memory.h>
+#include "kfd_priv.h"
+#include "kfd_events.h"
+#include <linux/device.h>
+
+/*
+ * Wrapper around wait_queue_entry_t
+ */
+struct kfd_event_waiter {
+	wait_queue_entry_t wait;
+	struct kfd_event *event; /* Event to wait for */
+	bool activated;		 /* Becomes true when event is signaled */
+	bool event_age_enabled;  /* set to true when last_event_age is non-zero */
+};
+
+/*
+ * Each signal event needs a 64-bit signal slot where the signaler will write
+ * a 1 before sending an interrupt. (This is needed because some interrupts
+ * do not contain enough spare data bits to identify an event.)
+ * We get whole pages and map them to the process VA.
+ * Individual signal events use their event_id as slot index.
+ */
+struct kfd_signal_page {
+	uint64_t *kernel_address;
+	uint64_t __user *user_address;
+	bool need_to_free_pages;
+};
+
+static uint64_t *page_slots(struct kfd_signal_page *page)
+{
+	return page->kernel_address;
+}
+
+static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
+{
+	void *backing_store;
+	struct kfd_signal_page *page;
+
+	page = kzalloc(sizeof(*page), GFP_KERNEL);
+	if (!page)
+		return NULL;
+
+	backing_store = (void *) __get_free_pages(GFP_KERNEL,
+					get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
+	if (!backing_store)
+		goto fail_alloc_signal_store;
+
+	/* Initialize all events to unsignaled */
+	memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT,
+	       KFD_SIGNAL_EVENT_LIMIT * 8);
+
+	page->kernel_address = backing_store;
+	page->need_to_free_pages = true;
+	pr_debug("Allocated new event signal page at %p, for process %p\n",
+			page, p);
+
+	return page;
+
+fail_alloc_signal_store:
+	kfree(page);
+	return NULL;
+}
+
+static int allocate_event_notification_slot(struct kfd_process *p,
+					    struct kfd_event *ev,
+					    const int *restore_id)
+{
+	int id;
+
+	if (!p->signal_page) {
+		p->signal_page = allocate_signal_page(p);
+		if (!p->signal_page)
+			return -ENOMEM;
+		/* Oldest user mode expects 256 event slots */
+		p->signal_mapped_size = 256*8;
+	}
+
+	if (restore_id) {
+		id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
+				GFP_KERNEL);
+	} else {
+		/*
+		 * Compatibility with old user mode: Only use signal slots
+		 * user mode has mapped, may be less than
+		 * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
+		 * of the event limit without breaking user mode.
+		 */
+		id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
+				GFP_KERNEL);
+	}
+	if (id < 0)
+		return id;
+
+	ev->event_id = id;
+	page_slots(p->signal_page)[id] = UNSIGNALED_EVENT_SLOT;
+
+	return 0;
+}
+
+/*
+ * Assumes that p->event_mutex or rcu_readlock is held and of course that p is
+ * not going away.
+ */
+static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
+{
+	return idr_find(&p->event_idr, id);
+}
+
+/**
+ * lookup_signaled_event_by_partial_id - Lookup signaled event from partial ID
+ * @p:     Pointer to struct kfd_process
+ * @id:    ID to look up
+ * @bits:  Number of valid bits in @id
+ *
+ * Finds the first signaled event with a matching partial ID. If no
+ * matching signaled event is found, returns NULL. In that case the
+ * caller should assume that the partial ID is invalid and do an
+ * exhaustive search of all siglaned events.
+ *
+ * If multiple events with the same partial ID signal at the same
+ * time, they will be found one interrupt at a time, not necessarily
+ * in the same order the interrupts occurred. As long as the number of
+ * interrupts is correct, all signaled events will be seen by the
+ * driver.
+ */
+static struct kfd_event *lookup_signaled_event_by_partial_id(
+	struct kfd_process *p, uint32_t id, uint32_t bits)
+{
+	struct kfd_event *ev;
+
+	if (!p->signal_page || id >= KFD_SIGNAL_EVENT_LIMIT)
+		return NULL;
+
+	/* Fast path for the common case that @id is not a partial ID
+	 * and we only need a single lookup.
+	 */
+	if (bits > 31 || (1U << bits) >= KFD_SIGNAL_EVENT_LIMIT) {
+		if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT)
+			return NULL;
+
+		return idr_find(&p->event_idr, id);
+	}
+
+	/* General case for partial IDs: Iterate over all matching IDs
+	 * and find the first one that has signaled.
+	 */
+	for (ev = NULL; id < KFD_SIGNAL_EVENT_LIMIT && !ev; id += 1U << bits) {
+		if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT)
+			continue;
+
+		ev = idr_find(&p->event_idr, id);
+	}
+
+	return ev;
+}
+
+static int create_signal_event(struct file *devkfd, struct kfd_process *p,
+				struct kfd_event *ev, const int *restore_id)
+{
+	int ret;
+
+	if (p->signal_mapped_size &&
+	    p->signal_event_count == p->signal_mapped_size / 8) {
+		if (!p->signal_event_limit_reached) {
+			pr_debug("Signal event wasn't created because limit was reached\n");
+			p->signal_event_limit_reached = true;
+		}
+		return -ENOSPC;
+	}
+
+	ret = allocate_event_notification_slot(p, ev, restore_id);
+	if (ret) {
+		pr_warn("Signal event wasn't created because out of kernel memory\n");
+		return ret;
+	}
+
+	p->signal_event_count++;
+
+	ev->user_signal_address = &p->signal_page->user_address[ev->event_id];
+	pr_debug("Signal event number %zu created with id %d, address %p\n",
+			p->signal_event_count, ev->event_id,
+			ev->user_signal_address);
+
+	return 0;
+}
+
+static int create_other_event(struct kfd_process *p, struct kfd_event *ev, const int *restore_id)
+{
+	int id;
+
+	if (restore_id)
+		id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
+			GFP_KERNEL);
+	else
+		/* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
+		 * intentional integer overflow to -1 without a compiler
+		 * warning. idr_alloc treats a negative value as "maximum
+		 * signed integer".
+		 */
+		id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
+				(uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
+				GFP_KERNEL);
+
+	if (id < 0)
+		return id;
+	ev->event_id = id;
+
+	return 0;
+}
+
+int kfd_event_init_process(struct kfd_process *p)
+{
+	int id;
+
+	mutex_init(&p->event_mutex);
+	idr_init(&p->event_idr);
+	p->signal_page = NULL;
+	p->signal_event_count = 1;
+	/* Allocate event ID 0. It is used for a fast path to ignore bogus events
+	 * that are sent by the CP without a context ID
+	 */
+	id = idr_alloc(&p->event_idr, NULL, 0, 1, GFP_KERNEL);
+	if (id < 0) {
+		idr_destroy(&p->event_idr);
+		mutex_destroy(&p->event_mutex);
+		return id;
+	}
+	return 0;
+}
+
+static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
+{
+	struct kfd_event_waiter *waiter;
+
+	/* Wake up pending waiters. They will return failure */
+	spin_lock(&ev->lock);
+	list_for_each_entry(waiter, &ev->wq.head, wait.entry)
+		WRITE_ONCE(waiter->event, NULL);
+	wake_up_all(&ev->wq);
+	spin_unlock(&ev->lock);
+
+	if (ev->type == KFD_EVENT_TYPE_SIGNAL ||
+	    ev->type == KFD_EVENT_TYPE_DEBUG)
+		p->signal_event_count--;
+
+	idr_remove(&p->event_idr, ev->event_id);
+	kfree_rcu(ev, rcu);
+}
+
+static void destroy_events(struct kfd_process *p)
+{
+	struct kfd_event *ev;
+	uint32_t id;
+
+	idr_for_each_entry(&p->event_idr, ev, id)
+		if (ev)
+			destroy_event(p, ev);
+	idr_destroy(&p->event_idr);
+	mutex_destroy(&p->event_mutex);
+}
+
+/*
+ * We assume that the process is being destroyed and there is no need to
+ * unmap the pages or keep bookkeeping data in order.
+ */
+static void shutdown_signal_page(struct kfd_process *p)
+{
+	struct kfd_signal_page *page = p->signal_page;
+
+	if (page) {
+		if (page->need_to_free_pages)
+			free_pages((unsigned long)page->kernel_address,
+				   get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
+		kfree(page);
+	}
+}
+
+void kfd_event_free_process(struct kfd_process *p)
+{
+	destroy_events(p);
+	shutdown_signal_page(p);
+}
+
+static bool event_can_be_gpu_signaled(const struct kfd_event *ev)
+{
+	return ev->type == KFD_EVENT_TYPE_SIGNAL ||
+					ev->type == KFD_EVENT_TYPE_DEBUG;
+}
+
+static bool event_can_be_cpu_signaled(const struct kfd_event *ev)
+{
+	return ev->type == KFD_EVENT_TYPE_SIGNAL;
+}
+
+static int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
+		       uint64_t size, uint64_t user_handle)
+{
+	struct kfd_signal_page *page;
+
+	if (p->signal_page)
+		return -EBUSY;
+
+	page = kzalloc(sizeof(*page), GFP_KERNEL);
+	if (!page)
+		return -ENOMEM;
+
+	/* Initialize all events to unsignaled */
+	memset(kernel_address, (uint8_t) UNSIGNALED_EVENT_SLOT,
+	       KFD_SIGNAL_EVENT_LIMIT * 8);
+
+	page->kernel_address = kernel_address;
+
+	p->signal_page = page;
+	p->signal_mapped_size = size;
+	p->signal_handle = user_handle;
+	return 0;
+}
+
+int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)
+{
+	struct kfd_node *kfd;
+	struct kfd_process_device *pdd;
+	void *mem, *kern_addr;
+	uint64_t size;
+	int err = 0;
+
+	if (p->signal_page) {
+		pr_err("Event page is already set\n");
+		return -EINVAL;
+	}
+
+	pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(event_page_offset));
+	if (!pdd) {
+		pr_err("Getting device by id failed in %s\n", __func__);
+		return -EINVAL;
+	}
+	kfd = pdd->dev;
+
+	pdd = kfd_bind_process_to_device(kfd, p);
+	if (IS_ERR(pdd))
+		return PTR_ERR(pdd);
+
+	mem = kfd_process_device_translate_handle(pdd,
+			GET_IDR_HANDLE(event_page_offset));
+	if (!mem) {
+		pr_err("Can't find BO, offset is 0x%llx\n", event_page_offset);
+		return -EINVAL;
+	}
+
+	err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(mem, &kern_addr, &size);
+	if (err) {
+		pr_err("Failed to map event page to kernel\n");
+		return err;
+	}
+
+	err = kfd_event_page_set(p, kern_addr, size, event_page_offset);
+	if (err) {
+		pr_err("Failed to set event page\n");
+		amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
+		return err;
+	}
+	return err;
+}
+
+int kfd_event_create(struct file *devkfd, struct kfd_process *p,
+		     uint32_t event_type, bool auto_reset, uint32_t node_id,
+		     uint32_t *event_id, uint32_t *event_trigger_data,
+		     uint64_t *event_page_offset, uint32_t *event_slot_index)
+{
+	int ret = 0;
+	struct kfd_event *ev = kzalloc(sizeof(*ev), GFP_KERNEL);
+
+	if (!ev)
+		return -ENOMEM;
+
+	ev->type = event_type;
+	ev->auto_reset = auto_reset;
+	ev->signaled = false;
+
+	spin_lock_init(&ev->lock);
+	init_waitqueue_head(&ev->wq);
+
+	*event_page_offset = 0;
+
+	mutex_lock(&p->event_mutex);
+
+	switch (event_type) {
+	case KFD_EVENT_TYPE_SIGNAL:
+	case KFD_EVENT_TYPE_DEBUG:
+		ret = create_signal_event(devkfd, p, ev, NULL);
+		if (!ret) {
+			*event_page_offset = KFD_MMAP_TYPE_EVENTS;
+			*event_slot_index = ev->event_id;
+		}
+		break;
+	default:
+		ret = create_other_event(p, ev, NULL);
+		break;
+	}
+
+	if (!ret) {
+		*event_id = ev->event_id;
+		*event_trigger_data = ev->event_id;
+		ev->event_age = 1;
+	} else {
+		kfree(ev);
+	}
+
+	mutex_unlock(&p->event_mutex);
+
+	return ret;
+}
+
+int kfd_criu_restore_event(struct file *devkfd,
+			   struct kfd_process *p,
+			   uint8_t __user *user_priv_ptr,
+			   uint64_t *priv_data_offset,
+			   uint64_t max_priv_data_size)
+{
+	struct kfd_criu_event_priv_data *ev_priv;
+	struct kfd_event *ev = NULL;
+	int ret = 0;
+
+	ev_priv = kmalloc(sizeof(*ev_priv), GFP_KERNEL);
+	if (!ev_priv)
+		return -ENOMEM;
+
+	ev = kzalloc(sizeof(*ev), GFP_KERNEL);
+	if (!ev) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	if (*priv_data_offset + sizeof(*ev_priv) > max_priv_data_size) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	ret = copy_from_user(ev_priv, user_priv_ptr + *priv_data_offset, sizeof(*ev_priv));
+	if (ret) {
+		ret = -EFAULT;
+		goto exit;
+	}
+	*priv_data_offset += sizeof(*ev_priv);
+
+	if (ev_priv->user_handle) {
+		ret = kfd_kmap_event_page(p, ev_priv->user_handle);
+		if (ret)
+			goto exit;
+	}
+
+	ev->type = ev_priv->type;
+	ev->auto_reset = ev_priv->auto_reset;
+	ev->signaled = ev_priv->signaled;
+
+	spin_lock_init(&ev->lock);
+	init_waitqueue_head(&ev->wq);
+
+	mutex_lock(&p->event_mutex);
+	switch (ev->type) {
+	case KFD_EVENT_TYPE_SIGNAL:
+	case KFD_EVENT_TYPE_DEBUG:
+		ret = create_signal_event(devkfd, p, ev, &ev_priv->event_id);
+		break;
+	case KFD_EVENT_TYPE_MEMORY:
+		memcpy(&ev->memory_exception_data,
+			&ev_priv->memory_exception_data,
+			sizeof(struct kfd_hsa_memory_exception_data));
+
+		ret = create_other_event(p, ev, &ev_priv->event_id);
+		break;
+	case KFD_EVENT_TYPE_HW_EXCEPTION:
+		memcpy(&ev->hw_exception_data,
+			&ev_priv->hw_exception_data,
+			sizeof(struct kfd_hsa_hw_exception_data));
+
+		ret = create_other_event(p, ev, &ev_priv->event_id);
+		break;
+	}
+	mutex_unlock(&p->event_mutex);
+
+exit:
+	if (ret)
+		kfree(ev);
+
+	kfree(ev_priv);
+
+	return ret;
+}
+
+int kfd_criu_checkpoint_events(struct kfd_process *p,
+			 uint8_t __user *user_priv_data,
+			 uint64_t *priv_data_offset)
+{
+	struct kfd_criu_event_priv_data *ev_privs;
+	int i = 0;
+	int ret =  0;
+	struct kfd_event *ev;
+	uint32_t ev_id;
+
+	uint32_t num_events = kfd_get_num_events(p);
+
+	if (!num_events)
+		return 0;
+
+	ev_privs = kvzalloc(num_events * sizeof(*ev_privs), GFP_KERNEL);
+	if (!ev_privs)
+		return -ENOMEM;
+
+
+	idr_for_each_entry(&p->event_idr, ev, ev_id) {
+		struct kfd_criu_event_priv_data *ev_priv;
+
+		/*
+		 * Currently, all events have same size of private_data, but the current ioctl's
+		 * and CRIU plugin supports private_data of variable sizes
+		 */
+		ev_priv = &ev_privs[i];
+
+		ev_priv->object_type = KFD_CRIU_OBJECT_TYPE_EVENT;
+
+		/* We store the user_handle with the first event */
+		if (i == 0 && p->signal_page)
+			ev_priv->user_handle = p->signal_handle;
+
+		ev_priv->event_id = ev->event_id;
+		ev_priv->auto_reset = ev->auto_reset;
+		ev_priv->type = ev->type;
+		ev_priv->signaled = ev->signaled;
+
+		if (ev_priv->type == KFD_EVENT_TYPE_MEMORY)
+			memcpy(&ev_priv->memory_exception_data,
+				&ev->memory_exception_data,
+				sizeof(struct kfd_hsa_memory_exception_data));
+		else if (ev_priv->type == KFD_EVENT_TYPE_HW_EXCEPTION)
+			memcpy(&ev_priv->hw_exception_data,
+				&ev->hw_exception_data,
+				sizeof(struct kfd_hsa_hw_exception_data));
+
+		pr_debug("Checkpointed event[%d] id = 0x%08x auto_reset = %x type = %x signaled = %x\n",
+			  i,
+			  ev_priv->event_id,
+			  ev_priv->auto_reset,
+			  ev_priv->type,
+			  ev_priv->signaled);
+		i++;
+	}
+
+	ret = copy_to_user(user_priv_data + *priv_data_offset,
+			   ev_privs, num_events * sizeof(*ev_privs));
+	if (ret) {
+		pr_err("Failed to copy events priv to user\n");
+		ret = -EFAULT;
+	}
+
+	*priv_data_offset += num_events * sizeof(*ev_privs);
+
+	kvfree(ev_privs);
+	return ret;
+}
+
+int kfd_get_num_events(struct kfd_process *p)
+{
+	struct kfd_event *ev;
+	uint32_t id;
+	u32 num_events = 0;
+
+	idr_for_each_entry(&p->event_idr, ev, id)
+		num_events++;
+
+	return num_events;
+}
+
+/* Assumes that p is current. */
+int kfd_event_destroy(struct kfd_process *p, uint32_t event_id)
+{
+	struct kfd_event *ev;
+	int ret = 0;
+
+	mutex_lock(&p->event_mutex);
+
+	ev = lookup_event_by_id(p, event_id);
+
+	if (ev)
+		destroy_event(p, ev);
+	else
+		ret = -EINVAL;
+
+	mutex_unlock(&p->event_mutex);
+	return ret;
+}
+
+static void set_event(struct kfd_event *ev)
+{
+	struct kfd_event_waiter *waiter;
+
+	/* Auto reset if the list is non-empty and we're waking
+	 * someone. waitqueue_active is safe here because we're
+	 * protected by the ev->lock, which is also held when
+	 * updating the wait queues in kfd_wait_on_events.
+	 */
+	ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq);
+	if (!(++ev->event_age)) {
+		/* Never wrap back to reserved/default event age 0/1 */
+		ev->event_age = 2;
+		WARN_ONCE(1, "event_age wrap back!");
+	}
+
+	list_for_each_entry(waiter, &ev->wq.head, wait.entry)
+		WRITE_ONCE(waiter->activated, true);
+
+	wake_up_all(&ev->wq);
+}
+
+/* Assumes that p is current. */
+int kfd_set_event(struct kfd_process *p, uint32_t event_id)
+{
+	int ret = 0;
+	struct kfd_event *ev;
+
+	rcu_read_lock();
+
+	ev = lookup_event_by_id(p, event_id);
+	if (!ev) {
+		ret = -EINVAL;
+		goto unlock_rcu;
+	}
+	spin_lock(&ev->lock);
+
+	if (event_can_be_cpu_signaled(ev))
+		set_event(ev);
+	else
+		ret = -EINVAL;
+
+	spin_unlock(&ev->lock);
+unlock_rcu:
+	rcu_read_unlock();
+	return ret;
+}
+
+static void reset_event(struct kfd_event *ev)
+{
+	ev->signaled = false;
+}
+
+/* Assumes that p is current. */
+int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
+{
+	int ret = 0;
+	struct kfd_event *ev;
+
+	rcu_read_lock();
+
+	ev = lookup_event_by_id(p, event_id);
+	if (!ev) {
+		ret = -EINVAL;
+		goto unlock_rcu;
+	}
+	spin_lock(&ev->lock);
+
+	if (event_can_be_cpu_signaled(ev))
+		reset_event(ev);
+	else
+		ret = -EINVAL;
+
+	spin_unlock(&ev->lock);
+unlock_rcu:
+	rcu_read_unlock();
+	return ret;
+
+}
+
+static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
+{
+	WRITE_ONCE(page_slots(p->signal_page)[ev->event_id], UNSIGNALED_EVENT_SLOT);
+}
+
+static void set_event_from_interrupt(struct kfd_process *p,
+					struct kfd_event *ev)
+{
+	if (ev && event_can_be_gpu_signaled(ev)) {
+		acknowledge_signal(p, ev);
+		spin_lock(&ev->lock);
+		set_event(ev);
+		spin_unlock(&ev->lock);
+	}
+}
+
+void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
+				uint32_t valid_id_bits)
+{
+	struct kfd_event *ev = NULL;
+
+	/*
+	 * Because we are called from arbitrary context (workqueue) as opposed
+	 * to process context, kfd_process could attempt to exit while we are
+	 * running so the lookup function increments the process ref count.
+	 */
+	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+
+	if (!p)
+		return; /* Presumably process exited. */
+
+	rcu_read_lock();
+
+	if (valid_id_bits)
+		ev = lookup_signaled_event_by_partial_id(p, partial_id,
+							 valid_id_bits);
+	if (ev) {
+		set_event_from_interrupt(p, ev);
+	} else if (p->signal_page) {
+		/*
+		 * Partial ID lookup failed. Assume that the event ID
+		 * in the interrupt payload was invalid and do an
+		 * exhaustive search of signaled events.
+		 */
+		uint64_t *slots = page_slots(p->signal_page);
+		uint32_t id;
+
+		if (valid_id_bits)
+			pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n",
+					     partial_id, valid_id_bits);
+
+		if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT / 64) {
+			/* With relatively few events, it's faster to
+			 * iterate over the event IDR
+			 */
+			idr_for_each_entry(&p->event_idr, ev, id) {
+				if (id >= KFD_SIGNAL_EVENT_LIMIT)
+					break;
+
+				if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT)
+					set_event_from_interrupt(p, ev);
+			}
+		} else {
+			/* With relatively many events, it's faster to
+			 * iterate over the signal slots and lookup
+			 * only signaled events from the IDR.
+			 */
+			for (id = 1; id < KFD_SIGNAL_EVENT_LIMIT; id++)
+				if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT) {
+					ev = lookup_event_by_id(p, id);
+					set_event_from_interrupt(p, ev);
+				}
+		}
+	}
+
+	rcu_read_unlock();
+	kfd_unref_process(p);
+}
+
+static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
+{
+	struct kfd_event_waiter *event_waiters;
+	uint32_t i;
+
+	event_waiters = kcalloc(num_events, sizeof(struct kfd_event_waiter),
+				GFP_KERNEL);
+	if (!event_waiters)
+		return NULL;
+
+	for (i = 0; i < num_events; i++)
+		init_wait(&event_waiters[i].wait);
+
+	return event_waiters;
+}
+
+static int init_event_waiter(struct kfd_process *p,
+		struct kfd_event_waiter *waiter,
+		struct kfd_event_data *event_data)
+{
+	struct kfd_event *ev = lookup_event_by_id(p, event_data->event_id);
+
+	if (!ev)
+		return -EINVAL;
+
+	spin_lock(&ev->lock);
+	waiter->event = ev;
+	waiter->activated = ev->signaled;
+	ev->signaled = ev->signaled && !ev->auto_reset;
+
+	/* last_event_age = 0 reserved for backward compatible */
+	if (waiter->event->type == KFD_EVENT_TYPE_SIGNAL &&
+		event_data->signal_event_data.last_event_age) {
+		waiter->event_age_enabled = true;
+		if (ev->event_age != event_data->signal_event_data.last_event_age)
+			waiter->activated = true;
+	}
+
+	if (!waiter->activated)
+		add_wait_queue(&ev->wq, &waiter->wait);
+	spin_unlock(&ev->lock);
+
+	return 0;
+}
+
+/* test_event_condition - Test condition of events being waited for
+ * @all:           Return completion only if all events have signaled
+ * @num_events:    Number of events to wait for
+ * @event_waiters: Array of event waiters, one per event
+ *
+ * Returns KFD_IOC_WAIT_RESULT_COMPLETE if all (or one) event(s) have
+ * signaled. Returns KFD_IOC_WAIT_RESULT_TIMEOUT if no (or not all)
+ * events have signaled. Returns KFD_IOC_WAIT_RESULT_FAIL if any of
+ * the events have been destroyed.
+ */
+static uint32_t test_event_condition(bool all, uint32_t num_events,
+				struct kfd_event_waiter *event_waiters)
+{
+	uint32_t i;
+	uint32_t activated_count = 0;
+
+	for (i = 0; i < num_events; i++) {
+		if (!READ_ONCE(event_waiters[i].event))
+			return KFD_IOC_WAIT_RESULT_FAIL;
+
+		if (READ_ONCE(event_waiters[i].activated)) {
+			if (!all)
+				return KFD_IOC_WAIT_RESULT_COMPLETE;
+
+			activated_count++;
+		}
+	}
+
+	return activated_count == num_events ?
+		KFD_IOC_WAIT_RESULT_COMPLETE : KFD_IOC_WAIT_RESULT_TIMEOUT;
+}
+
+/*
+ * Copy event specific data, if defined.
+ * Currently only memory exception events have additional data to copy to user
+ */
+static int copy_signaled_event_data(uint32_t num_events,
+		struct kfd_event_waiter *event_waiters,
+		struct kfd_event_data __user *data)
+{
+	void *src;
+	void __user *dst;
+	struct kfd_event_waiter *waiter;
+	struct kfd_event *event;
+	uint32_t i, size = 0;
+
+	for (i = 0; i < num_events; i++) {
+		waiter = &event_waiters[i];
+		event = waiter->event;
+		if (!event)
+			return -EINVAL; /* event was destroyed */
+		if (waiter->activated) {
+			if (event->type == KFD_EVENT_TYPE_MEMORY) {
+				dst = &data[i].memory_exception_data;
+				src = &event->memory_exception_data;
+				size = sizeof(struct kfd_hsa_memory_exception_data);
+			} else if (event->type == KFD_EVENT_TYPE_SIGNAL &&
+				waiter->event_age_enabled) {
+				dst = &data[i].signal_event_data.last_event_age;
+				src = &event->event_age;
+				size = sizeof(u64);
+			}
+			if (size && copy_to_user(dst, src, size))
+				return -EFAULT;
+		}
+	}
+
+	return 0;
+}
+
+static long user_timeout_to_jiffies(uint32_t user_timeout_ms)
+{
+	if (user_timeout_ms == KFD_EVENT_TIMEOUT_IMMEDIATE)
+		return 0;
+
+	if (user_timeout_ms == KFD_EVENT_TIMEOUT_INFINITE)
+		return MAX_SCHEDULE_TIMEOUT;
+
+	/*
+	 * msecs_to_jiffies interprets all values above 2^31-1 as infinite,
+	 * but we consider them finite.
+	 * This hack is wrong, but nobody is likely to notice.
+	 */
+	user_timeout_ms = min_t(uint32_t, user_timeout_ms, 0x7FFFFFFF);
+
+	return msecs_to_jiffies(user_timeout_ms) + 1;
+}
+
+static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters,
+			 bool undo_auto_reset)
+{
+	uint32_t i;
+
+	for (i = 0; i < num_events; i++)
+		if (waiters[i].event) {
+			spin_lock(&waiters[i].event->lock);
+			remove_wait_queue(&waiters[i].event->wq,
+					  &waiters[i].wait);
+			if (undo_auto_reset && waiters[i].activated &&
+			    waiters[i].event && waiters[i].event->auto_reset)
+				set_event(waiters[i].event);
+			spin_unlock(&waiters[i].event->lock);
+		}
+
+	kfree(waiters);
+}
+
+int kfd_wait_on_events(struct kfd_process *p,
+		       uint32_t num_events, void __user *data,
+		       bool all, uint32_t *user_timeout_ms,
+		       uint32_t *wait_result)
+{
+	struct kfd_event_data __user *events =
+			(struct kfd_event_data __user *) data;
+	uint32_t i;
+	int ret = 0;
+
+	struct kfd_event_waiter *event_waiters = NULL;
+	long timeout = user_timeout_to_jiffies(*user_timeout_ms);
+
+	event_waiters = alloc_event_waiters(num_events);
+	if (!event_waiters) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* Use p->event_mutex here to protect against concurrent creation and
+	 * destruction of events while we initialize event_waiters.
+	 */
+	mutex_lock(&p->event_mutex);
+
+	for (i = 0; i < num_events; i++) {
+		struct kfd_event_data event_data;
+
+		if (copy_from_user(&event_data, &events[i],
+				sizeof(struct kfd_event_data))) {
+			ret = -EFAULT;
+			goto out_unlock;
+		}
+
+		ret = init_event_waiter(p, &event_waiters[i], &event_data);
+		if (ret)
+			goto out_unlock;
+	}
+
+	/* Check condition once. */
+	*wait_result = test_event_condition(all, num_events, event_waiters);
+	if (*wait_result == KFD_IOC_WAIT_RESULT_COMPLETE) {
+		ret = copy_signaled_event_data(num_events,
+					       event_waiters, events);
+		goto out_unlock;
+	} else if (WARN_ON(*wait_result == KFD_IOC_WAIT_RESULT_FAIL)) {
+		/* This should not happen. Events shouldn't be
+		 * destroyed while we're holding the event_mutex
+		 */
+		goto out_unlock;
+	}
+
+	mutex_unlock(&p->event_mutex);
+
+	while (true) {
+		if (fatal_signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			if (*user_timeout_ms != KFD_EVENT_TIMEOUT_IMMEDIATE &&
+			    *user_timeout_ms != KFD_EVENT_TIMEOUT_INFINITE)
+				*user_timeout_ms = jiffies_to_msecs(
+					max(0l, timeout-1));
+			break;
+		}
+
+		/* Set task state to interruptible sleep before
+		 * checking wake-up conditions. A concurrent wake-up
+		 * will put the task back into runnable state. In that
+		 * case schedule_timeout will not put the task to
+		 * sleep and we'll get a chance to re-check the
+		 * updated conditions almost immediately. Otherwise,
+		 * this race condition would lead to a soft hang or a
+		 * very long sleep.
+		 */
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		*wait_result = test_event_condition(all, num_events,
+						    event_waiters);
+		if (*wait_result != KFD_IOC_WAIT_RESULT_TIMEOUT)
+			break;
+
+		if (timeout <= 0)
+			break;
+
+		timeout = schedule_timeout(timeout);
+	}
+	__set_current_state(TASK_RUNNING);
+
+	mutex_lock(&p->event_mutex);
+	/* copy_signaled_event_data may sleep. So this has to happen
+	 * after the task state is set back to RUNNING.
+	 *
+	 * The event may also have been destroyed after signaling. So
+	 * copy_signaled_event_data also must confirm that the event
+	 * still exists. Therefore this must be under the p->event_mutex
+	 * which is also held when events are destroyed.
+	 */
+	if (!ret && *wait_result == KFD_IOC_WAIT_RESULT_COMPLETE)
+		ret = copy_signaled_event_data(num_events,
+					       event_waiters, events);
+
+out_unlock:
+	free_waiters(num_events, event_waiters, ret == -ERESTARTSYS);
+	mutex_unlock(&p->event_mutex);
+out:
+	if (ret)
+		*wait_result = KFD_IOC_WAIT_RESULT_FAIL;
+	else if (*wait_result == KFD_IOC_WAIT_RESULT_FAIL)
+		ret = -EIO;
+
+	return ret;
+}
+
+int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
+{
+	unsigned long pfn;
+	struct kfd_signal_page *page;
+	int ret;
+
+	/* check required size doesn't exceed the allocated size */
+	if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) <
+			get_order(vma->vm_end - vma->vm_start)) {
+		pr_err("Event page mmap requested illegal size\n");
+		return -EINVAL;
+	}
+
+	page = p->signal_page;
+	if (!page) {
+		/* Probably KFD bug, but mmap is user-accessible. */
+		pr_debug("Signal page could not be found\n");
+		return -EINVAL;
+	}
+
+	pfn = __pa(page->kernel_address);
+	pfn >>= PAGE_SHIFT;
+
+	vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE
+		       | VM_DONTDUMP | VM_PFNMAP);
+
+	pr_debug("Mapping signal page\n");
+	pr_debug("     start user address  == 0x%08lx\n", vma->vm_start);
+	pr_debug("     end user address    == 0x%08lx\n", vma->vm_end);
+	pr_debug("     pfn                 == 0x%016lX\n", pfn);
+	pr_debug("     vm_flags            == 0x%08lX\n", vma->vm_flags);
+	pr_debug("     size                == 0x%08lX\n",
+			vma->vm_end - vma->vm_start);
+
+	page->user_address = (uint64_t __user *)vma->vm_start;
+
+	/* mapping the page to user process */
+	ret = remap_pfn_range(vma, vma->vm_start, pfn,
+			vma->vm_end - vma->vm_start, vma->vm_page_prot);
+	if (!ret)
+		p->signal_mapped_size = vma->vm_end - vma->vm_start;
+
+	return ret;
+}
+
+/*
+ * Assumes that p is not going away.
+ */
+static void lookup_events_by_type_and_signal(struct kfd_process *p,
+		int type, void *event_data)
+{
+	struct kfd_hsa_memory_exception_data *ev_data;
+	struct kfd_event *ev;
+	uint32_t id;
+	bool send_signal = true;
+
+	ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
+
+	rcu_read_lock();
+
+	id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+	idr_for_each_entry_continue(&p->event_idr, ev, id)
+		if (ev->type == type) {
+			send_signal = false;
+			dev_dbg(kfd_device,
+					"Event found: id %X type %d",
+					ev->event_id, ev->type);
+			spin_lock(&ev->lock);
+			set_event(ev);
+			if (ev->type == KFD_EVENT_TYPE_MEMORY && ev_data)
+				ev->memory_exception_data = *ev_data;
+			spin_unlock(&ev->lock);
+		}
+
+	if (type == KFD_EVENT_TYPE_MEMORY) {
+		dev_warn(kfd_device,
+			"Sending SIGSEGV to process %d (pasid 0x%x)",
+				p->lead_thread->pid, p->pasid);
+		send_sig(SIGSEGV, p->lead_thread, 0);
+	}
+
+	/* Send SIGTERM no event of type "type" has been found*/
+	if (send_signal) {
+		if (send_sigterm) {
+			dev_warn(kfd_device,
+				"Sending SIGTERM to process %d (pasid 0x%x)",
+					p->lead_thread->pid, p->pasid);
+			send_sig(SIGTERM, p->lead_thread, 0);
+		} else {
+			dev_err(kfd_device,
+				"Process %d (pasid 0x%x) got unhandled exception",
+				p->lead_thread->pid, p->pasid);
+		}
+	}
+
+	rcu_read_unlock();
+}
+
+void kfd_signal_hw_exception_event(u32 pasid)
+{
+	/*
+	 * Because we are called from arbitrary context (workqueue) as opposed
+	 * to process context, kfd_process could attempt to exit while we are
+	 * running so the lookup function increments the process ref count.
+	 */
+	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+
+	if (!p)
+		return; /* Presumably process exited. */
+
+	lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL);
+	kfd_unref_process(p);
+}
+
+void kfd_signal_vm_fault_event(struct kfd_node *dev, u32 pasid,
+				struct kfd_vm_fault_info *info,
+				struct kfd_hsa_memory_exception_data *data)
+{
+	struct kfd_event *ev;
+	uint32_t id;
+	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+	struct kfd_hsa_memory_exception_data memory_exception_data;
+	int user_gpu_id;
+
+	if (!p)
+		return; /* Presumably process exited. */
+
+	user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+	if (unlikely(user_gpu_id == -EINVAL)) {
+		WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+		return;
+	}
+
+	/* SoC15 chips and onwards will pass in data from now on. */
+	if (!data) {
+		memset(&memory_exception_data, 0, sizeof(memory_exception_data));
+		memory_exception_data.gpu_id = user_gpu_id;
+		memory_exception_data.failure.imprecise = true;
+
+		/* Set failure reason */
+		if (info) {
+			memory_exception_data.va = (info->page_addr) <<
+								PAGE_SHIFT;
+			memory_exception_data.failure.NotPresent =
+				info->prot_valid ? 1 : 0;
+			memory_exception_data.failure.NoExecute =
+				info->prot_exec ? 1 : 0;
+			memory_exception_data.failure.ReadOnly =
+				info->prot_write ? 1 : 0;
+			memory_exception_data.failure.imprecise = 0;
+		}
+	}
+
+	rcu_read_lock();
+
+	id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+	idr_for_each_entry_continue(&p->event_idr, ev, id)
+		if (ev->type == KFD_EVENT_TYPE_MEMORY) {
+			spin_lock(&ev->lock);
+			ev->memory_exception_data = data ? *data :
+							memory_exception_data;
+			set_event(ev);
+			spin_unlock(&ev->lock);
+		}
+
+	rcu_read_unlock();
+	kfd_unref_process(p);
+}
+
+void kfd_signal_reset_event(struct kfd_node *dev)
+{
+	struct kfd_hsa_hw_exception_data hw_exception_data;
+	struct kfd_hsa_memory_exception_data memory_exception_data;
+	struct kfd_process *p;
+	struct kfd_event *ev;
+	unsigned int temp;
+	uint32_t id, idx;
+	int reset_cause = atomic_read(&dev->sram_ecc_flag) ?
+			KFD_HW_EXCEPTION_ECC :
+			KFD_HW_EXCEPTION_GPU_HANG;
+
+	/* Whole gpu reset caused by GPU hang and memory is lost */
+	memset(&hw_exception_data, 0, sizeof(hw_exception_data));
+	hw_exception_data.memory_lost = 1;
+	hw_exception_data.reset_cause = reset_cause;
+
+	memset(&memory_exception_data, 0, sizeof(memory_exception_data));
+	memory_exception_data.ErrorType = KFD_MEM_ERR_SRAM_ECC;
+	memory_exception_data.failure.imprecise = true;
+
+	idx = srcu_read_lock(&kfd_processes_srcu);
+	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
+		int user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+
+		if (unlikely(user_gpu_id == -EINVAL)) {
+			WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+			continue;
+		}
+
+		rcu_read_lock();
+
+		id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+		idr_for_each_entry_continue(&p->event_idr, ev, id) {
+			if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
+				spin_lock(&ev->lock);
+				ev->hw_exception_data = hw_exception_data;
+				ev->hw_exception_data.gpu_id = user_gpu_id;
+				set_event(ev);
+				spin_unlock(&ev->lock);
+			}
+			if (ev->type == KFD_EVENT_TYPE_MEMORY &&
+			    reset_cause == KFD_HW_EXCEPTION_ECC) {
+				spin_lock(&ev->lock);
+				ev->memory_exception_data = memory_exception_data;
+				ev->memory_exception_data.gpu_id = user_gpu_id;
+				set_event(ev);
+				spin_unlock(&ev->lock);
+			}
+		}
+
+		rcu_read_unlock();
+	}
+	srcu_read_unlock(&kfd_processes_srcu, idx);
+}
+
+void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid)
+{
+	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+	struct kfd_hsa_memory_exception_data memory_exception_data;
+	struct kfd_hsa_hw_exception_data hw_exception_data;
+	struct kfd_event *ev;
+	uint32_t id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+	int user_gpu_id;
+
+	if (!p)
+		return; /* Presumably process exited. */
+
+	user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
+	if (unlikely(user_gpu_id == -EINVAL)) {
+		WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", dev->id);
+		return;
+	}
+
+	memset(&hw_exception_data, 0, sizeof(hw_exception_data));
+	hw_exception_data.gpu_id = user_gpu_id;
+	hw_exception_data.memory_lost = 1;
+	hw_exception_data.reset_cause = KFD_HW_EXCEPTION_ECC;
+
+	memset(&memory_exception_data, 0, sizeof(memory_exception_data));
+	memory_exception_data.ErrorType = KFD_MEM_ERR_POISON_CONSUMED;
+	memory_exception_data.gpu_id = user_gpu_id;
+	memory_exception_data.failure.imprecise = true;
+
+	rcu_read_lock();
+
+	idr_for_each_entry_continue(&p->event_idr, ev, id) {
+		if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
+			spin_lock(&ev->lock);
+			ev->hw_exception_data = hw_exception_data;
+			set_event(ev);
+			spin_unlock(&ev->lock);
+		}
+
+		if (ev->type == KFD_EVENT_TYPE_MEMORY) {
+			spin_lock(&ev->lock);
+			ev->memory_exception_data = memory_exception_data;
+			set_event(ev);
+			spin_unlock(&ev->lock);
+		}
+	}
+
+	rcu_read_unlock();
+
+	/* user application will handle SIGBUS signal */
+	send_sig(SIGBUS, p->lead_thread, 0);
+
+	kfd_unref_process(p);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
new file mode 100644
index 000000000..52ccfd397
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef KFD_EVENTS_H_INCLUDED
+#define KFD_EVENTS_H_INCLUDED
+
+#include <linux/kernel.h>
+#include <linux/hashtable.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+#include "kfd_priv.h"
+#include <uapi/linux/kfd_ioctl.h>
+
+/*
+ * IDR supports non-negative integer IDs. Small IDs are used for
+ * signal events to match their signal slot. Use the upper half of the
+ * ID space for non-signal events.
+ */
+#define KFD_FIRST_NONSIGNAL_EVENT_ID ((INT_MAX >> 1) + 1)
+#define KFD_LAST_NONSIGNAL_EVENT_ID INT_MAX
+
+/*
+ * Written into kfd_signal_slot_t to indicate that the event is not signaled.
+ * Since the event protocol may need to write the event ID into memory, this
+ * must not be a valid event ID.
+ * For the sake of easy memset-ing, this must be a byte pattern.
+ */
+#define UNSIGNALED_EVENT_SLOT ((uint64_t)-1)
+
+struct kfd_event_waiter;
+struct signal_page;
+
+struct kfd_event {
+	u32 event_id;
+	u64 event_age;
+
+	bool signaled;
+	bool auto_reset;
+
+	int type;
+
+	spinlock_t lock;
+	wait_queue_head_t wq; /* List of event waiters. */
+
+	/* Only for signal events. */
+	uint64_t __user *user_signal_address;
+
+	/* type specific data */
+	union {
+		struct kfd_hsa_memory_exception_data memory_exception_data;
+		struct kfd_hsa_hw_exception_data hw_exception_data;
+	};
+
+	struct rcu_head rcu; /* for asynchronous kfree_rcu */
+};
+
+#define KFD_EVENT_TIMEOUT_IMMEDIATE 0
+#define KFD_EVENT_TIMEOUT_INFINITE 0xFFFFFFFFu
+
+/* Matching HSA_EVENTTYPE */
+#define KFD_EVENT_TYPE_SIGNAL 0
+#define KFD_EVENT_TYPE_HW_EXCEPTION 3
+#define KFD_EVENT_TYPE_DEBUG 5
+#define KFD_EVENT_TYPE_MEMORY 8
+
+extern void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
+				       uint32_t valid_id_bits);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
new file mode 100644
index 000000000..6604a3f99
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -0,0 +1,431 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/compat.h>
+#include <uapi/linux/kfd_ioctl.h>
+#include <linux/time.h>
+#include "kfd_priv.h"
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/processor.h>
+
+/*
+ * The primary memory I/O features being added for revisions of gfxip
+ * beyond 7.0 (Kaveri) are:
+ *
+ * Access to ATC/IOMMU mapped memory w/ associated extension of VA to 48b
+ *
+ * “Flat” shader memory access – These are new shader vector memory
+ * operations that do not reference a T#/V# so a “pointer” is what is
+ * sourced from the vector gprs for direct access to memory.
+ * This pointer space has the Shared(LDS) and Private(Scratch) memory
+ * mapped into this pointer space as apertures.
+ * The hardware then determines how to direct the memory request
+ * based on what apertures the request falls in.
+ *
+ * Unaligned support and alignment check
+ *
+ *
+ * System Unified Address - SUA
+ *
+ * The standard usage for GPU virtual addresses are that they are mapped by
+ * a set of page tables we call GPUVM and these page tables are managed by
+ * a combination of vidMM/driver software components.  The current virtual
+ * address (VA) range for GPUVM is 40b.
+ *
+ * As of gfxip7.1 and beyond we’re adding the ability for compute memory
+ * clients (CP/RLC, DMA, SHADER(ifetch, scalar, and vector ops)) to access
+ * the same page tables used by host x86 processors and that are managed by
+ * the operating system. This is via a technique and hardware called ATC/IOMMU.
+ * The GPU has the capability of accessing both the GPUVM and ATC address
+ * spaces for a given VMID (process) simultaneously and we call this feature
+ * system unified address (SUA).
+ *
+ * There are three fundamental address modes of operation for a given VMID
+ * (process) on the GPU:
+ *
+ *	HSA64 – 64b pointers and the default address space is ATC
+ *	HSA32 – 32b pointers and the default address space is ATC
+ *	GPUVM – 64b pointers and the default address space is GPUVM (driver
+ *		model mode)
+ *
+ *
+ * HSA64 - ATC/IOMMU 64b
+ *
+ * A 64b pointer in the AMD64/IA64 CPU architecture is not fully utilized
+ * by the CPU so an AMD CPU can only access the high area
+ * (VA[63:47] == 0x1FFFF) and low area (VA[63:47 == 0) of the address space
+ * so the actual VA carried to translation is 48b.  There is a “hole” in
+ * the middle of the 64b VA space.
+ *
+ * The GPU not only has access to all of the CPU accessible address space via
+ * ATC/IOMMU, but it also has access to the GPUVM address space.  The “system
+ * unified address” feature (SUA) is the mapping of GPUVM and ATC address
+ * spaces into a unified pointer space.  The method we take for 64b mode is
+ * to map the full 40b GPUVM address space into the hole of the 64b address
+ * space.
+
+ * The GPUVM_Base/GPUVM_Limit defines the aperture in the 64b space where we
+ * direct requests to be translated via GPUVM page tables instead of the
+ * IOMMU path.
+ *
+ *
+ * 64b to 49b Address conversion
+ *
+ * Note that there are still significant portions of unused regions (holes)
+ * in the 64b address space even for the GPU.  There are several places in
+ * the pipeline (sw and hw), we wish to compress the 64b virtual address
+ * to a 49b address.  This 49b address is constituted of an “ATC” bit
+ * plus a 48b virtual address.  This 49b address is what is passed to the
+ * translation hardware.  ATC==0 means the 48b address is a GPUVM address
+ * (max of 2^40 – 1) intended to be translated via GPUVM page tables.
+ * ATC==1 means the 48b address is intended to be translated via IOMMU
+ * page tables.
+ *
+ * A 64b pointer is compared to the apertures that are defined (Base/Limit), in
+ * this case the GPUVM aperture (red) is defined and if a pointer falls in this
+ * aperture, we subtract the GPUVM_Base address and set the ATC bit to zero
+ * as part of the 64b to 49b conversion.
+ *
+ * Where this 64b to 49b conversion is done is a function of the usage.
+ * Most GPU memory access is via memory objects where the driver builds
+ * a descriptor which consists of a base address and a memory access by
+ * the GPU usually consists of some kind of an offset or Cartesian coordinate
+ * that references this memory descriptor.  This is the case for shader
+ * instructions that reference the T# or V# constants, or for specified
+ * locations of assets (ex. the shader program location).  In these cases
+ * the driver is what handles the 64b to 49b conversion and the base
+ * address in the descriptor (ex. V# or T# or shader program location)
+ * is defined as a 48b address w/ an ATC bit.  For this usage a given
+ * memory object cannot straddle multiple apertures in the 64b address
+ * space. For example a shader program cannot jump in/out between ATC
+ * and GPUVM space.
+ *
+ * In some cases we wish to pass a 64b pointer to the GPU hardware and
+ * the GPU hw does the 64b to 49b conversion before passing memory
+ * requests to the cache/memory system.  This is the case for the
+ * S_LOAD and FLAT_* shader memory instructions where we have 64b pointers
+ * in scalar and vector GPRs respectively.
+ *
+ * In all cases (no matter where the 64b -> 49b conversion is done), the gfxip
+ * hardware sends a 48b address along w/ an ATC bit, to the memory controller
+ * on the memory request interfaces.
+ *
+ *	<client>_MC_rdreq_atc   // read request ATC bit
+ *
+ *		0 : <client>_MC_rdreq_addr is a GPUVM VA
+ *
+ *		1 : <client>_MC_rdreq_addr is a ATC VA
+ *
+ *
+ * “Spare” aperture (APE1)
+ *
+ * We use the GPUVM aperture to differentiate ATC vs. GPUVM, but we also use
+ * apertures to set the Mtype field for S_LOAD/FLAT_* ops which is input to the
+ * config tables for setting cache policies. The “spare” (APE1) aperture is
+ * motivated by getting a different Mtype from the default.
+ * The default aperture isn’t an actual base/limit aperture; it is just the
+ * address space that doesn’t hit any defined base/limit apertures.
+ * The following diagram is a complete picture of the gfxip7.x SUA apertures.
+ * The APE1 can be placed either below or above
+ * the hole (cannot be in the hole).
+ *
+ *
+ * General Aperture definitions and rules
+ *
+ * An aperture register definition consists of a Base, Limit, Mtype, and
+ * usually an ATC bit indicating which translation tables that aperture uses.
+ * In all cases (for SUA and DUA apertures discussed later), aperture base
+ * and limit definitions are 64KB aligned.
+ *
+ *	<ape>_Base[63:0] = { <ape>_Base_register[63:16], 0x0000 }
+ *
+ *	<ape>_Limit[63:0] = { <ape>_Limit_register[63:16], 0xFFFF }
+ *
+ * The base and limit are considered inclusive to an aperture so being
+ * inside an aperture means (address >= Base) AND (address <= Limit).
+ *
+ * In no case is a payload that straddles multiple apertures expected to work.
+ * For example a load_dword_x4 that starts in one aperture and ends in another,
+ * does not work.  For the vector FLAT_* ops we have detection capability in
+ * the shader for reporting a “memory violation” back to the
+ * SQ block for use in traps.
+ * A memory violation results when an op falls into the hole,
+ * or a payload straddles multiple apertures.  The S_LOAD instruction
+ * does not have this detection.
+ *
+ * Apertures cannot overlap.
+ *
+ *
+ *
+ * HSA32 - ATC/IOMMU 32b
+ *
+ * For HSA32 mode, the pointers are interpreted as 32 bits and use a single GPR
+ * instead of two for the S_LOAD and FLAT_* ops. The entire GPUVM space of 40b
+ * will not fit so there is only partial visibility to the GPUVM
+ * space (defined by the aperture) for S_LOAD and FLAT_* ops.
+ * There is no spare (APE1) aperture for HSA32 mode.
+ *
+ *
+ * GPUVM 64b mode (driver model)
+ *
+ * This mode is related to HSA64 in that the difference really is that
+ * the default aperture is GPUVM (ATC==0) and not ATC space.
+ * We have gfxip7.x hardware that has FLAT_* and S_LOAD support for
+ * SUA GPUVM mode, but does not support HSA32/HSA64.
+ *
+ *
+ * Device Unified Address - DUA
+ *
+ * Device unified address (DUA) is the name of the feature that maps the
+ * Shared(LDS) memory and Private(Scratch) memory into the overall address
+ * space for use by the new FLAT_* vector memory ops.  The Shared and
+ * Private memories are mapped as apertures into the address space,
+ * and the hardware detects when a FLAT_* memory request is to be redirected
+ * to the LDS or Scratch memory when it falls into one of these apertures.
+ * Like the SUA apertures, the Shared/Private apertures are 64KB aligned and
+ * the base/limit is “in” the aperture. For both HSA64 and GPUVM SUA modes,
+ * the Shared/Private apertures are always placed in a limited selection of
+ * options in the hole of the 64b address space. For HSA32 mode, the
+ * Shared/Private apertures can be placed anywhere in the 32b space
+ * except at 0.
+ *
+ *
+ * HSA64 Apertures for FLAT_* vector ops
+ *
+ * For HSA64 SUA mode, the Shared and Private apertures are always placed
+ * in the hole w/ a limited selection of possible locations. The requests
+ * that fall in the private aperture are expanded as a function of the
+ * work-item id (tid) and redirected to the location of the
+ * “hidden private memory”. The hidden private can be placed in either GPUVM
+ * or ATC space. The addresses that fall in the shared aperture are
+ * re-directed to the on-chip LDS memory hardware.
+ *
+ *
+ * HSA32 Apertures for FLAT_* vector ops
+ *
+ * In HSA32 mode, the Private and Shared apertures can be placed anywhere
+ * in the 32b space except at 0 (Private or Shared Base at zero disables
+ * the apertures). If the base address of the apertures are non-zero
+ * (ie apertures exists), the size is always 64KB.
+ *
+ *
+ * GPUVM Apertures for FLAT_* vector ops
+ *
+ * In GPUVM mode, the Shared/Private apertures are specified identically
+ * to HSA64 mode where they are always in the hole at a limited selection
+ * of locations.
+ *
+ *
+ * Aperture Definitions for SUA and DUA
+ *
+ * The interpretation of the aperture register definitions for a given
+ * VMID is a function of the “SUA Mode” which is one of HSA64, HSA32, or
+ * GPUVM64 discussed in previous sections. The mode is first decoded, and
+ * then the remaining register decode is a function of the mode.
+ *
+ *
+ * SUA Mode Decode
+ *
+ * For the S_LOAD and FLAT_* shader operations, the SUA mode is decoded from
+ * the COMPUTE_DISPATCH_INITIATOR:DATA_ATC bit and
+ * the SH_MEM_CONFIG:PTR32 bits.
+ *
+ * COMPUTE_DISPATCH_INITIATOR:DATA_ATC    SH_MEM_CONFIG:PTR32        Mode
+ *
+ * 1                                              0                  HSA64
+ *
+ * 1                                              1                  HSA32
+ *
+ * 0                                              X                 GPUVM64
+ *
+ * In general the hardware will ignore the PTR32 bit and treat
+ * as “0” whenever DATA_ATC = “0”, but sw should set PTR32=0
+ * when DATA_ATC=0.
+ *
+ * The DATA_ATC bit is only set for compute dispatches.
+ * All “Draw” dispatches are hardcoded to GPUVM64 mode
+ * for FLAT_* / S_LOAD operations.
+ */
+
+#define MAKE_GPUVM_APP_BASE_VI(gpu_num) \
+	(((uint64_t)(gpu_num) << 61) + 0x1000000000000L)
+
+#define MAKE_GPUVM_APP_LIMIT(base, size) \
+	(((uint64_t)(base) & 0xFFFFFF0000000000UL) + (size) - 1)
+
+#define MAKE_SCRATCH_APP_BASE_VI() \
+	(((uint64_t)(0x1UL) << 61) + 0x100000000L)
+
+#define MAKE_SCRATCH_APP_LIMIT(base) \
+	(((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
+
+#define MAKE_LDS_APP_BASE_VI() \
+	(((uint64_t)(0x1UL) << 61) + 0x0)
+#define MAKE_LDS_APP_LIMIT(base) \
+	(((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
+
+/* On GFXv9 the LDS and scratch apertures are programmed independently
+ * using the high 16 bits of the 64-bit virtual address. They must be
+ * in the hole, which will be the case as long as the high 16 bits are
+ * not 0.
+ *
+ * The aperture sizes are still 4GB implicitly.
+ *
+ * A GPUVM aperture is not applicable on GFXv9.
+ */
+#define MAKE_LDS_APP_BASE_V9() ((uint64_t)(0x1UL) << 48)
+#define MAKE_SCRATCH_APP_BASE_V9() ((uint64_t)(0x2UL) << 48)
+
+/* User mode manages most of the SVM aperture address space. The low
+ * 16MB are reserved for kernel use (CWSR trap handler and kernel IB
+ * for now).
+ */
+#define SVM_USER_BASE (u64)(KFD_CWSR_TBA_TMA_SIZE + 2*PAGE_SIZE)
+#define SVM_CWSR_BASE (SVM_USER_BASE - KFD_CWSR_TBA_TMA_SIZE)
+#define SVM_IB_BASE   (SVM_CWSR_BASE - PAGE_SIZE)
+
+static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
+{
+	/*
+	 * node id couldn't be 0 - the three MSB bits of
+	 * aperture shouldn't be 0
+	 */
+	pdd->lds_base = MAKE_LDS_APP_BASE_VI();
+	pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
+
+	/* dGPUs: SVM aperture starting at 0
+	 * with small reserved space for kernel.
+	 * Set them to CANONICAL addresses.
+	 */
+	pdd->gpuvm_base = SVM_USER_BASE;
+	pdd->gpuvm_limit =
+		pdd->dev->kfd->shared_resources.gpuvm_size - 1;
+
+	pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI();
+	pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
+}
+
+static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id)
+{
+	pdd->lds_base = MAKE_LDS_APP_BASE_V9();
+	pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
+
+        /* Raven needs SVM to support graphic handle, etc. Leave the small
+         * reserved space before SVM on Raven as well, even though we don't
+         * have to.
+         * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that they
+         * are used in Thunk to reserve SVM.
+         */
+        pdd->gpuvm_base = SVM_USER_BASE;
+	pdd->gpuvm_limit =
+		pdd->dev->kfd->shared_resources.gpuvm_size - 1;
+
+	pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9();
+	pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
+}
+
+int kfd_init_apertures(struct kfd_process *process)
+{
+	uint8_t id  = 0;
+	struct kfd_node *dev;
+	struct kfd_process_device *pdd;
+
+	/*Iterating over all devices*/
+	while (kfd_topology_enum_kfd_devices(id, &dev) == 0) {
+		if (!dev || kfd_devcgroup_check_permission(dev)) {
+			/* Skip non GPU devices and devices to which the
+			 * current process have no access to. Access can be
+			 * limited by placing the process in a specific
+			 * cgroup hierarchy
+			 */
+			id++;
+			continue;
+		}
+
+		pdd = kfd_create_process_device_data(dev, process);
+		if (!pdd) {
+			pr_err("Failed to create process device data\n");
+			return -ENOMEM;
+		}
+		/*
+		 * For 64 bit process apertures will be statically reserved in
+		 * the x86_64 non canonical process address space
+		 * amdkfd doesn't currently support apertures for 32 bit process
+		 */
+		if (process->is_32bit_user_mode) {
+			pdd->lds_base = pdd->lds_limit = 0;
+			pdd->gpuvm_base = pdd->gpuvm_limit = 0;
+			pdd->scratch_base = pdd->scratch_limit = 0;
+		} else {
+			switch (dev->adev->asic_type) {
+			case CHIP_KAVERI:
+			case CHIP_HAWAII:
+			case CHIP_CARRIZO:
+			case CHIP_TONGA:
+			case CHIP_FIJI:
+			case CHIP_POLARIS10:
+			case CHIP_POLARIS11:
+			case CHIP_POLARIS12:
+			case CHIP_VEGAM:
+				kfd_init_apertures_vi(pdd, id);
+				break;
+			default:
+				if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
+					kfd_init_apertures_v9(pdd, id);
+				else {
+					WARN(1, "Unexpected ASIC family %u",
+					     dev->adev->asic_type);
+					return -EINVAL;
+				}
+			}
+
+                        /* dGPUs: the reserved space for kernel
+                         * before SVM
+                         */
+                        pdd->qpd.cwsr_base = SVM_CWSR_BASE;
+                        pdd->qpd.ib_base = SVM_IB_BASE;
+		}
+
+		dev_dbg(kfd_device, "node id %u\n", id);
+		dev_dbg(kfd_device, "gpu id %u\n", pdd->dev->id);
+		dev_dbg(kfd_device, "lds_base %llX\n", pdd->lds_base);
+		dev_dbg(kfd_device, "lds_limit %llX\n", pdd->lds_limit);
+		dev_dbg(kfd_device, "gpuvm_base %llX\n", pdd->gpuvm_base);
+		dev_dbg(kfd_device, "gpuvm_limit %llX\n", pdd->gpuvm_limit);
+		dev_dbg(kfd_device, "scratch_base %llX\n", pdd->scratch_base);
+		dev_dbg(kfd_device, "scratch_limit %llX\n", pdd->scratch_limit);
+
+		id++;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
new file mode 100644
index 000000000..a7697ec81
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v10.c
@@ -0,0 +1,405 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "kfd_events.h"
+#include "kfd_debug.h"
+#include "soc15_int.h"
+#include "kfd_device_queue_manager.h"
+
+/*
+ * GFX10 SQ Interrupts
+ *
+ * There are 3 encoding types of interrupts sourced from SQ sent as a 44-bit
+ * packet to the Interrupt Handler:
+ * Auto - Generated by the SQG (various cmd overflows, timestamps etc)
+ * Wave - Generated by S_SENDMSG through a shader program
+ * Error - HW generated errors (Illegal instructions, Memviols, EDC etc)
+ *
+ * The 44-bit packet is mapped as {context_id1[7:0],context_id0[31:0]} plus
+ * 4-bits for VMID (SOC15_VMID_FROM_IH_ENTRY) as such:
+ *
+ * - context_id1[7:6]
+ * Encoding type (0 = Auto, 1 = Wave, 2 = Error)
+ *
+ * - context_id0[24]
+ * PRIV bit indicates that Wave S_SEND or error occurred within trap
+ *
+ * - context_id0[22:0]
+ * 23-bit data with the following layout per encoding type:
+ * Auto - only context_id0[8:0] is used, which reports various interrupts
+ * generated by SQG.  The rest is 0.
+ * Wave - user data sent from m0 via S_SENDMSG
+ * Error - Error type (context_id0[22:19]), Error Details (rest of bits)
+ *
+ * The other context_id bits show coordinates (SE/SH/CU/SIMD/WGP) for wave
+ * S_SENDMSG and Errors.  These are 0 for Auto.
+ */
+
+enum SQ_INTERRUPT_WORD_ENCODING {
+	SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x0,
+	SQ_INTERRUPT_WORD_ENCODING_INST,
+	SQ_INTERRUPT_WORD_ENCODING_ERROR,
+};
+
+enum SQ_INTERRUPT_ERROR_TYPE {
+	SQ_INTERRUPT_ERROR_TYPE_EDC_FUE = 0x0,
+	SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST,
+	SQ_INTERRUPT_ERROR_TYPE_MEMVIOL,
+	SQ_INTERRUPT_ERROR_TYPE_EDC_FED,
+};
+
+/* SQ_INTERRUPT_WORD_AUTO_CTXID */
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE__SHIFT 0
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__WLT__SHIFT 1
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_BUF0_FULL__SHIFT 2
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_BUF1_FULL__SHIFT 3
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_UTC_ERROR__SHIFT 7
+#define SQ_INTERRUPT_WORD_AUTO_CTXID1__SE_ID__SHIFT 4
+#define SQ_INTERRUPT_WORD_AUTO_CTXID1__ENCODING__SHIFT 6
+
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_MASK 0x00000001
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__WLT_MASK 0x00000002
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_BUF0_FULL_MASK 0x00000004
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_BUF1_FULL_MASK 0x00000008
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_UTC_ERROR_MASK 0x00000080
+#define SQ_INTERRUPT_WORD_AUTO_CTXID1__SE_ID_MASK 0x030
+#define SQ_INTERRUPT_WORD_AUTO_CTXID1__ENCODING_MASK 0x0c0
+
+/* SQ_INTERRUPT_WORD_WAVE_CTXID */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__DATA__SHIFT 0
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__SA_ID__SHIFT 23
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV__SHIFT 24
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__WAVE_ID__SHIFT 25
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__SIMD_ID__SHIFT 30
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__WGP_ID__SHIFT 0
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__SE_ID__SHIFT 4
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__ENCODING__SHIFT 6
+
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__DATA_MASK 0x000007fffff
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__SA_ID_MASK 0x0000800000
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV_MASK 0x00001000000
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__WAVE_ID_MASK 0x0003e000000
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__SIMD_ID_MASK 0x000c0000000
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__WGP_ID_MASK 0x00f
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__SE_ID_MASK 0x030
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__ENCODING_MASK 0x0c0
+
+#define KFD_CTXID0__ERR_TYPE_MASK 0x780000
+#define KFD_CTXID0__ERR_TYPE__SHIFT 19
+
+/* GFX10 SQ interrupt ENC type bit (context_id1[7:6]) for wave s_sendmsg */
+#define KFD_CONTEXT_ID1_ENC_TYPE_WAVE_MASK	0x40
+/* GFX10 SQ interrupt PRIV bit (context_id0[24]) for s_sendmsg inside trap */
+#define KFD_CONTEXT_ID0_PRIV_MASK		0x1000000
+/*
+ * The debugger will send user data(m0) with PRIV=1 to indicate it requires
+ * notification from the KFD with the following queue id (DOORBELL_ID) and
+ * trap code (TRAP_CODE).
+ */
+#define KFD_CONTEXT_ID0_DEBUG_DOORBELL_MASK	0x0003ff
+#define KFD_CONTEXT_ID0_DEBUG_TRAP_CODE_SHIFT	10
+#define KFD_CONTEXT_ID0_DEBUG_TRAP_CODE_MASK	0x07fc00
+#define KFD_DEBUG_DOORBELL_ID(ctxid0)	((ctxid0) &	\
+				KFD_CONTEXT_ID0_DEBUG_DOORBELL_MASK)
+#define KFD_DEBUG_TRAP_CODE(ctxid0)	(((ctxid0) &	\
+				KFD_CONTEXT_ID0_DEBUG_TRAP_CODE_MASK)	\
+				>> KFD_CONTEXT_ID0_DEBUG_TRAP_CODE_SHIFT)
+#define KFD_DEBUG_CP_BAD_OP_ECODE_MASK		0x3fffc00
+#define KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT		10
+#define KFD_DEBUG_CP_BAD_OP_ECODE(ctxid0) (((ctxid0) &			\
+				KFD_DEBUG_CP_BAD_OP_ECODE_MASK)		\
+				>> KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT)
+
+static void event_interrupt_poison_consumption(struct kfd_node *dev,
+				uint16_t pasid, uint16_t client_id)
+{
+	int old_poison, ret = -EINVAL;
+	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+
+	if (!p)
+		return;
+
+	/* all queues of a process will be unmapped in one time */
+	old_poison = atomic_cmpxchg(&p->poison, 0, 1);
+	kfd_unref_process(p);
+	if (old_poison)
+		return;
+
+	switch (client_id) {
+	case SOC15_IH_CLIENTID_SE0SH:
+	case SOC15_IH_CLIENTID_SE1SH:
+	case SOC15_IH_CLIENTID_SE2SH:
+	case SOC15_IH_CLIENTID_SE3SH:
+	case SOC15_IH_CLIENTID_UTCL2:
+		ret = kfd_dqm_evict_pasid(dev->dqm, pasid);
+		break;
+	case SOC15_IH_CLIENTID_SDMA0:
+	case SOC15_IH_CLIENTID_SDMA1:
+	case SOC15_IH_CLIENTID_SDMA2:
+	case SOC15_IH_CLIENTID_SDMA3:
+	case SOC15_IH_CLIENTID_SDMA4:
+		break;
+	default:
+		break;
+	}
+
+	kfd_signal_poison_consumed_event(dev, pasid);
+
+	/* resetting queue passes, do page retirement without gpu reset
+	 * resetting queue fails, fallback to gpu reset solution
+	 */
+	if (!ret) {
+		dev_warn(dev->adev->dev,
+			"RAS poison consumption, unmap queue flow succeeded: client id %d\n",
+			client_id);
+		amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false);
+	} else {
+		dev_warn(dev->adev->dev,
+			"RAS poison consumption, fall back to gpu reset flow: client id %d\n",
+			client_id);
+		amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true);
+	}
+}
+
+static bool event_interrupt_isr_v10(struct kfd_node *dev,
+					const uint32_t *ih_ring_entry,
+					uint32_t *patched_ihre,
+					bool *patched_flag)
+{
+	uint16_t source_id, client_id, pasid, vmid;
+	const uint32_t *data = ih_ring_entry;
+
+	source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+	client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+
+	/* Only handle interrupts from KFD VMIDs */
+	vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+	if (!KFD_IRQ_IS_FENCE(client_id, source_id) &&
+	   (vmid < dev->vm_info.first_vmid_kfd ||
+	    vmid > dev->vm_info.last_vmid_kfd))
+		return false;
+
+	pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+
+	/* Only handle clients we care about */
+	if (client_id != SOC15_IH_CLIENTID_GRBM_CP &&
+	    client_id != SOC15_IH_CLIENTID_SDMA0 &&
+	    client_id != SOC15_IH_CLIENTID_SDMA1 &&
+	    client_id != SOC15_IH_CLIENTID_SDMA2 &&
+	    client_id != SOC15_IH_CLIENTID_SDMA3 &&
+	    client_id != SOC15_IH_CLIENTID_SDMA4 &&
+	    client_id != SOC15_IH_CLIENTID_SDMA5 &&
+	    client_id != SOC15_IH_CLIENTID_SDMA6 &&
+	    client_id != SOC15_IH_CLIENTID_SDMA7 &&
+	    client_id != SOC15_IH_CLIENTID_VMC &&
+	    client_id != SOC15_IH_CLIENTID_VMC1 &&
+	    client_id != SOC15_IH_CLIENTID_UTCL2 &&
+	    client_id != SOC15_IH_CLIENTID_SE0SH &&
+	    client_id != SOC15_IH_CLIENTID_SE1SH &&
+	    client_id != SOC15_IH_CLIENTID_SE2SH &&
+	    client_id != SOC15_IH_CLIENTID_SE3SH)
+		return false;
+
+	pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
+		 client_id, source_id, vmid, pasid);
+	pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
+		 data[0], data[1], data[2], data[3],
+		 data[4], data[5], data[6], data[7]);
+
+	/* If there is no valid PASID, it's likely a bug */
+	if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt"))
+		return 0;
+
+	/* Interrupt types we care about: various signals and faults.
+	 * They will be forwarded to a work queue (see below).
+	 */
+	return source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
+		source_id == SOC15_INTSRC_SDMA_TRAP ||
+		source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
+		source_id == SOC15_INTSRC_CP_BAD_OPCODE ||
+		client_id == SOC15_IH_CLIENTID_VMC ||
+		client_id == SOC15_IH_CLIENTID_VMC1 ||
+		client_id == SOC15_IH_CLIENTID_UTCL2 ||
+		KFD_IRQ_IS_FENCE(client_id, source_id);
+}
+
+static void event_interrupt_wq_v10(struct kfd_node *dev,
+					const uint32_t *ih_ring_entry)
+{
+	uint16_t source_id, client_id, pasid, vmid;
+	uint32_t context_id0, context_id1;
+	uint32_t encoding, sq_intr_err_type;
+
+	source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+	client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+	pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+	vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+	context_id0 = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);
+	context_id1 = SOC15_CONTEXT_ID1_FROM_IH_ENTRY(ih_ring_entry);
+
+	if (client_id == SOC15_IH_CLIENTID_GRBM_CP ||
+	    client_id == SOC15_IH_CLIENTID_SE0SH ||
+	    client_id == SOC15_IH_CLIENTID_SE1SH ||
+	    client_id == SOC15_IH_CLIENTID_SE2SH ||
+	    client_id == SOC15_IH_CLIENTID_SE3SH) {
+		if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
+			kfd_signal_event_interrupt(pasid, context_id0, 32);
+		else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) {
+			encoding = REG_GET_FIELD(context_id1,
+						SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING);
+			switch (encoding) {
+			case SQ_INTERRUPT_WORD_ENCODING_AUTO:
+				pr_debug_ratelimited(
+					"sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf0_full %d, ttrac_buf1_full %d, ttrace_utc_err %d\n",
+					REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_AUTO_CTXID1,
+							SE_ID),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+							THREAD_TRACE),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+							WLT),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+							THREAD_TRACE_BUF0_FULL),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+							THREAD_TRACE_BUF1_FULL),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0,
+							THREAD_TRACE_UTC_ERROR));
+				break;
+			case SQ_INTERRUPT_WORD_ENCODING_INST:
+				pr_debug_ratelimited("sq_intr: inst, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
+					REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
+							SE_ID),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
+							DATA),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
+							SA_ID),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
+							PRIV),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
+							WAVE_ID),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
+							SIMD_ID),
+					REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
+							WGP_ID));
+				if (context_id0 & SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV_MASK) {
+					if (kfd_set_dbg_ev_from_interrupt(dev, pasid,
+							KFD_DEBUG_DOORBELL_ID(context_id0),
+							KFD_DEBUG_TRAP_CODE(context_id0),
+							NULL, 0))
+						return;
+				}
+				break;
+			case SQ_INTERRUPT_WORD_ENCODING_ERROR:
+				sq_intr_err_type = REG_GET_FIELD(context_id0, KFD_CTXID0,
+								ERR_TYPE);
+				pr_warn_ratelimited("sq_intr: error, se %d, data 0x%x, sa %d, priv %d, wave_id %d, simd_id %d, wgp_id %d, err_type %d\n",
+					REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
+							SE_ID),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
+							DATA),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
+							SA_ID),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
+							PRIV),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
+							WAVE_ID),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0,
+							SIMD_ID),
+					REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1,
+							WGP_ID),
+					sq_intr_err_type);
+				if (sq_intr_err_type != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
+					sq_intr_err_type != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) {
+					event_interrupt_poison_consumption(dev, pasid, source_id);
+					return;
+				}
+				break;
+			default:
+				break;
+			}
+			kfd_signal_event_interrupt(pasid, context_id0 & 0x7fffff, 23);
+		} else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
+			kfd_set_dbg_ev_from_interrupt(dev, pasid,
+				KFD_DEBUG_DOORBELL_ID(context_id0),
+				KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)),
+				NULL,
+				0);
+		}
+	} else if (client_id == SOC15_IH_CLIENTID_SDMA0 ||
+		   client_id == SOC15_IH_CLIENTID_SDMA1 ||
+		   client_id == SOC15_IH_CLIENTID_SDMA2 ||
+		   client_id == SOC15_IH_CLIENTID_SDMA3 ||
+		   (client_id == SOC15_IH_CLIENTID_SDMA3_Sienna_Cichlid &&
+		    KFD_GC_VERSION(dev) == IP_VERSION(10, 3, 0)) ||
+		   client_id == SOC15_IH_CLIENTID_SDMA4 ||
+		   client_id == SOC15_IH_CLIENTID_SDMA5 ||
+		   client_id == SOC15_IH_CLIENTID_SDMA6 ||
+		   client_id == SOC15_IH_CLIENTID_SDMA7) {
+		if (source_id == SOC15_INTSRC_SDMA_TRAP) {
+			kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28);
+		} else if (source_id == SOC15_INTSRC_SDMA_ECC) {
+			event_interrupt_poison_consumption(dev, pasid, source_id);
+			return;
+		}
+	} else if (client_id == SOC15_IH_CLIENTID_VMC ||
+		   client_id == SOC15_IH_CLIENTID_VMC1 ||
+		   client_id == SOC15_IH_CLIENTID_UTCL2) {
+		struct kfd_vm_fault_info info = {0};
+		uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
+		struct kfd_hsa_memory_exception_data exception_data;
+
+		if (client_id == SOC15_IH_CLIENTID_UTCL2 &&
+				amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) {
+			event_interrupt_poison_consumption(dev, pasid, client_id);
+			return;
+		}
+
+		info.vmid = vmid;
+		info.mc_id = client_id;
+		info.page_addr = ih_ring_entry[4] |
+			(uint64_t)(ih_ring_entry[5] & 0xf) << 32;
+		info.prot_valid = ring_id & 0x08;
+		info.prot_read  = ring_id & 0x10;
+		info.prot_write = ring_id & 0x20;
+
+		memset(&exception_data, 0, sizeof(exception_data));
+		exception_data.gpu_id = dev->id;
+		exception_data.va = (info.page_addr) << PAGE_SHIFT;
+		exception_data.failure.NotPresent = info.prot_valid ? 1 : 0;
+		exception_data.failure.NoExecute = info.prot_exec ? 1 : 0;
+		exception_data.failure.ReadOnly = info.prot_write ? 1 : 0;
+		exception_data.failure.imprecise = 0;
+
+		kfd_set_dbg_ev_from_interrupt(dev,
+						pasid,
+						-1,
+						KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION),
+						&exception_data,
+						sizeof(exception_data));
+	} else if (KFD_IRQ_IS_FENCE(client_id, source_id)) {
+		kfd_process_close_interrupt_drain(pasid);
+	}
+}
+
+const struct kfd_event_interrupt_class event_interrupt_class_v10 = {
+	.interrupt_isr = event_interrupt_isr_v10,
+	.interrupt_wq = event_interrupt_wq_v10,
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
new file mode 100644
index 000000000..2a65792fd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v11.c
@@ -0,0 +1,385 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "kfd_priv.h"
+#include "kfd_events.h"
+#include "soc15_int.h"
+#include "kfd_device_queue_manager.h"
+#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
+#include "kfd_smi_events.h"
+#include "kfd_debug.h"
+
+/*
+ * GFX11 SQ Interrupts
+ *
+ * There are 3 encoding types of interrupts sourced from SQ sent as a 44-bit
+ * packet to the Interrupt Handler:
+ * Auto - Generated by the SQG (various cmd overflows, timestamps etc)
+ * Wave - Generated by S_SENDMSG through a shader program
+ * Error - HW generated errors (Illegal instructions, Memviols, EDC etc)
+ *
+ * The 44-bit packet is mapped as {context_id1[7:0],context_id0[31:0]} plus
+ * 4-bits for VMID (SOC15_VMID_FROM_IH_ENTRY) as such:
+ *
+ * - context_id1[7:6]
+ * Encoding type (0 = Auto, 1 = Wave, 2 = Error)
+ *
+ * - context_id0[26]
+ * PRIV bit indicates that Wave S_SEND or error occurred within trap
+ *
+ * - context_id0[24:0]
+ * 25-bit data with the following layout per encoding type:
+ * Auto - only context_id0[8:0] is used, which reports various interrupts
+ * generated by SQG.  The rest is 0.
+ * Wave - user data sent from m0 via S_SENDMSG (context_id0[23:0])
+ * Error - Error Type (context_id0[24:21]), Error Details (context_id0[20:0])
+ *
+ * The other context_id bits show coordinates (SE/SH/CU/SIMD/WGP) for wave
+ * S_SENDMSG and Errors.  These are 0 for Auto.
+ */
+
+enum SQ_INTERRUPT_WORD_ENCODING {
+	SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x0,
+	SQ_INTERRUPT_WORD_ENCODING_INST,
+	SQ_INTERRUPT_WORD_ENCODING_ERROR,
+};
+
+enum SQ_INTERRUPT_ERROR_TYPE {
+	SQ_INTERRUPT_ERROR_TYPE_EDC_FUE = 0x0,
+	SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST,
+	SQ_INTERRUPT_ERROR_TYPE_MEMVIOL,
+	SQ_INTERRUPT_ERROR_TYPE_EDC_FED,
+};
+
+/* SQ_INTERRUPT_WORD_AUTO_CTXID */
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE__SHIFT		0
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__WLT__SHIFT			1
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_BUF_FULL__SHIFT	2
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__REG_TIMESTAMP__SHIFT		3
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__CMD_TIMESTAMP__SHIFT		4
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__HOST_CMD_OVERFLOW__SHIFT		5
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__HOST_REG_OVERFLOW__SHIFT		6
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__IMMED_OVERFLOW__SHIFT		7
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_UTC_ERROR__SHIFT	8
+#define SQ_INTERRUPT_WORD_AUTO_CTXID1__ENCODING__SHIFT			6
+
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_MASK		0x00000001
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__WLT_MASK				0x00000002
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_BUF_FULL_MASK	0x00000004
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__REG_TIMESTAMP_MASK		0x00000008
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__CMD_TIMESTAMP_MASK		0x00000010
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__HOST_CMD_OVERFLOW_MASK		0x00000020
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__HOST_REG_OVERFLOW_MASK		0x00000040
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__IMMED_OVERFLOW_MASK		0x00000080
+#define SQ_INTERRUPT_WORD_AUTO_CTXID0__THREAD_TRACE_UTC_ERROR_MASK	0x00000100
+#define SQ_INTERRUPT_WORD_AUTO_CTXID1__ENCODING_MASK			0x000000c0
+
+/* SQ_INTERRUPT_WORD_WAVE_CTXID */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__DATA__SHIFT	0
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__SH_ID__SHIFT	25
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV__SHIFT	26
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__WAVE_ID__SHIFT	27
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__SIMD_ID__SHIFT	0
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__WGP_ID__SHIFT	2
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__ENCODING__SHIFT	6
+
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__DATA_MASK	0x00ffffff /* [23:0] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__SH_ID_MASK	0x02000000 /* [25] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__PRIV_MASK	0x04000000 /* [26] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID0__WAVE_ID_MASK	0xf8000000 /* [31:27] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__SIMD_ID_MASK	0x00000003 /* [33:32] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__WGP_ID_MASK	0x0000003c /* [37:34] */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID1__ENCODING_MASK	0x000000c0 /* [39:38] */
+
+/* SQ_INTERRUPT_WORD_ERROR_CTXID */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__DETAIL__SHIFT	0
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__TYPE__SHIFT	21
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__SH_ID__SHIFT	25
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__PRIV__SHIFT	26
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__WAVE_ID__SHIFT	27
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__SIMD_ID__SHIFT	0
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__WGP_ID__SHIFT	2
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__ENCODING__SHIFT	6
+
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__DETAIL_MASK	0x001fffff /* [20:0] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__TYPE_MASK	0x01e00000 /* [24:21] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__SH_ID_MASK	0x02000000 /* [25] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__PRIV_MASK	0x04000000 /* [26] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID0__WAVE_ID_MASK	0xf8000000 /* [31:27] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__SIMD_ID_MASK	0x00000003 /* [33:32] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__WGP_ID_MASK	0x0000003c /* [37:34] */
+#define SQ_INTERRUPT_WORD_ERROR_CTXID1__ENCODING_MASK	0x000000c0 /* [39:38] */
+
+/*
+ * The debugger will send user data(m0) with PRIV=1 to indicate it requires
+ * notification from the KFD with the following queue id (DOORBELL_ID) and
+ * trap code (TRAP_CODE).
+ */
+#define KFD_CTXID0_TRAP_CODE_SHIFT	10
+#define KFD_CTXID0_TRAP_CODE_MASK	0xfffc00
+#define KFD_CTXID0_CP_BAD_OP_ECODE_MASK	0x3ffffff
+#define KFD_CTXID0_DOORBELL_ID_MASK	0x0003ff
+
+#define KFD_CTXID0_TRAP_CODE(ctxid0)		(((ctxid0) &  \
+				KFD_CTXID0_TRAP_CODE_MASK) >> \
+				KFD_CTXID0_TRAP_CODE_SHIFT)
+#define KFD_CTXID0_CP_BAD_OP_ECODE(ctxid0)	(((ctxid0) &        \
+				KFD_CTXID0_CP_BAD_OP_ECODE_MASK) >> \
+				KFD_CTXID0_TRAP_CODE_SHIFT)
+#define KFD_CTXID0_DOORBELL_ID(ctxid0)		((ctxid0) & \
+				KFD_CTXID0_DOORBELL_ID_MASK)
+
+static void print_sq_intr_info_auto(uint32_t context_id0, uint32_t context_id1)
+{
+	pr_debug_ratelimited(
+		"sq_intr: auto, ttrace %d, wlt %d, ttrace_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n",
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, WLT),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_BUF_FULL),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, REG_TIMESTAMP),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, CMD_TIMESTAMP),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, HOST_CMD_OVERFLOW),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, HOST_REG_OVERFLOW),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, IMMED_OVERFLOW),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID0, THREAD_TRACE_UTC_ERROR));
+}
+
+static void print_sq_intr_info_inst(uint32_t context_id0, uint32_t context_id1)
+{
+	pr_debug_ratelimited(
+		"sq_intr: inst, data 0x%08x, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, DATA),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, SH_ID),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID0, WAVE_ID),
+		REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, SIMD_ID),
+		REG_GET_FIELD(context_id1, SQ_INTERRUPT_WORD_WAVE_CTXID1, WGP_ID));
+}
+
+static void print_sq_intr_info_error(uint32_t context_id0, uint32_t context_id1)
+{
+	pr_warn_ratelimited(
+		"sq_intr: error, detail 0x%08x, type %d, sh %d, priv %d, wave_id %d, simd_id %d, wgp_id %d\n",
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, DETAIL),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, SH_ID),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, PRIV),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID0, WAVE_ID),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, SIMD_ID),
+		REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_ERROR_CTXID1, WGP_ID));
+}
+
+static void event_interrupt_poison_consumption_v11(struct kfd_node *dev,
+				uint16_t pasid, uint16_t source_id)
+{
+	int ret = -EINVAL;
+	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+
+	if (!p)
+		return;
+
+	/* all queues of a process will be unmapped in one time */
+	if (atomic_read(&p->poison)) {
+		kfd_unref_process(p);
+		return;
+	}
+
+	atomic_set(&p->poison, 1);
+	kfd_unref_process(p);
+
+	switch (source_id) {
+	case SOC15_INTSRC_SQ_INTERRUPT_MSG:
+		if (dev->dqm->ops.reset_queues)
+			ret = dev->dqm->ops.reset_queues(dev->dqm, pasid);
+		break;
+	case SOC21_INTSRC_SDMA_ECC:
+	default:
+		break;
+	}
+
+	kfd_signal_poison_consumed_event(dev, pasid);
+
+	/* resetting queue passes, do page retirement without gpu reset
+	   resetting queue fails, fallback to gpu reset solution */
+	if (!ret)
+		amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false);
+	else
+		amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true);
+}
+
+static bool event_interrupt_isr_v11(struct kfd_node *dev,
+					const uint32_t *ih_ring_entry,
+					uint32_t *patched_ihre,
+					bool *patched_flag)
+{
+	uint16_t source_id, client_id, pasid, vmid;
+	const uint32_t *data = ih_ring_entry;
+	uint32_t context_id0;
+
+	source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+	client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+	/* Only handle interrupts from KFD VMIDs */
+	vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+	if (!KFD_IRQ_IS_FENCE(client_id, source_id) &&
+	    (vmid < dev->vm_info.first_vmid_kfd ||
+	    vmid > dev->vm_info.last_vmid_kfd))
+		return false;
+
+	pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+	context_id0 = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);
+
+	if ((source_id == SOC15_INTSRC_CP_END_OF_PIPE) &&
+	    (context_id0 & AMDGPU_FENCE_MES_QUEUE_FLAG))
+		return false;
+
+	pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
+		 client_id, source_id, vmid, pasid);
+	pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
+		 data[0], data[1], data[2], data[3],
+		 data[4], data[5], data[6], data[7]);
+
+	/* If there is no valid PASID, it's likely a bug */
+	if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt"))
+		return false;
+
+	/* Interrupt types we care about: various signals and faults.
+	 * They will be forwarded to a work queue (see below).
+	 */
+	return source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
+		source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
+		source_id == SOC15_INTSRC_CP_BAD_OPCODE ||
+		source_id == SOC21_INTSRC_SDMA_TRAP ||
+		KFD_IRQ_IS_FENCE(client_id, source_id) ||
+		(((client_id == SOC21_IH_CLIENTID_VMC) ||
+		 ((client_id == SOC21_IH_CLIENTID_GFX) &&
+		  (source_id == UTCL2_1_0__SRCID__FAULT))) &&
+		  !amdgpu_no_queue_eviction_on_vm_fault);
+}
+
+static void event_interrupt_wq_v11(struct kfd_node *dev,
+					const uint32_t *ih_ring_entry)
+{
+	uint16_t source_id, client_id, ring_id, pasid, vmid;
+	uint32_t context_id0, context_id1;
+	uint8_t sq_int_enc, sq_int_priv, sq_int_errtype;
+	struct kfd_vm_fault_info info = {0};
+	struct kfd_hsa_memory_exception_data exception_data;
+
+	source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+	client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+	ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
+	pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+	vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+	context_id0 = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);
+	context_id1 = SOC15_CONTEXT_ID1_FROM_IH_ENTRY(ih_ring_entry);
+
+	/* VMC, UTCL2 */
+	if (client_id == SOC21_IH_CLIENTID_VMC ||
+	     ((client_id == SOC21_IH_CLIENTID_GFX) &&
+	     (source_id == UTCL2_1_0__SRCID__FAULT))) {
+
+		info.vmid = vmid;
+		info.mc_id = client_id;
+		info.page_addr = ih_ring_entry[4] |
+			(uint64_t)(ih_ring_entry[5] & 0xf) << 32;
+		info.prot_valid = ring_id & 0x08;
+		info.prot_read  = ring_id & 0x10;
+		info.prot_write = ring_id & 0x20;
+
+		memset(&exception_data, 0, sizeof(exception_data));
+		exception_data.gpu_id = dev->id;
+		exception_data.va = (info.page_addr) << PAGE_SHIFT;
+		exception_data.failure.NotPresent = info.prot_valid ? 1 : 0;
+		exception_data.failure.NoExecute = info.prot_exec ? 1 : 0;
+		exception_data.failure.ReadOnly = info.prot_write ? 1 : 0;
+		exception_data.failure.imprecise = 0;
+
+		kfd_set_dbg_ev_from_interrupt(dev, pasid, -1,
+					      KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION),
+					      &exception_data, sizeof(exception_data));
+		kfd_smi_event_update_vmfault(dev, pasid);
+
+	/* GRBM, SDMA, SE, PMM */
+	} else if (client_id == SOC21_IH_CLIENTID_GRBM_CP ||
+		   client_id == SOC21_IH_CLIENTID_GFX) {
+
+		/* CP */
+		if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
+			kfd_signal_event_interrupt(pasid, context_id0, 32);
+		else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE)
+			kfd_set_dbg_ev_from_interrupt(dev, pasid,
+				KFD_CTXID0_DOORBELL_ID(context_id0),
+				KFD_EC_MASK(KFD_CTXID0_CP_BAD_OP_ECODE(context_id0)),
+				NULL, 0);
+
+		/* SDMA */
+		else if (source_id == SOC21_INTSRC_SDMA_TRAP)
+			kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28);
+		else if (source_id == SOC21_INTSRC_SDMA_ECC) {
+			event_interrupt_poison_consumption_v11(dev, pasid, source_id);
+			return;
+		}
+
+		/* SQ */
+		else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) {
+			sq_int_enc = REG_GET_FIELD(context_id1,
+					SQ_INTERRUPT_WORD_WAVE_CTXID1, ENCODING);
+			switch (sq_int_enc) {
+			case SQ_INTERRUPT_WORD_ENCODING_AUTO:
+				print_sq_intr_info_auto(context_id0, context_id1);
+				break;
+			case SQ_INTERRUPT_WORD_ENCODING_INST:
+				print_sq_intr_info_inst(context_id0, context_id1);
+				sq_int_priv = REG_GET_FIELD(context_id0,
+						SQ_INTERRUPT_WORD_WAVE_CTXID0, PRIV);
+				if (sq_int_priv && (kfd_set_dbg_ev_from_interrupt(dev, pasid,
+						KFD_CTXID0_DOORBELL_ID(context_id0),
+						KFD_CTXID0_TRAP_CODE(context_id0),
+						NULL, 0)))
+					return;
+				break;
+			case SQ_INTERRUPT_WORD_ENCODING_ERROR:
+				print_sq_intr_info_error(context_id0, context_id1);
+				sq_int_errtype = REG_GET_FIELD(context_id0,
+						SQ_INTERRUPT_WORD_ERROR_CTXID0, TYPE);
+				if (sq_int_errtype != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
+				    sq_int_errtype != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) {
+					event_interrupt_poison_consumption_v11(
+							dev, pasid, source_id);
+					return;
+				}
+				break;
+			default:
+				break;
+			}
+			kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24);
+		}
+
+	} else if (KFD_IRQ_IS_FENCE(client_id, source_id)) {
+		kfd_process_close_interrupt_drain(pasid);
+	}
+}
+
+const struct kfd_event_interrupt_class event_interrupt_class_v11 = {
+	.interrupt_isr = event_interrupt_isr_v11,
+	.interrupt_wq = event_interrupt_wq_v11,
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
new file mode 100644
index 000000000..27cdaea40
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "kfd_priv.h"
+#include "kfd_events.h"
+#include "kfd_debug.h"
+#include "soc15_int.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_smi_events.h"
+
+/*
+ * GFX9 SQ Interrupts
+ *
+ * There are 3 encoding types of interrupts sourced from SQ sent as a 44-bit
+ * packet to the Interrupt Handler:
+ * Auto - Generated by the SQG (various cmd overflows, timestamps etc)
+ * Wave - Generated by S_SENDMSG through a shader program
+ * Error - HW generated errors (Illegal instructions, Memviols, EDC etc)
+ *
+ * The 44-bit packet is mapped as {context_id1[7:0],context_id0[31:0]} plus
+ * 4-bits for VMID (SOC15_VMID_FROM_IH_ENTRY) as such:
+ *
+ * - context_id0[27:26]
+ * Encoding type (0 = Auto, 1 = Wave, 2 = Error)
+ *
+ * - context_id0[13]
+ * PRIV bit indicates that Wave S_SEND or error occurred within trap
+ *
+ * - {context_id1[7:0],context_id0[31:28],context_id0[11:0]}
+ * 24-bit data with the following layout per encoding type:
+ * Auto - only context_id0[8:0] is used, which reports various interrupts
+ * generated by SQG.  The rest is 0.
+ * Wave - user data sent from m0 via S_SENDMSG
+ * Error - Error type (context_id1[7:4]), Error Details (rest of bits)
+ *
+ * The other context_id bits show coordinates (SE/SH/CU/SIMD/WAVE) for wave
+ * S_SENDMSG and Errors.  These are 0 for Auto.
+ */
+
+enum SQ_INTERRUPT_WORD_ENCODING {
+	SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x0,
+	SQ_INTERRUPT_WORD_ENCODING_INST,
+	SQ_INTERRUPT_WORD_ENCODING_ERROR,
+};
+
+enum SQ_INTERRUPT_ERROR_TYPE {
+	SQ_INTERRUPT_ERROR_TYPE_EDC_FUE = 0x0,
+	SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST,
+	SQ_INTERRUPT_ERROR_TYPE_MEMVIOL,
+	SQ_INTERRUPT_ERROR_TYPE_EDC_FED,
+};
+
+/* SQ_INTERRUPT_WORD_AUTO_CTXID */
+#define SQ_INTERRUPT_WORD_AUTO_CTXID__THREAD_TRACE__SHIFT 0
+#define SQ_INTERRUPT_WORD_AUTO_CTXID__WLT__SHIFT 1
+#define SQ_INTERRUPT_WORD_AUTO_CTXID__THREAD_TRACE_BUF_FULL__SHIFT 2
+#define SQ_INTERRUPT_WORD_AUTO_CTXID__REG_TIMESTAMP__SHIFT 3
+#define SQ_INTERRUPT_WORD_AUTO_CTXID__CMD_TIMESTAMP__SHIFT 4
+#define SQ_INTERRUPT_WORD_AUTO_CTXID__HOST_CMD_OVERFLOW__SHIFT 5
+#define SQ_INTERRUPT_WORD_AUTO_CTXID__HOST_REG_OVERFLOW__SHIFT 6
+#define SQ_INTERRUPT_WORD_AUTO_CTXID__IMMED_OVERFLOW__SHIFT 7
+#define SQ_INTERRUPT_WORD_AUTO_CTXID__THREAD_TRACE_UTC_ERROR__SHIFT 8
+#define SQ_INTERRUPT_WORD_AUTO_CTXID__SE_ID__SHIFT 24
+#define SQ_INTERRUPT_WORD_AUTO_CTXID__ENCODING__SHIFT 26
+
+#define SQ_INTERRUPT_WORD_AUTO_CTXID__THREAD_TRACE_MASK 0x00000001
+#define SQ_INTERRUPT_WORD_AUTO_CTXID__WLT_MASK 0x00000002
+#define SQ_INTERRUPT_WORD_AUTO_CTXID__THREAD_TRACE_BUF_FULL_MASK 0x00000004
+#define SQ_INTERRUPT_WORD_AUTO_CTXID__REG_TIMESTAMP_MASK 0x00000008
+#define SQ_INTERRUPT_WORD_AUTO_CTXID__CMD_TIMESTAMP_MASK 0x00000010
+#define SQ_INTERRUPT_WORD_AUTO_CTXID__HOST_CMD_OVERFLOW_MASK 0x00000020
+#define SQ_INTERRUPT_WORD_AUTO_CTXID__HOST_REG_OVERFLOW_MASK 0x00000040
+#define SQ_INTERRUPT_WORD_AUTO_CTXID__IMMED_OVERFLOW_MASK 0x00000080
+#define SQ_INTERRUPT_WORD_AUTO_CTXID__THREAD_TRACE_UTC_ERROR_MASK 0x00000100
+#define SQ_INTERRUPT_WORD_AUTO_CTXID__SE_ID_MASK 0x03000000
+#define SQ_INTERRUPT_WORD_AUTO_CTXID__ENCODING_MASK 0x0c000000
+
+/* SQ_INTERRUPT_WORD_WAVE_CTXID */
+#define SQ_INTERRUPT_WORD_WAVE_CTXID__DATA__SHIFT 0
+#define SQ_INTERRUPT_WORD_WAVE_CTXID__SH_ID__SHIFT 12
+#define SQ_INTERRUPT_WORD_WAVE_CTXID__PRIV__SHIFT 13
+#define SQ_INTERRUPT_WORD_WAVE_CTXID__WAVE_ID__SHIFT 14
+#define SQ_INTERRUPT_WORD_WAVE_CTXID__SIMD_ID__SHIFT 18
+#define SQ_INTERRUPT_WORD_WAVE_CTXID__CU_ID__SHIFT 20
+#define SQ_INTERRUPT_WORD_WAVE_CTXID__SE_ID__SHIFT 24
+#define SQ_INTERRUPT_WORD_WAVE_CTXID__ENCODING__SHIFT 26
+
+#define SQ_INTERRUPT_WORD_WAVE_CTXID__DATA_MASK 0x00000fff
+#define SQ_INTERRUPT_WORD_WAVE_CTXID__SH_ID_MASK 0x00001000
+#define SQ_INTERRUPT_WORD_WAVE_CTXID__PRIV_MASK 0x00002000
+#define SQ_INTERRUPT_WORD_WAVE_CTXID__WAVE_ID_MASK 0x0003c000
+#define SQ_INTERRUPT_WORD_WAVE_CTXID__SIMD_ID_MASK 0x000c0000
+#define SQ_INTERRUPT_WORD_WAVE_CTXID__CU_ID_MASK 0x00f00000
+#define SQ_INTERRUPT_WORD_WAVE_CTXID__SE_ID_MASK 0x03000000
+#define SQ_INTERRUPT_WORD_WAVE_CTXID__ENCODING_MASK 0x0c000000
+
+/* GFX9 SQ interrupt 24-bit data from context_id<0,1> */
+#define KFD_CONTEXT_ID_GET_SQ_INT_DATA(ctx0, ctx1)                             \
+	((ctx0 & 0xfff) | ((ctx0 >> 16) & 0xf000) | ((ctx1 << 16) & 0xff0000))
+
+#define KFD_SQ_INT_DATA__ERR_TYPE_MASK 0xF00000
+#define KFD_SQ_INT_DATA__ERR_TYPE__SHIFT 20
+
+/*
+ * The debugger will send user data(m0) with PRIV=1 to indicate it requires
+ * notification from the KFD with the following queue id (DOORBELL_ID) and
+ * trap code (TRAP_CODE).
+ */
+#define KFD_INT_DATA_DEBUG_DOORBELL_MASK	0x0003ff
+#define KFD_INT_DATA_DEBUG_TRAP_CODE_SHIFT	10
+#define KFD_INT_DATA_DEBUG_TRAP_CODE_MASK	0x07fc00
+#define KFD_DEBUG_DOORBELL_ID(sq_int_data)	((sq_int_data) &	\
+				KFD_INT_DATA_DEBUG_DOORBELL_MASK)
+#define KFD_DEBUG_TRAP_CODE(sq_int_data)	(((sq_int_data) &	\
+				KFD_INT_DATA_DEBUG_TRAP_CODE_MASK)	\
+				>> KFD_INT_DATA_DEBUG_TRAP_CODE_SHIFT)
+#define KFD_DEBUG_CP_BAD_OP_ECODE_MASK		0x3fffc00
+#define KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT		10
+#define KFD_DEBUG_CP_BAD_OP_ECODE(ctxid0)	(((ctxid0) &		\
+				KFD_DEBUG_CP_BAD_OP_ECODE_MASK)		\
+				>> KFD_DEBUG_CP_BAD_OP_ECODE_SHIFT)
+
+static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
+				uint16_t pasid, uint16_t client_id)
+{
+	int old_poison, ret = -EINVAL;
+	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+
+	if (!p)
+		return;
+
+	/* all queues of a process will be unmapped in one time */
+	old_poison = atomic_cmpxchg(&p->poison, 0, 1);
+	kfd_unref_process(p);
+	if (old_poison)
+		return;
+
+	switch (client_id) {
+	case SOC15_IH_CLIENTID_SE0SH:
+	case SOC15_IH_CLIENTID_SE1SH:
+	case SOC15_IH_CLIENTID_SE2SH:
+	case SOC15_IH_CLIENTID_SE3SH:
+	case SOC15_IH_CLIENTID_UTCL2:
+		ret = kfd_dqm_evict_pasid(dev->dqm, pasid);
+		break;
+	case SOC15_IH_CLIENTID_SDMA0:
+	case SOC15_IH_CLIENTID_SDMA1:
+	case SOC15_IH_CLIENTID_SDMA2:
+	case SOC15_IH_CLIENTID_SDMA3:
+	case SOC15_IH_CLIENTID_SDMA4:
+		break;
+	default:
+		break;
+	}
+
+	kfd_signal_poison_consumed_event(dev, pasid);
+
+	/* resetting queue passes, do page retirement without gpu reset
+	 * resetting queue fails, fallback to gpu reset solution
+	 */
+	if (!ret) {
+		dev_warn(dev->adev->dev,
+			"RAS poison consumption, unmap queue flow succeeded: client id %d\n",
+			client_id);
+		amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false);
+	} else {
+		dev_warn(dev->adev->dev,
+			"RAS poison consumption, fall back to gpu reset flow: client id %d\n",
+			client_id);
+		amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true);
+	}
+}
+
+static bool context_id_expected(struct kfd_dev *dev)
+{
+	switch (KFD_GC_VERSION(dev)) {
+	case IP_VERSION(9, 0, 1):
+		return dev->mec_fw_version >= 0x817a;
+	case IP_VERSION(9, 1, 0):
+	case IP_VERSION(9, 2, 1):
+	case IP_VERSION(9, 2, 2):
+	case IP_VERSION(9, 3, 0):
+	case IP_VERSION(9, 4, 0):
+		return dev->mec_fw_version >= 0x17a;
+	default:
+		/* Other GFXv9 and later GPUs always sent valid context IDs
+		 * on legitimate events
+		 */
+		return KFD_GC_VERSION(dev) >= IP_VERSION(9, 4, 1);
+	}
+}
+
+static bool event_interrupt_isr_v9(struct kfd_node *dev,
+					const uint32_t *ih_ring_entry,
+					uint32_t *patched_ihre,
+					bool *patched_flag)
+{
+	uint16_t source_id, client_id, pasid, vmid;
+	const uint32_t *data = ih_ring_entry;
+
+	source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+	client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+
+	/* Only handle interrupts from KFD VMIDs */
+	vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+	if (!KFD_IRQ_IS_FENCE(client_id, source_id) &&
+	   (vmid < dev->vm_info.first_vmid_kfd ||
+	    vmid > dev->vm_info.last_vmid_kfd))
+		return false;
+
+	pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+
+	/* Only handle clients we care about */
+	if (client_id != SOC15_IH_CLIENTID_GRBM_CP &&
+	    client_id != SOC15_IH_CLIENTID_SDMA0 &&
+	    client_id != SOC15_IH_CLIENTID_SDMA1 &&
+	    client_id != SOC15_IH_CLIENTID_SDMA2 &&
+	    client_id != SOC15_IH_CLIENTID_SDMA3 &&
+	    client_id != SOC15_IH_CLIENTID_SDMA4 &&
+	    client_id != SOC15_IH_CLIENTID_SDMA5 &&
+	    client_id != SOC15_IH_CLIENTID_SDMA6 &&
+	    client_id != SOC15_IH_CLIENTID_SDMA7 &&
+	    client_id != SOC15_IH_CLIENTID_VMC &&
+	    client_id != SOC15_IH_CLIENTID_VMC1 &&
+	    client_id != SOC15_IH_CLIENTID_UTCL2 &&
+	    client_id != SOC15_IH_CLIENTID_SE0SH &&
+	    client_id != SOC15_IH_CLIENTID_SE1SH &&
+	    client_id != SOC15_IH_CLIENTID_SE2SH &&
+	    client_id != SOC15_IH_CLIENTID_SE3SH &&
+	    !KFD_IRQ_IS_FENCE(client_id, source_id))
+		return false;
+
+	/* This is a known issue for gfx9. Under non HWS, pasid is not set
+	 * in the interrupt payload, so we need to find out the pasid on our
+	 * own.
+	 */
+	if (!pasid && dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+		const uint32_t pasid_mask = 0xffff;
+
+		*patched_flag = true;
+		memcpy(patched_ihre, ih_ring_entry,
+				dev->kfd->device_info.ih_ring_entry_size);
+
+		pasid = dev->dqm->vmid_pasid[vmid];
+
+		/* Patch the pasid field */
+		patched_ihre[3] = cpu_to_le32((le32_to_cpu(patched_ihre[3])
+					& ~pasid_mask) | pasid);
+	}
+
+	pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n",
+		 client_id, source_id, vmid, pasid);
+	pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
+		 data[0], data[1], data[2], data[3],
+		 data[4], data[5], data[6], data[7]);
+
+	/* If there is no valid PASID, it's likely a bug */
+	if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt"))
+		return false;
+
+	/* Workaround CP firmware sending bogus signals with 0 context_id.
+	 * Those can be safely ignored on hardware and firmware versions that
+	 * include a valid context_id on legitimate signals. This avoids the
+	 * slow path in kfd_signal_event_interrupt that scans all event slots
+	 * for signaled events.
+	 */
+	if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) {
+		uint32_t context_id =
+			SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);
+
+		if (context_id == 0 && context_id_expected(dev->kfd))
+			return false;
+	}
+
+	/* Interrupt types we care about: various signals and faults.
+	 * They will be forwarded to a work queue (see below).
+	 */
+	return source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
+		source_id == SOC15_INTSRC_SDMA_TRAP ||
+		source_id == SOC15_INTSRC_SDMA_ECC ||
+		source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
+		source_id == SOC15_INTSRC_CP_BAD_OPCODE ||
+		KFD_IRQ_IS_FENCE(client_id, source_id) ||
+		((client_id == SOC15_IH_CLIENTID_VMC ||
+		client_id == SOC15_IH_CLIENTID_VMC1 ||
+		client_id == SOC15_IH_CLIENTID_UTCL2) &&
+		!amdgpu_no_queue_eviction_on_vm_fault);
+}
+
+static void event_interrupt_wq_v9(struct kfd_node *dev,
+					const uint32_t *ih_ring_entry)
+{
+	uint16_t source_id, client_id, pasid, vmid;
+	uint32_t context_id0, context_id1;
+	uint32_t sq_intr_err, sq_int_data, encoding;
+
+	source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+	client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+	pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+	vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+	context_id0 = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);
+	context_id1 = SOC15_CONTEXT_ID1_FROM_IH_ENTRY(ih_ring_entry);
+
+	if (client_id == SOC15_IH_CLIENTID_GRBM_CP ||
+	    client_id == SOC15_IH_CLIENTID_SE0SH ||
+	    client_id == SOC15_IH_CLIENTID_SE1SH ||
+	    client_id == SOC15_IH_CLIENTID_SE2SH ||
+	    client_id == SOC15_IH_CLIENTID_SE3SH) {
+		if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
+			kfd_signal_event_interrupt(pasid, context_id0, 32);
+		else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) {
+			sq_int_data = KFD_CONTEXT_ID_GET_SQ_INT_DATA(context_id0, context_id1);
+			encoding = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, ENCODING);
+			switch (encoding) {
+			case SQ_INTERRUPT_WORD_ENCODING_AUTO:
+				pr_debug_ratelimited(
+					"sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n",
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, SE_ID),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, WLT),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_BUF_FULL),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, REG_TIMESTAMP),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, CMD_TIMESTAMP),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, HOST_CMD_OVERFLOW),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, HOST_REG_OVERFLOW),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, IMMED_OVERFLOW),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_UTC_ERROR));
+				break;
+			case SQ_INTERRUPT_WORD_ENCODING_INST:
+				pr_debug_ratelimited("sq_intr: inst, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, intr_data 0x%x\n",
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SE_ID),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, DATA),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SH_ID),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, PRIV),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, WAVE_ID),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SIMD_ID),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, CU_ID),
+					sq_int_data);
+				if (context_id0 & SQ_INTERRUPT_WORD_WAVE_CTXID__PRIV_MASK) {
+					if (kfd_set_dbg_ev_from_interrupt(dev, pasid,
+							KFD_DEBUG_DOORBELL_ID(sq_int_data),
+							KFD_DEBUG_TRAP_CODE(sq_int_data),
+							NULL, 0))
+						return;
+				}
+				break;
+			case SQ_INTERRUPT_WORD_ENCODING_ERROR:
+				sq_intr_err = REG_GET_FIELD(sq_int_data, KFD_SQ_INT_DATA, ERR_TYPE);
+				pr_warn_ratelimited("sq_intr: error, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, err_type %d\n",
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SE_ID),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, DATA),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SH_ID),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, PRIV),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, WAVE_ID),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SIMD_ID),
+					REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, CU_ID),
+					sq_intr_err);
+				if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
+					sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) {
+					event_interrupt_poison_consumption_v9(dev, pasid, client_id);
+					return;
+				}
+				break;
+			default:
+				break;
+			}
+			kfd_signal_event_interrupt(pasid, sq_int_data, 24);
+		} else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
+			kfd_set_dbg_ev_from_interrupt(dev, pasid,
+				KFD_DEBUG_DOORBELL_ID(context_id0),
+				KFD_EC_MASK(KFD_DEBUG_CP_BAD_OP_ECODE(context_id0)),
+				NULL, 0);
+		}
+	} else if (client_id == SOC15_IH_CLIENTID_SDMA0 ||
+		   client_id == SOC15_IH_CLIENTID_SDMA1 ||
+		   client_id == SOC15_IH_CLIENTID_SDMA2 ||
+		   client_id == SOC15_IH_CLIENTID_SDMA3 ||
+		   client_id == SOC15_IH_CLIENTID_SDMA4 ||
+		   client_id == SOC15_IH_CLIENTID_SDMA5 ||
+		   client_id == SOC15_IH_CLIENTID_SDMA6 ||
+		   client_id == SOC15_IH_CLIENTID_SDMA7) {
+		if (source_id == SOC15_INTSRC_SDMA_TRAP) {
+			kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28);
+		} else if (source_id == SOC15_INTSRC_SDMA_ECC) {
+			event_interrupt_poison_consumption_v9(dev, pasid, client_id);
+			return;
+		}
+	} else if (client_id == SOC15_IH_CLIENTID_VMC ||
+		   client_id == SOC15_IH_CLIENTID_VMC1 ||
+		   client_id == SOC15_IH_CLIENTID_UTCL2) {
+		struct kfd_vm_fault_info info = {0};
+		uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
+		struct kfd_hsa_memory_exception_data exception_data;
+
+		if (client_id == SOC15_IH_CLIENTID_UTCL2 &&
+		    amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) {
+			event_interrupt_poison_consumption_v9(dev, pasid, client_id);
+			return;
+		}
+
+		info.vmid = vmid;
+		info.mc_id = client_id;
+		info.page_addr = ih_ring_entry[4] |
+			(uint64_t)(ih_ring_entry[5] & 0xf) << 32;
+		info.prot_valid = ring_id & 0x08;
+		info.prot_read  = ring_id & 0x10;
+		info.prot_write = ring_id & 0x20;
+
+		memset(&exception_data, 0, sizeof(exception_data));
+		exception_data.gpu_id = dev->id;
+		exception_data.va = (info.page_addr) << PAGE_SHIFT;
+		exception_data.failure.NotPresent = info.prot_valid ? 1 : 0;
+		exception_data.failure.NoExecute = info.prot_exec ? 1 : 0;
+		exception_data.failure.ReadOnly = info.prot_write ? 1 : 0;
+		exception_data.failure.imprecise = 0;
+
+		kfd_set_dbg_ev_from_interrupt(dev,
+						pasid,
+						-1,
+						KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION),
+						&exception_data,
+						sizeof(exception_data));
+		kfd_smi_event_update_vmfault(dev, pasid);
+	} else if (KFD_IRQ_IS_FENCE(client_id, source_id)) {
+		kfd_process_close_interrupt_drain(pasid);
+	}
+}
+
+static bool event_interrupt_isr_v9_4_3(struct kfd_node *node,
+				const uint32_t *ih_ring_entry,
+				uint32_t *patched_ihre,
+				bool *patched_flag)
+{
+	uint16_t node_id, vmid;
+
+	/*
+	 * For GFX 9.4.3, process the interrupt if:
+	 * - NodeID field in IH entry matches the corresponding bit
+	 *   set in interrupt_bitmap Bits 0-15.
+	 *   OR
+	 * - If partition mode is CPX and interrupt came from
+	 *   Node_id 0,4,8,12, then check if the Bit (16 + client id)
+	 *   is set in interrupt bitmap Bits 16-31.
+	 */
+	node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry);
+	vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+	if (kfd_irq_is_from_node(node, node_id, vmid))
+		return event_interrupt_isr_v9(node, ih_ring_entry,
+					patched_ihre, patched_flag);
+	return false;
+}
+
+const struct kfd_event_interrupt_class event_interrupt_class_v9 = {
+	.interrupt_isr = event_interrupt_isr_v9,
+	.interrupt_wq = event_interrupt_wq_v9,
+};
+
+const struct kfd_event_interrupt_class event_interrupt_class_v9_4_3 = {
+	.interrupt_isr = event_interrupt_isr_v9_4_3,
+	.interrupt_wq = event_interrupt_wq_v9,
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
new file mode 100644
index 000000000..dd3c43c1a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * KFD Interrupts.
+ *
+ * AMD GPUs deliver interrupts by pushing an interrupt description onto the
+ * interrupt ring and then sending an interrupt. KGD receives the interrupt
+ * in ISR and sends us a pointer to each new entry on the interrupt ring.
+ *
+ * We generally can't process interrupt-signaled events from ISR, so we call
+ * out to each interrupt client module (currently only the scheduler) to ask if
+ * each interrupt is interesting. If they return true, then it requires further
+ * processing so we copy it to an internal interrupt ring and call each
+ * interrupt client again from a work-queue.
+ *
+ * There's no acknowledgment for the interrupts we use. The hardware simply
+ * queues a new interrupt each time without waiting.
+ *
+ * The fixed-size internal queue means that it's possible for us to lose
+ * interrupts because we have no back-pressure to the hardware.
+ */
+
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/kfifo.h>
+#include "kfd_priv.h"
+
+#define KFD_IH_NUM_ENTRIES 8192
+
+static void interrupt_wq(struct work_struct *);
+
+int kfd_interrupt_init(struct kfd_node *node)
+{
+	int r;
+
+	r = kfifo_alloc(&node->ih_fifo,
+		KFD_IH_NUM_ENTRIES * node->kfd->device_info.ih_ring_entry_size,
+		GFP_KERNEL);
+	if (r) {
+		dev_err(node->adev->dev, "Failed to allocate IH fifo\n");
+		return r;
+	}
+
+	node->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1);
+	if (unlikely(!node->ih_wq)) {
+		kfifo_free(&node->ih_fifo);
+		dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n");
+		return -ENOMEM;
+	}
+	spin_lock_init(&node->interrupt_lock);
+
+	INIT_WORK(&node->interrupt_work, interrupt_wq);
+
+	node->interrupts_active = true;
+
+	/*
+	 * After this function returns, the interrupt will be enabled. This
+	 * barrier ensures that the interrupt running on a different processor
+	 * sees all the above writes.
+	 */
+	smp_wmb();
+
+	return 0;
+}
+
+void kfd_interrupt_exit(struct kfd_node *node)
+{
+	/*
+	 * Stop the interrupt handler from writing to the ring and scheduling
+	 * workqueue items. The spinlock ensures that any interrupt running
+	 * after we have unlocked sees interrupts_active = false.
+	 */
+	unsigned long flags;
+
+	spin_lock_irqsave(&node->interrupt_lock, flags);
+	node->interrupts_active = false;
+	spin_unlock_irqrestore(&node->interrupt_lock, flags);
+
+	/*
+	 * flush_work ensures that there are no outstanding
+	 * work-queue items that will access interrupt_ring. New work items
+	 * can't be created because we stopped interrupt handling above.
+	 */
+	flush_workqueue(node->ih_wq);
+
+	kfifo_free(&node->ih_fifo);
+}
+
+/*
+ * Assumption: single reader/writer. This function is not re-entrant
+ */
+bool enqueue_ih_ring_entry(struct kfd_node *node, const void *ih_ring_entry)
+{
+	int count;
+
+	count = kfifo_in(&node->ih_fifo, ih_ring_entry,
+				node->kfd->device_info.ih_ring_entry_size);
+	if (count != node->kfd->device_info.ih_ring_entry_size) {
+		dev_dbg_ratelimited(node->adev->dev,
+			"Interrupt ring overflow, dropping interrupt %d\n",
+			count);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Assumption: single reader/writer. This function is not re-entrant
+ */
+static bool dequeue_ih_ring_entry(struct kfd_node *node, void *ih_ring_entry)
+{
+	int count;
+
+	count = kfifo_out(&node->ih_fifo, ih_ring_entry,
+				node->kfd->device_info.ih_ring_entry_size);
+
+	WARN_ON(count && count != node->kfd->device_info.ih_ring_entry_size);
+
+	return count == node->kfd->device_info.ih_ring_entry_size;
+}
+
+static void interrupt_wq(struct work_struct *work)
+{
+	struct kfd_node *dev = container_of(work, struct kfd_node,
+						interrupt_work);
+	uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
+	unsigned long start_jiffies = jiffies;
+
+	if (dev->kfd->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) {
+		dev_err_once(dev->adev->dev, "Ring entry too small\n");
+		return;
+	}
+
+	while (dequeue_ih_ring_entry(dev, ih_ring_entry)) {
+		dev->kfd->device_info.event_interrupt_class->interrupt_wq(dev,
+								ih_ring_entry);
+		if (time_is_before_jiffies(start_jiffies + HZ)) {
+			/* If we spent more than a second processing signals,
+			 * reschedule the worker to avoid soft-lockup warnings
+			 */
+			queue_work(dev->ih_wq, &dev->interrupt_work);
+			break;
+		}
+	}
+}
+
+bool interrupt_is_wanted(struct kfd_node *dev,
+			const uint32_t *ih_ring_entry,
+			uint32_t *patched_ihre, bool *flag)
+{
+	/* integer and bitwise OR so there is no boolean short-circuiting */
+	unsigned int wanted = 0;
+
+	wanted |= dev->kfd->device_info.event_interrupt_class->interrupt_isr(dev,
+					 ih_ring_entry, patched_ihre, flag);
+
+	return wanted != 0;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
new file mode 100644
index 000000000..1bea629c4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -0,0 +1,376 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include "kfd_kernel_queue.h"
+#include "kfd_priv.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers.h"
+#include "kfd_pm4_opcodes.h"
+
+#define PM4_COUNT_ZERO (((1 << 15) - 1) << 16)
+
+/* Initialize a kernel queue, including allocations of GART memory
+ * needed for the queue.
+ */
+static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
+		enum kfd_queue_type type, unsigned int queue_size)
+{
+	struct queue_properties prop;
+	int retval;
+	union PM4_MES_TYPE_3_HEADER nop;
+
+	if (WARN_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ))
+		return false;
+
+	pr_debug("Initializing queue type %d size %d\n", KFD_QUEUE_TYPE_HIQ,
+			queue_size);
+
+	memset(&prop, 0, sizeof(prop));
+	memset(&nop, 0, sizeof(nop));
+
+	nop.opcode = IT_NOP;
+	nop.type = PM4_TYPE_3;
+	nop.u32all |= PM4_COUNT_ZERO;
+
+	kq->dev = dev;
+	kq->nop_packet = nop.u32all;
+	switch (type) {
+	case KFD_QUEUE_TYPE_DIQ:
+		kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_DIQ];
+		break;
+	case KFD_QUEUE_TYPE_HIQ:
+		kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
+		break;
+	default:
+		pr_err("Invalid queue type %d\n", type);
+		return false;
+	}
+
+	if (!kq->mqd_mgr)
+		return false;
+
+	prop.doorbell_ptr = kfd_get_kernel_doorbell(dev->kfd, &prop.doorbell_off);
+
+	if (!prop.doorbell_ptr) {
+		pr_err("Failed to initialize doorbell");
+		goto err_get_kernel_doorbell;
+	}
+
+	retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq);
+	if (retval != 0) {
+		pr_err("Failed to init pq queues size %d\n", queue_size);
+		goto err_pq_allocate_vidmem;
+	}
+
+	kq->pq_kernel_addr = kq->pq->cpu_ptr;
+	kq->pq_gpu_addr = kq->pq->gpu_addr;
+
+	/* For CIK family asics, kq->eop_mem is not needed */
+	if (dev->adev->asic_type > CHIP_MULLINS) {
+		retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
+		if (retval != 0)
+			goto err_eop_allocate_vidmem;
+
+		kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
+		kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
+
+		memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
+	}
+
+	retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel),
+					&kq->rptr_mem);
+
+	if (retval != 0)
+		goto err_rptr_allocate_vidmem;
+
+	kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
+	kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
+
+	retval = kfd_gtt_sa_allocate(dev, dev->kfd->device_info.doorbell_size,
+					&kq->wptr_mem);
+
+	if (retval != 0)
+		goto err_wptr_allocate_vidmem;
+
+	kq->wptr_kernel = kq->wptr_mem->cpu_ptr;
+	kq->wptr_gpu_addr = kq->wptr_mem->gpu_addr;
+
+	memset(kq->pq_kernel_addr, 0, queue_size);
+	memset(kq->rptr_kernel, 0, sizeof(*kq->rptr_kernel));
+	memset(kq->wptr_kernel, 0, sizeof(*kq->wptr_kernel));
+
+	prop.queue_size = queue_size;
+	prop.is_interop = false;
+	prop.is_gws = false;
+	prop.priority = 1;
+	prop.queue_percent = 100;
+	prop.type = type;
+	prop.vmid = 0;
+	prop.queue_address = kq->pq_gpu_addr;
+	prop.read_ptr = (uint32_t *) kq->rptr_gpu_addr;
+	prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr;
+	prop.eop_ring_buffer_address = kq->eop_gpu_addr;
+	prop.eop_ring_buffer_size = PAGE_SIZE;
+
+	if (init_queue(&kq->queue, &prop) != 0)
+		goto err_init_queue;
+
+	kq->queue->device = dev;
+	kq->queue->process = kfd_get_process(current);
+
+	kq->queue->mqd_mem_obj = kq->mqd_mgr->allocate_mqd(kq->mqd_mgr->dev,
+					&kq->queue->properties);
+	if (!kq->queue->mqd_mem_obj)
+		goto err_allocate_mqd;
+	kq->mqd_mgr->init_mqd(kq->mqd_mgr, &kq->queue->mqd,
+					kq->queue->mqd_mem_obj,
+					&kq->queue->gart_mqd_addr,
+					&kq->queue->properties);
+	/* assign HIQ to HQD */
+	if (type == KFD_QUEUE_TYPE_HIQ) {
+		pr_debug("Assigning hiq to hqd\n");
+		kq->queue->pipe = KFD_CIK_HIQ_PIPE;
+		kq->queue->queue = KFD_CIK_HIQ_QUEUE;
+		kq->mqd_mgr->load_mqd(kq->mqd_mgr, kq->queue->mqd,
+				kq->queue->pipe, kq->queue->queue,
+				&kq->queue->properties, NULL);
+	} else {
+		/* allocate fence for DIQ */
+
+		retval = kfd_gtt_sa_allocate(dev, sizeof(uint32_t),
+						&kq->fence_mem_obj);
+
+		if (retval != 0)
+			goto err_alloc_fence;
+
+		kq->fence_kernel_address = kq->fence_mem_obj->cpu_ptr;
+		kq->fence_gpu_addr = kq->fence_mem_obj->gpu_addr;
+	}
+
+	print_queue(kq->queue);
+
+	return true;
+err_alloc_fence:
+	kq->mqd_mgr->free_mqd(kq->mqd_mgr, kq->queue->mqd, kq->queue->mqd_mem_obj);
+err_allocate_mqd:
+	uninit_queue(kq->queue);
+err_init_queue:
+	kfd_gtt_sa_free(dev, kq->wptr_mem);
+err_wptr_allocate_vidmem:
+	kfd_gtt_sa_free(dev, kq->rptr_mem);
+err_rptr_allocate_vidmem:
+	kfd_gtt_sa_free(dev, kq->eop_mem);
+err_eop_allocate_vidmem:
+	kfd_gtt_sa_free(dev, kq->pq);
+err_pq_allocate_vidmem:
+	kfd_release_kernel_doorbell(dev->kfd, prop.doorbell_ptr);
+err_get_kernel_doorbell:
+	return false;
+
+}
+
+/* Uninitialize a kernel queue and free all its memory usages. */
+static void kq_uninitialize(struct kernel_queue *kq, bool hanging)
+{
+	if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ && !hanging)
+		kq->mqd_mgr->destroy_mqd(kq->mqd_mgr,
+					kq->queue->mqd,
+					KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
+					KFD_UNMAP_LATENCY_MS,
+					kq->queue->pipe,
+					kq->queue->queue);
+	else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ)
+		kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj);
+
+	kq->mqd_mgr->free_mqd(kq->mqd_mgr, kq->queue->mqd,
+				kq->queue->mqd_mem_obj);
+
+	kfd_gtt_sa_free(kq->dev, kq->rptr_mem);
+	kfd_gtt_sa_free(kq->dev, kq->wptr_mem);
+
+	/* For CIK family asics, kq->eop_mem is Null, kfd_gtt_sa_free()
+	 * is able to handle NULL properly.
+	 */
+	kfd_gtt_sa_free(kq->dev, kq->eop_mem);
+
+	kfd_gtt_sa_free(kq->dev, kq->pq);
+	kfd_release_kernel_doorbell(kq->dev->kfd,
+					kq->queue->properties.doorbell_ptr);
+	uninit_queue(kq->queue);
+}
+
+int kq_acquire_packet_buffer(struct kernel_queue *kq,
+		size_t packet_size_in_dwords, unsigned int **buffer_ptr)
+{
+	size_t available_size;
+	size_t queue_size_dwords;
+	uint32_t wptr, rptr;
+	uint64_t wptr64;
+	unsigned int *queue_address;
+
+	/* When rptr == wptr, the buffer is empty.
+	 * When rptr == wptr + 1, the buffer is full.
+	 * It is always rptr that advances to the position of wptr, rather than
+	 * the opposite. So we can only use up to queue_size_dwords - 1 dwords.
+	 */
+	rptr = *kq->rptr_kernel;
+	wptr = kq->pending_wptr;
+	wptr64 = kq->pending_wptr64;
+	queue_address = (unsigned int *)kq->pq_kernel_addr;
+	queue_size_dwords = kq->queue->properties.queue_size / 4;
+
+	pr_debug("rptr: %d\n", rptr);
+	pr_debug("wptr: %d\n", wptr);
+	pr_debug("queue_address 0x%p\n", queue_address);
+
+	available_size = (rptr + queue_size_dwords - 1 - wptr) %
+							queue_size_dwords;
+
+	if (packet_size_in_dwords > available_size) {
+		/*
+		 * make sure calling functions know
+		 * acquire_packet_buffer() failed
+		 */
+		goto err_no_space;
+	}
+
+	if (wptr + packet_size_in_dwords >= queue_size_dwords) {
+		/* make sure after rolling back to position 0, there is
+		 * still enough space.
+		 */
+		if (packet_size_in_dwords >= rptr)
+			goto err_no_space;
+
+		/* fill nops, roll back and start at position 0 */
+		while (wptr > 0) {
+			queue_address[wptr] = kq->nop_packet;
+			wptr = (wptr + 1) % queue_size_dwords;
+			wptr64++;
+		}
+	}
+
+	*buffer_ptr = &queue_address[wptr];
+	kq->pending_wptr = wptr + packet_size_in_dwords;
+	kq->pending_wptr64 = wptr64 + packet_size_in_dwords;
+
+	return 0;
+
+err_no_space:
+	*buffer_ptr = NULL;
+	return -ENOMEM;
+}
+
+void kq_submit_packet(struct kernel_queue *kq)
+{
+#ifdef DEBUG
+	int i;
+
+	for (i = *kq->wptr_kernel; i < kq->pending_wptr; i++) {
+		pr_debug("0x%2X ", kq->pq_kernel_addr[i]);
+		if (i % 15 == 0)
+			pr_debug("\n");
+	}
+	pr_debug("\n");
+#endif
+	if (kq->dev->kfd->device_info.doorbell_size == 8) {
+		*kq->wptr64_kernel = kq->pending_wptr64;
+		write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
+					kq->pending_wptr64);
+	} else {
+		*kq->wptr_kernel = kq->pending_wptr;
+		write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
+					kq->pending_wptr);
+	}
+}
+
+void kq_rollback_packet(struct kernel_queue *kq)
+{
+	if (kq->dev->kfd->device_info.doorbell_size == 8) {
+		kq->pending_wptr64 = *kq->wptr64_kernel;
+		kq->pending_wptr = *kq->wptr_kernel %
+			(kq->queue->properties.queue_size / 4);
+	} else {
+		kq->pending_wptr = *kq->wptr_kernel;
+	}
+}
+
+struct kernel_queue *kernel_queue_init(struct kfd_node *dev,
+					enum kfd_queue_type type)
+{
+	struct kernel_queue *kq;
+
+	kq = kzalloc(sizeof(*kq), GFP_KERNEL);
+	if (!kq)
+		return NULL;
+
+	if (kq_initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE))
+		return kq;
+
+	pr_err("Failed to init kernel queue\n");
+
+	kfree(kq);
+	return NULL;
+}
+
+void kernel_queue_uninit(struct kernel_queue *kq, bool hanging)
+{
+	kq_uninitialize(kq, hanging);
+	kfree(kq);
+}
+
+/* FIXME: Can this test be removed? */
+static __attribute__((unused)) void test_kq(struct kfd_node *dev)
+{
+	struct kernel_queue *kq;
+	uint32_t *buffer, i;
+	int retval;
+
+	pr_err("Starting kernel queue test\n");
+
+	kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ);
+	if (unlikely(!kq)) {
+		pr_err("  Failed to initialize HIQ\n");
+		pr_err("Kernel queue test failed\n");
+		return;
+	}
+
+	retval = kq_acquire_packet_buffer(kq, 5, &buffer);
+	if (unlikely(retval != 0)) {
+		pr_err("  Failed to acquire packet buffer\n");
+		pr_err("Kernel queue test failed\n");
+		return;
+	}
+	for (i = 0; i < 5; i++)
+		buffer[i] = kq->nop_packet;
+	kq_submit_packet(kq);
+
+	pr_err("Ending kernel queue test\n");
+}
+
+
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
new file mode 100644
index 000000000..9a6244430
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_KERNEL_QUEUE_H_
+#define KFD_KERNEL_QUEUE_H_
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include "kfd_priv.h"
+
+/**
+ * kq_acquire_packet_buffer: Returns a pointer to the location in the kernel
+ * queue ring buffer where the calling function can write its packet. It is
+ * Guaranteed that there is enough space for that packet. It also updates the
+ * pending write pointer to that location so subsequent calls to
+ * acquire_packet_buffer will get a correct write pointer
+ *
+ * kq_submit_packet: Update the write pointer and doorbell of a kernel queue.
+ *
+ * kq_rollback_packet: This routine is called if we failed to build an acquired
+ * packet for some reason. It just overwrites the pending wptr with the current
+ * one
+ *
+ */
+
+int kq_acquire_packet_buffer(struct kernel_queue *kq,
+				size_t packet_size_in_dwords,
+				unsigned int **buffer_ptr);
+void kq_submit_packet(struct kernel_queue *kq);
+void kq_rollback_packet(struct kernel_queue *kq);
+
+
+struct kernel_queue {
+	/* data */
+	struct kfd_node		*dev;
+	struct mqd_manager	*mqd_mgr;
+	struct queue		*queue;
+	uint64_t		pending_wptr64;
+	uint32_t		pending_wptr;
+	unsigned int		nop_packet;
+
+	struct kfd_mem_obj	*rptr_mem;
+	uint32_t		*rptr_kernel;
+	uint64_t		rptr_gpu_addr;
+	struct kfd_mem_obj	*wptr_mem;
+	union {
+		uint64_t	*wptr64_kernel;
+		uint32_t	*wptr_kernel;
+	};
+	uint64_t		wptr_gpu_addr;
+	struct kfd_mem_obj	*pq;
+	uint64_t		pq_gpu_addr;
+	uint32_t		*pq_kernel_addr;
+	struct kfd_mem_obj	*eop_mem;
+	uint64_t		eop_gpu_addr;
+	uint32_t		*eop_kernel_addr;
+
+	struct kfd_mem_obj	*fence_mem_obj;
+	uint64_t		fence_gpu_addr;
+	void			*fence_kernel_address;
+
+	struct list_head	list;
+};
+
+#endif /* KFD_KERNEL_QUEUE_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
new file mode 100644
index 000000000..659313648
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -0,0 +1,1055 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2020-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <linux/types.h>
+#include <linux/hmm.h>
+#include <linux/dma-direction.h>
+#include <linux/dma-mapping.h>
+#include <linux/migrate.h>
+#include "amdgpu_sync.h"
+#include "amdgpu_object.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_res_cursor.h"
+#include "kfd_priv.h"
+#include "kfd_svm.h"
+#include "kfd_migrate.h"
+#include "kfd_smi_events.h"
+
+#ifdef dev_fmt
+#undef dev_fmt
+#endif
+#define dev_fmt(fmt) "kfd_migrate: " fmt
+
+static uint64_t
+svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
+{
+	return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM);
+}
+
+static int
+svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
+		     dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_job *job;
+	unsigned int num_dw, num_bytes;
+	struct dma_fence *fence;
+	uint64_t src_addr, dst_addr;
+	uint64_t pte_flags;
+	void *cpu_addr;
+	int r;
+
+	/* use gart window 0 */
+	*gart_addr = adev->gmc.gart_start;
+
+	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
+	num_bytes = npages * 8;
+
+	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
+				     AMDGPU_FENCE_OWNER_UNDEFINED,
+				     num_dw * 4 + num_bytes,
+				     AMDGPU_IB_POOL_DELAYED,
+				     &job);
+	if (r)
+		return r;
+
+	src_addr = num_dw * 4;
+	src_addr += job->ibs[0].gpu_addr;
+
+	dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
+	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
+				dst_addr, num_bytes, false);
+
+	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+	WARN_ON(job->ibs[0].length_dw > num_dw);
+
+	pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
+	pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
+	if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO))
+		pte_flags |= AMDGPU_PTE_WRITEABLE;
+	pte_flags |= adev->gart.gart_pte_flags;
+
+	cpu_addr = &job->ibs[0].ptr[num_dw];
+
+	amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
+	fence = amdgpu_job_submit(job);
+	dma_fence_put(fence);
+
+	return r;
+}
+
+/**
+ * svm_migrate_copy_memory_gart - sdma copy data between ram and vram
+ *
+ * @adev: amdgpu device the sdma ring running
+ * @sys: system DMA pointer to be copied
+ * @vram: vram destination DMA pointer
+ * @npages: number of pages to copy
+ * @direction: enum MIGRATION_COPY_DIR
+ * @mfence: output, sdma fence to signal after sdma is done
+ *
+ * ram address uses GART table continuous entries mapping to ram pages,
+ * vram address uses direct mapping of vram pages, which must have npages
+ * number of continuous pages.
+ * GART update and sdma uses same buf copy function ring, sdma is splited to
+ * multiple GTT_MAX_PAGES transfer, all sdma operations are serialized, wait for
+ * the last sdma finish fence which is returned to check copy memory is done.
+ *
+ * Context: Process context, takes and releases gtt_window_lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+
+static int
+svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
+			     uint64_t *vram, uint64_t npages,
+			     enum MIGRATION_COPY_DIR direction,
+			     struct dma_fence **mfence)
+{
+	const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
+	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+	uint64_t gart_s, gart_d;
+	struct dma_fence *next;
+	uint64_t size;
+	int r;
+
+	mutex_lock(&adev->mman.gtt_window_lock);
+
+	while (npages) {
+		size = min(GTT_MAX_PAGES, npages);
+
+		if (direction == FROM_VRAM_TO_RAM) {
+			gart_s = svm_migrate_direct_mapping_addr(adev, *vram);
+			r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0);
+
+		} else if (direction == FROM_RAM_TO_VRAM) {
+			r = svm_migrate_gart_map(ring, size, sys, &gart_s,
+						 KFD_IOCTL_SVM_FLAG_GPU_RO);
+			gart_d = svm_migrate_direct_mapping_addr(adev, *vram);
+		}
+		if (r) {
+			dev_err(adev->dev, "fail %d create gart mapping\n", r);
+			goto out_unlock;
+		}
+
+		r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
+				       NULL, &next, false, true, false);
+		if (r) {
+			dev_err(adev->dev, "fail %d to copy memory\n", r);
+			goto out_unlock;
+		}
+
+		dma_fence_put(*mfence);
+		*mfence = next;
+		npages -= size;
+		if (npages) {
+			sys += size;
+			vram += size;
+		}
+	}
+
+out_unlock:
+	mutex_unlock(&adev->mman.gtt_window_lock);
+
+	return r;
+}
+
+/**
+ * svm_migrate_copy_done - wait for memory copy sdma is done
+ *
+ * @adev: amdgpu device the sdma memory copy is executing on
+ * @mfence: migrate fence
+ *
+ * Wait for dma fence is signaled, if the copy ssplit into multiple sdma
+ * operations, this is the last sdma operation fence.
+ *
+ * Context: called after svm_migrate_copy_memory
+ *
+ * Return:
+ * 0		- success
+ * otherwise	- error code from dma fence signal
+ */
+static int
+svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence)
+{
+	int r = 0;
+
+	if (mfence) {
+		r = dma_fence_wait(mfence, false);
+		dma_fence_put(mfence);
+		pr_debug("sdma copy memory fence done\n");
+	}
+
+	return r;
+}
+
+unsigned long
+svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr)
+{
+	return (addr + adev->kfd.pgmap.range.start) >> PAGE_SHIFT;
+}
+
+static void
+svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
+{
+	struct page *page;
+
+	page = pfn_to_page(pfn);
+	svm_range_bo_ref(prange->svm_bo);
+	page->zone_device_data = prange->svm_bo;
+	zone_device_page_init(page);
+}
+
+static void
+svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr)
+{
+	struct page *page;
+
+	page = pfn_to_page(svm_migrate_addr_to_pfn(adev, addr));
+	unlock_page(page);
+	put_page(page);
+}
+
+static unsigned long
+svm_migrate_addr(struct amdgpu_device *adev, struct page *page)
+{
+	unsigned long addr;
+
+	addr = page_to_pfn(page) << PAGE_SHIFT;
+	return (addr - adev->kfd.pgmap.range.start);
+}
+
+static struct page *
+svm_migrate_get_sys_page(struct vm_area_struct *vma, unsigned long addr)
+{
+	struct page *page;
+
+	page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+	if (page)
+		lock_page(page);
+
+	return page;
+}
+
+static void svm_migrate_put_sys_page(unsigned long addr)
+{
+	struct page *page;
+
+	page = pfn_to_page(addr >> PAGE_SHIFT);
+	unlock_page(page);
+	put_page(page);
+}
+
+static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate)
+{
+	unsigned long cpages = 0;
+	unsigned long i;
+
+	for (i = 0; i < migrate->npages; i++) {
+		if (migrate->src[i] & MIGRATE_PFN_VALID &&
+		    migrate->src[i] & MIGRATE_PFN_MIGRATE)
+			cpages++;
+	}
+	return cpages;
+}
+
+static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)
+{
+	unsigned long upages = 0;
+	unsigned long i;
+
+	for (i = 0; i < migrate->npages; i++) {
+		if (migrate->src[i] & MIGRATE_PFN_VALID &&
+		    !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
+			upages++;
+	}
+	return upages;
+}
+
+static int
+svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
+			 struct migrate_vma *migrate, struct dma_fence **mfence,
+			 dma_addr_t *scratch, uint64_t ttm_res_offset)
+{
+	uint64_t npages = migrate->cpages;
+	struct amdgpu_device *adev = node->adev;
+	struct device *dev = adev->dev;
+	struct amdgpu_res_cursor cursor;
+	dma_addr_t *src;
+	uint64_t *dst;
+	uint64_t i, j;
+	int r;
+
+	pr_debug("svms 0x%p [0x%lx 0x%lx 0x%llx]\n", prange->svms, prange->start,
+		 prange->last, ttm_res_offset);
+
+	src = scratch;
+	dst = (uint64_t *)(scratch + npages);
+
+	amdgpu_res_first(prange->ttm_res, ttm_res_offset,
+			 npages << PAGE_SHIFT, &cursor);
+	for (i = j = 0; i < npages; i++) {
+		struct page *spage;
+
+		dst[i] = cursor.start + (j << PAGE_SHIFT);
+		migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
+		svm_migrate_get_vram_page(prange, migrate->dst[i]);
+		migrate->dst[i] = migrate_pfn(migrate->dst[i]);
+
+		spage = migrate_pfn_to_page(migrate->src[i]);
+		if (spage && !is_zone_device_page(spage)) {
+			src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
+					      DMA_TO_DEVICE);
+			r = dma_mapping_error(dev, src[i]);
+			if (r) {
+				dev_err(dev, "%s: fail %d dma_map_page\n",
+					__func__, r);
+				goto out_free_vram_pages;
+			}
+		} else {
+			if (j) {
+				r = svm_migrate_copy_memory_gart(
+						adev, src + i - j,
+						dst + i - j, j,
+						FROM_RAM_TO_VRAM,
+						mfence);
+				if (r)
+					goto out_free_vram_pages;
+				amdgpu_res_next(&cursor, (j + 1) << PAGE_SHIFT);
+				j = 0;
+			} else {
+				amdgpu_res_next(&cursor, PAGE_SIZE);
+			}
+			continue;
+		}
+
+		pr_debug_ratelimited("dma mapping src to 0x%llx, pfn 0x%lx\n",
+				     src[i] >> PAGE_SHIFT, page_to_pfn(spage));
+
+		if (j >= (cursor.size >> PAGE_SHIFT) - 1 && i < npages - 1) {
+			r = svm_migrate_copy_memory_gart(adev, src + i - j,
+							 dst + i - j, j + 1,
+							 FROM_RAM_TO_VRAM,
+							 mfence);
+			if (r)
+				goto out_free_vram_pages;
+			amdgpu_res_next(&cursor, (j + 1) * PAGE_SIZE);
+			j = 0;
+		} else {
+			j++;
+		}
+	}
+
+	r = svm_migrate_copy_memory_gart(adev, src + i - j, dst + i - j, j,
+					 FROM_RAM_TO_VRAM, mfence);
+
+out_free_vram_pages:
+	if (r) {
+		pr_debug("failed %d to copy memory to vram\n", r);
+		while (i--) {
+			svm_migrate_put_vram_page(adev, dst[i]);
+			migrate->dst[i] = 0;
+		}
+	}
+
+#ifdef DEBUG_FORCE_MIXED_DOMAINS
+	for (i = 0, j = 0; i < npages; i += 4, j++) {
+		if (j & 1)
+			continue;
+		svm_migrate_put_vram_page(adev, dst[i]);
+		migrate->dst[i] = 0;
+		svm_migrate_put_vram_page(adev, dst[i + 1]);
+		migrate->dst[i + 1] = 0;
+		svm_migrate_put_vram_page(adev, dst[i + 2]);
+		migrate->dst[i + 2] = 0;
+		svm_migrate_put_vram_page(adev, dst[i + 3]);
+		migrate->dst[i + 3] = 0;
+	}
+#endif
+
+	return r;
+}
+
+static long
+svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
+			struct vm_area_struct *vma, uint64_t start,
+			uint64_t end, uint32_t trigger, uint64_t ttm_res_offset)
+{
+	struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
+	uint64_t npages = (end - start) >> PAGE_SHIFT;
+	struct amdgpu_device *adev = node->adev;
+	struct kfd_process_device *pdd;
+	struct dma_fence *mfence = NULL;
+	struct migrate_vma migrate = { 0 };
+	unsigned long cpages = 0;
+	dma_addr_t *scratch;
+	void *buf;
+	int r = -ENOMEM;
+
+	memset(&migrate, 0, sizeof(migrate));
+	migrate.vma = vma;
+	migrate.start = start;
+	migrate.end = end;
+	migrate.flags = MIGRATE_VMA_SELECT_SYSTEM;
+	migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
+
+	buf = kvcalloc(npages,
+		       2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t),
+		       GFP_KERNEL);
+	if (!buf)
+		goto out;
+
+	migrate.src = buf;
+	migrate.dst = migrate.src + npages;
+	scratch = (dma_addr_t *)(migrate.dst + npages);
+
+	kfd_smi_event_migration_start(node, p->lead_thread->pid,
+				      start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+				      0, node->id, prange->prefetch_loc,
+				      prange->preferred_loc, trigger);
+
+	r = migrate_vma_setup(&migrate);
+	if (r) {
+		dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
+			__func__, r, prange->start, prange->last);
+		goto out_free;
+	}
+
+	cpages = migrate.cpages;
+	if (!cpages) {
+		pr_debug("failed collect migrate sys pages [0x%lx 0x%lx]\n",
+			 prange->start, prange->last);
+		goto out_free;
+	}
+	if (cpages != npages)
+		pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+			 cpages, npages);
+	else
+		pr_debug("0x%lx pages migrated\n", cpages);
+
+	r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset);
+	migrate_vma_pages(&migrate);
+
+	pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
+		svm_migrate_successful_pages(&migrate), cpages, migrate.npages);
+
+	svm_migrate_copy_done(adev, mfence);
+	migrate_vma_finalize(&migrate);
+
+	kfd_smi_event_migration_end(node, p->lead_thread->pid,
+				    start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+				    0, node->id, trigger);
+
+	svm_range_dma_unmap(adev->dev, scratch, 0, npages);
+
+out_free:
+	kvfree(buf);
+out:
+	if (!r && cpages) {
+		pdd = svm_range_get_pdd_by_node(prange, node);
+		if (pdd)
+			WRITE_ONCE(pdd->page_in, pdd->page_in + cpages);
+
+		return cpages;
+	}
+	return r;
+}
+
+/**
+ * svm_migrate_ram_to_vram - migrate svm range from system to device
+ * @prange: range structure
+ * @best_loc: the device to migrate to
+ * @mm: the process mm structure
+ * @trigger: reason of migration
+ *
+ * Context: Process context, caller hold mmap read lock, svms lock, prange lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+static int
+svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+			struct mm_struct *mm, uint32_t trigger)
+{
+	unsigned long addr, start, end;
+	struct vm_area_struct *vma;
+	uint64_t ttm_res_offset;
+	struct kfd_node *node;
+	unsigned long cpages = 0;
+	long r = 0;
+
+	if (prange->actual_loc == best_loc) {
+		pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
+			 prange->svms, prange->start, prange->last, best_loc);
+		return 0;
+	}
+
+	node = svm_range_get_node_by_id(prange, best_loc);
+	if (!node) {
+		pr_debug("failed to get kfd node by id 0x%x\n", best_loc);
+		return -ENODEV;
+	}
+
+	pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
+		 prange->start, prange->last, best_loc);
+
+	start = prange->start << PAGE_SHIFT;
+	end = (prange->last + 1) << PAGE_SHIFT;
+
+	r = svm_range_vram_node_new(node, prange, true);
+	if (r) {
+		dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r);
+		return r;
+	}
+	ttm_res_offset = prange->offset << PAGE_SHIFT;
+
+	for (addr = start; addr < end;) {
+		unsigned long next;
+
+		vma = vma_lookup(mm, addr);
+		if (!vma)
+			break;
+
+		next = min(vma->vm_end, end);
+		r = svm_migrate_vma_to_vram(node, prange, vma, addr, next, trigger, ttm_res_offset);
+		if (r < 0) {
+			pr_debug("failed %ld to migrate\n", r);
+			break;
+		} else {
+			cpages += r;
+		}
+		ttm_res_offset += next - addr;
+		addr = next;
+	}
+
+	if (cpages) {
+		prange->actual_loc = best_loc;
+		svm_range_free_dma_mappings(prange, true);
+	} else {
+		svm_range_vram_node_free(prange);
+	}
+
+	return r < 0 ? r : 0;
+}
+
+static void svm_migrate_page_free(struct page *page)
+{
+	struct svm_range_bo *svm_bo = page->zone_device_data;
+
+	if (svm_bo) {
+		pr_debug_ratelimited("ref: %d\n", kref_read(&svm_bo->kref));
+		svm_range_bo_unref_async(svm_bo);
+	}
+}
+
+static int
+svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
+			struct migrate_vma *migrate, struct dma_fence **mfence,
+			dma_addr_t *scratch, uint64_t npages)
+{
+	struct device *dev = adev->dev;
+	uint64_t *src;
+	dma_addr_t *dst;
+	struct page *dpage;
+	uint64_t i = 0, j;
+	uint64_t addr;
+	int r = 0;
+
+	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
+		 prange->last);
+
+	addr = prange->start << PAGE_SHIFT;
+
+	src = (uint64_t *)(scratch + npages);
+	dst = scratch;
+
+	for (i = 0, j = 0; i < npages; i++, addr += PAGE_SIZE) {
+		struct page *spage;
+
+		spage = migrate_pfn_to_page(migrate->src[i]);
+		if (!spage || !is_zone_device_page(spage)) {
+			pr_debug("invalid page. Could be in CPU already svms 0x%p [0x%lx 0x%lx]\n",
+				 prange->svms, prange->start, prange->last);
+			if (j) {
+				r = svm_migrate_copy_memory_gart(adev, dst + i - j,
+								 src + i - j, j,
+								 FROM_VRAM_TO_RAM,
+								 mfence);
+				if (r)
+					goto out_oom;
+				j = 0;
+			}
+			continue;
+		}
+		src[i] = svm_migrate_addr(adev, spage);
+		if (j > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
+			r = svm_migrate_copy_memory_gart(adev, dst + i - j,
+							 src + i - j, j,
+							 FROM_VRAM_TO_RAM,
+							 mfence);
+			if (r)
+				goto out_oom;
+			j = 0;
+		}
+
+		dpage = svm_migrate_get_sys_page(migrate->vma, addr);
+		if (!dpage) {
+			pr_debug("failed get page svms 0x%p [0x%lx 0x%lx]\n",
+				 prange->svms, prange->start, prange->last);
+			r = -ENOMEM;
+			goto out_oom;
+		}
+
+		dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+		r = dma_mapping_error(dev, dst[i]);
+		if (r) {
+			dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r);
+			goto out_oom;
+		}
+
+		pr_debug_ratelimited("dma mapping dst to 0x%llx, pfn 0x%lx\n",
+				     dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));
+
+		migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
+		j++;
+	}
+
+	r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j,
+					 FROM_VRAM_TO_RAM, mfence);
+
+out_oom:
+	if (r) {
+		pr_debug("failed %d copy to ram\n", r);
+		while (i--) {
+			svm_migrate_put_sys_page(dst[i]);
+			migrate->dst[i] = 0;
+		}
+	}
+
+	return r;
+}
+
+/**
+ * svm_migrate_vma_to_ram - migrate range inside one vma from device to system
+ *
+ * @prange: svm range structure
+ * @vma: vm_area_struct that range [start, end] belongs to
+ * @start: range start virtual address in pages
+ * @end: range end virtual address in pages
+ * @node: kfd node device to migrate from
+ * @trigger: reason of migration
+ * @fault_page: is from vmf->page, svm_migrate_to_ram(), this is CPU page fault callback
+ *
+ * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
+ *
+ * Return:
+ *   0 - success with all pages migrated
+ *   negative values - indicate error
+ *   positive values - partial migration, number of pages not migrated
+ */
+static long
+svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
+		       struct vm_area_struct *vma, uint64_t start, uint64_t end,
+		       uint32_t trigger, struct page *fault_page)
+{
+	struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
+	uint64_t npages = (end - start) >> PAGE_SHIFT;
+	unsigned long upages = npages;
+	unsigned long cpages = 0;
+	struct amdgpu_device *adev = node->adev;
+	struct kfd_process_device *pdd;
+	struct dma_fence *mfence = NULL;
+	struct migrate_vma migrate = { 0 };
+	dma_addr_t *scratch;
+	void *buf;
+	int r = -ENOMEM;
+
+	memset(&migrate, 0, sizeof(migrate));
+	migrate.vma = vma;
+	migrate.start = start;
+	migrate.end = end;
+	migrate.pgmap_owner = SVM_ADEV_PGMAP_OWNER(adev);
+	if (adev->gmc.xgmi.connected_to_cpu)
+		migrate.flags = MIGRATE_VMA_SELECT_DEVICE_COHERENT;
+	else
+		migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
+
+	buf = kvcalloc(npages,
+		       2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t),
+		       GFP_KERNEL);
+	if (!buf)
+		goto out;
+
+	migrate.src = buf;
+	migrate.dst = migrate.src + npages;
+	migrate.fault_page = fault_page;
+	scratch = (dma_addr_t *)(migrate.dst + npages);
+
+	kfd_smi_event_migration_start(node, p->lead_thread->pid,
+				      start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+				      node->id, 0, prange->prefetch_loc,
+				      prange->preferred_loc, trigger);
+
+	r = migrate_vma_setup(&migrate);
+	if (r) {
+		dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
+			__func__, r, prange->start, prange->last);
+		goto out_free;
+	}
+
+	cpages = migrate.cpages;
+	if (!cpages) {
+		pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n",
+			 prange->start, prange->last);
+		upages = svm_migrate_unsuccessful_pages(&migrate);
+		goto out_free;
+	}
+	if (cpages != npages)
+		pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+			 cpages, npages);
+	else
+		pr_debug("0x%lx pages migrated\n", cpages);
+
+	r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
+				    scratch, npages);
+	migrate_vma_pages(&migrate);
+
+	upages = svm_migrate_unsuccessful_pages(&migrate);
+	pr_debug("unsuccessful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
+		 upages, cpages, migrate.npages);
+
+	svm_migrate_copy_done(adev, mfence);
+	migrate_vma_finalize(&migrate);
+
+	kfd_smi_event_migration_end(node, p->lead_thread->pid,
+				    start >> PAGE_SHIFT, end >> PAGE_SHIFT,
+				    node->id, 0, trigger);
+
+	svm_range_dma_unmap(adev->dev, scratch, 0, npages);
+
+out_free:
+	kvfree(buf);
+out:
+	if (!r && cpages) {
+		pdd = svm_range_get_pdd_by_node(prange, node);
+		if (pdd)
+			WRITE_ONCE(pdd->page_out, pdd->page_out + cpages);
+	}
+	return r ? r : upages;
+}
+
+/**
+ * svm_migrate_vram_to_ram - migrate svm range from device to system
+ * @prange: range structure
+ * @mm: process mm, use current->mm if NULL
+ * @trigger: reason of migration
+ * @fault_page: is from vmf->page, svm_migrate_to_ram(), this is CPU page fault callback
+ *
+ * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
+			    uint32_t trigger, struct page *fault_page)
+{
+	struct kfd_node *node;
+	struct vm_area_struct *vma;
+	unsigned long addr;
+	unsigned long start;
+	unsigned long end;
+	unsigned long upages = 0;
+	long r = 0;
+
+	if (!prange->actual_loc) {
+		pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
+			 prange->start, prange->last);
+		return 0;
+	}
+
+	node = svm_range_get_node_by_id(prange, prange->actual_loc);
+	if (!node) {
+		pr_debug("failed to get kfd node by id 0x%x\n", prange->actual_loc);
+		return -ENODEV;
+	}
+	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n",
+		 prange->svms, prange, prange->start, prange->last,
+		 prange->actual_loc);
+
+	start = prange->start << PAGE_SHIFT;
+	end = (prange->last + 1) << PAGE_SHIFT;
+
+	for (addr = start; addr < end;) {
+		unsigned long next;
+
+		vma = vma_lookup(mm, addr);
+		if (!vma) {
+			pr_debug("failed to find vma for prange %p\n", prange);
+			r = -EFAULT;
+			break;
+		}
+
+		next = min(vma->vm_end, end);
+		r = svm_migrate_vma_to_ram(node, prange, vma, addr, next, trigger,
+			fault_page);
+		if (r < 0) {
+			pr_debug("failed %ld to migrate prange %p\n", r, prange);
+			break;
+		} else {
+			upages += r;
+		}
+		addr = next;
+	}
+
+	if (r >= 0 && !upages) {
+		svm_range_vram_node_free(prange);
+		prange->actual_loc = 0;
+	}
+
+	return r < 0 ? r : 0;
+}
+
+/**
+ * svm_migrate_vram_to_vram - migrate svm range from device to device
+ * @prange: range structure
+ * @best_loc: the device to migrate to
+ * @mm: process mm, use current->mm if NULL
+ * @trigger: reason of migration
+ *
+ * Context: Process context, caller hold mmap read lock, svms lock, prange lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+static int
+svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
+			 struct mm_struct *mm, uint32_t trigger)
+{
+	int r, retries = 3;
+
+	/*
+	 * TODO: for both devices with PCIe large bar or on same xgmi hive, skip
+	 * system memory as migration bridge
+	 */
+
+	pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);
+
+	do {
+		r = svm_migrate_vram_to_ram(prange, mm, trigger, NULL);
+		if (r)
+			return r;
+	} while (prange->actual_loc && --retries);
+
+	if (prange->actual_loc)
+		return -EDEADLK;
+
+	return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
+}
+
+int
+svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
+		    struct mm_struct *mm, uint32_t trigger)
+{
+	if  (!prange->actual_loc)
+		return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
+	else
+		return svm_migrate_vram_to_vram(prange, best_loc, mm, trigger);
+
+}
+
+/**
+ * svm_migrate_to_ram - CPU page fault handler
+ * @vmf: CPU vm fault vma, address
+ *
+ * Context: vm fault handler, caller holds the mmap read lock
+ *
+ * Return:
+ * 0 - OK
+ * VM_FAULT_SIGBUS - notice application to have SIGBUS page fault
+ */
+static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
+{
+	unsigned long addr = vmf->address;
+	struct svm_range_bo *svm_bo;
+	enum svm_work_list_ops op;
+	struct svm_range *parent;
+	struct svm_range *prange;
+	struct kfd_process *p;
+	struct mm_struct *mm;
+	int r = 0;
+
+	svm_bo = vmf->page->zone_device_data;
+	if (!svm_bo) {
+		pr_debug("failed get device page at addr 0x%lx\n", addr);
+		return VM_FAULT_SIGBUS;
+	}
+	if (!mmget_not_zero(svm_bo->eviction_fence->mm)) {
+		pr_debug("addr 0x%lx of process mm is destroyed\n", addr);
+		return VM_FAULT_SIGBUS;
+	}
+
+	mm = svm_bo->eviction_fence->mm;
+	if (mm != vmf->vma->vm_mm)
+		pr_debug("addr 0x%lx is COW mapping in child process\n", addr);
+
+	p = kfd_lookup_process_by_mm(mm);
+	if (!p) {
+		pr_debug("failed find process at fault address 0x%lx\n", addr);
+		r = VM_FAULT_SIGBUS;
+		goto out_mmput;
+	}
+	if (READ_ONCE(p->svms.faulting_task) == current) {
+		pr_debug("skipping ram migration\n");
+		r = 0;
+		goto out_unref_process;
+	}
+
+	pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr);
+	addr >>= PAGE_SHIFT;
+
+	mutex_lock(&p->svms.lock);
+
+	prange = svm_range_from_addr(&p->svms, addr, &parent);
+	if (!prange) {
+		pr_debug("failed get range svms 0x%p addr 0x%lx\n", &p->svms, addr);
+		r = -EFAULT;
+		goto out_unlock_svms;
+	}
+
+	mutex_lock(&parent->migrate_mutex);
+	if (prange != parent)
+		mutex_lock_nested(&prange->migrate_mutex, 1);
+
+	if (!prange->actual_loc)
+		goto out_unlock_prange;
+
+	svm_range_lock(parent);
+	if (prange != parent)
+		mutex_lock_nested(&prange->lock, 1);
+	r = svm_range_split_by_granularity(p, mm, addr, parent, prange);
+	if (prange != parent)
+		mutex_unlock(&prange->lock);
+	svm_range_unlock(parent);
+	if (r) {
+		pr_debug("failed %d to split range by granularity\n", r);
+		goto out_unlock_prange;
+	}
+
+	r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm,
+				    KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU,
+				    vmf->page);
+	if (r)
+		pr_debug("failed %d migrate svms 0x%p range 0x%p [0x%lx 0x%lx]\n",
+			 r, prange->svms, prange, prange->start, prange->last);
+
+	/* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
+	if (p->xnack_enabled && parent == prange)
+		op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
+	else
+		op = SVM_OP_UPDATE_RANGE_NOTIFIER;
+	svm_range_add_list_work(&p->svms, parent, mm, op);
+	schedule_deferred_list_work(&p->svms);
+
+out_unlock_prange:
+	if (prange != parent)
+		mutex_unlock(&prange->migrate_mutex);
+	mutex_unlock(&parent->migrate_mutex);
+out_unlock_svms:
+	mutex_unlock(&p->svms.lock);
+out_unref_process:
+	pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr);
+	kfd_unref_process(p);
+out_mmput:
+	mmput(mm);
+	return r ? VM_FAULT_SIGBUS : 0;
+}
+
+static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
+	.page_free		= svm_migrate_page_free,
+	.migrate_to_ram		= svm_migrate_to_ram,
+};
+
+/* Each VRAM page uses sizeof(struct page) on system memory */
+#define SVM_HMM_PAGE_STRUCT_SIZE(size) ((size)/PAGE_SIZE * sizeof(struct page))
+
+int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
+{
+	struct amdgpu_kfd_dev *kfddev = &adev->kfd;
+	struct dev_pagemap *pgmap;
+	struct resource *res = NULL;
+	unsigned long size;
+	void *r;
+
+	/* Page migration works on gfx9 or newer */
+	if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 1))
+		return -EINVAL;
+
+	if (adev->gmc.is_app_apu)
+		return 0;
+
+	pgmap = &kfddev->pgmap;
+	memset(pgmap, 0, sizeof(*pgmap));
+
+	/* TODO: register all vram to HMM for now.
+	 * should remove reserved size
+	 */
+	size = ALIGN(adev->gmc.real_vram_size, 2ULL << 20);
+	if (adev->gmc.xgmi.connected_to_cpu) {
+		pgmap->range.start = adev->gmc.aper_base;
+		pgmap->range.end = adev->gmc.aper_base + adev->gmc.aper_size - 1;
+		pgmap->type = MEMORY_DEVICE_COHERENT;
+	} else {
+		res = devm_request_free_mem_region(adev->dev, &iomem_resource, size);
+		if (IS_ERR(res))
+			return PTR_ERR(res);
+		pgmap->range.start = res->start;
+		pgmap->range.end = res->end;
+		pgmap->type = MEMORY_DEVICE_PRIVATE;
+	}
+
+	pgmap->nr_range = 1;
+	pgmap->ops = &svm_migrate_pgmap_ops;
+	pgmap->owner = SVM_ADEV_PGMAP_OWNER(adev);
+	pgmap->flags = 0;
+	/* Device manager releases device-specific resources, memory region and
+	 * pgmap when driver disconnects from device.
+	 */
+	r = devm_memremap_pages(adev->dev, pgmap);
+	if (IS_ERR(r)) {
+		pr_err("failed to register HMM device memory\n");
+		if (pgmap->type == MEMORY_DEVICE_PRIVATE)
+			devm_release_mem_region(adev->dev, res->start, resource_size(res));
+		/* Disable SVM support capability */
+		pgmap->type = 0;
+		return PTR_ERR(r);
+	}
+
+	pr_debug("reserve %ldMB system memory for VRAM pages struct\n",
+		 SVM_HMM_PAGE_STRUCT_SIZE(size) >> 20);
+
+	amdgpu_amdkfd_reserve_system_mem(SVM_HMM_PAGE_STRUCT_SIZE(size));
+
+	pr_info("HMM registered %ldMB device memory\n", size >> 20);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
new file mode 100644
index 000000000..487f26368
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2020-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_MIGRATE_H_
+#define KFD_MIGRATE_H_
+
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+
+#include <linux/rwsem.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/sched/mm.h>
+#include <linux/hmm.h>
+#include "kfd_priv.h"
+#include "kfd_svm.h"
+
+enum MIGRATION_COPY_DIR {
+	FROM_RAM_TO_VRAM = 0,
+	FROM_VRAM_TO_RAM
+};
+
+int svm_migrate_to_vram(struct svm_range *prange,  uint32_t best_loc,
+			struct mm_struct *mm, uint32_t trigger);
+int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
+			    uint32_t trigger, struct page *fault_page);
+unsigned long
+svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
+
+#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
+
+#endif /* KFD_MIGRATE_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
new file mode 100644
index 000000000..aee2212e5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/sched.h>
+#include <linux/device.h>
+#include "kfd_priv.h"
+#include "amdgpu_amdkfd.h"
+
+static int kfd_init(void)
+{
+	int err;
+
+	/* Verify module parameters */
+	if ((sched_policy < KFD_SCHED_POLICY_HWS) ||
+		(sched_policy > KFD_SCHED_POLICY_NO_HWS)) {
+		pr_err("sched_policy has invalid value\n");
+		return -EINVAL;
+	}
+
+	/* Verify module parameters */
+	if ((max_num_of_queues_per_device < 1) ||
+		(max_num_of_queues_per_device >
+			KFD_MAX_NUM_OF_QUEUES_PER_DEVICE)) {
+		pr_err("max_num_of_queues_per_device must be between 1 to KFD_MAX_NUM_OF_QUEUES_PER_DEVICE\n");
+		return -EINVAL;
+	}
+
+	err = kfd_chardev_init();
+	if (err < 0)
+		goto err_ioctl;
+
+	err = kfd_topology_init();
+	if (err < 0)
+		goto err_topology;
+
+	err = kfd_process_create_wq();
+	if (err < 0)
+		goto err_create_wq;
+
+	/* Ignore the return value, so that we can continue
+	 * to init the KFD, even if procfs isn't craated
+	 */
+	kfd_procfs_init();
+
+	kfd_debugfs_init();
+
+	return 0;
+
+err_create_wq:
+	kfd_topology_shutdown();
+err_topology:
+	kfd_chardev_exit();
+err_ioctl:
+	pr_err("KFD is disabled due to module initialization failure\n");
+	return err;
+}
+
+static void kfd_exit(void)
+{
+	kfd_cleanup_processes();
+	kfd_debugfs_fini();
+	kfd_process_destroy_wq();
+	kfd_procfs_shutdown();
+	kfd_topology_shutdown();
+	kfd_chardev_exit();
+}
+
+int kgd2kfd_init(void)
+{
+	return kfd_init();
+}
+
+void kgd2kfd_exit(void)
+{
+	kfd_exit();
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
new file mode 100644
index 000000000..447829c22
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_mqd_manager.h"
+#include "amdgpu_amdkfd.h"
+#include "kfd_device_queue_manager.h"
+
+/* Mapping queue priority to pipe priority, indexed by queue priority */
+int pipe_priority_map[] = {
+	KFD_PIPE_PRIORITY_CS_LOW,
+	KFD_PIPE_PRIORITY_CS_LOW,
+	KFD_PIPE_PRIORITY_CS_LOW,
+	KFD_PIPE_PRIORITY_CS_LOW,
+	KFD_PIPE_PRIORITY_CS_LOW,
+	KFD_PIPE_PRIORITY_CS_LOW,
+	KFD_PIPE_PRIORITY_CS_LOW,
+	KFD_PIPE_PRIORITY_CS_MEDIUM,
+	KFD_PIPE_PRIORITY_CS_MEDIUM,
+	KFD_PIPE_PRIORITY_CS_MEDIUM,
+	KFD_PIPE_PRIORITY_CS_MEDIUM,
+	KFD_PIPE_PRIORITY_CS_HIGH,
+	KFD_PIPE_PRIORITY_CS_HIGH,
+	KFD_PIPE_PRIORITY_CS_HIGH,
+	KFD_PIPE_PRIORITY_CS_HIGH,
+	KFD_PIPE_PRIORITY_CS_HIGH
+};
+
+struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_node *dev, struct queue_properties *q)
+{
+	struct kfd_mem_obj *mqd_mem_obj;
+
+	mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
+	if (!mqd_mem_obj)
+		return NULL;
+
+	mqd_mem_obj->gtt_mem = dev->dqm->hiq_sdma_mqd.gtt_mem;
+	mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr;
+	mqd_mem_obj->cpu_ptr = dev->dqm->hiq_sdma_mqd.cpu_ptr;
+
+	return mqd_mem_obj;
+}
+
+struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_node *dev,
+					struct queue_properties *q)
+{
+	struct kfd_mem_obj *mqd_mem_obj;
+	uint64_t offset;
+
+	mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
+	if (!mqd_mem_obj)
+		return NULL;
+
+	offset = (q->sdma_engine_id *
+		dev->kfd->device_info.num_sdma_queues_per_engine +
+		q->sdma_queue_id) *
+		dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
+
+	offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
+		  NUM_XCC(dev->xcc_mask);
+
+	mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem
+				+ offset);
+	mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr + offset;
+	mqd_mem_obj->cpu_ptr = (uint32_t *)((uint64_t)
+				dev->dqm->hiq_sdma_mqd.cpu_ptr + offset);
+
+	return mqd_mem_obj;
+}
+
+void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
+			struct kfd_mem_obj *mqd_mem_obj)
+{
+	WARN_ON(!mqd_mem_obj->gtt_mem);
+	kfree(mqd_mem_obj);
+}
+
+void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
+		const uint32_t *cu_mask, uint32_t cu_mask_count,
+		uint32_t *se_mask, uint32_t inst)
+{
+	struct kfd_cu_info cu_info;
+	uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
+	bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0);
+	uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1;
+	int i, se, sh, cu, cu_bitmap_sh_mul, cu_inc = wgp_mode_req ? 2 : 1;
+	uint32_t cu_active_per_node;
+	int inc = cu_inc * NUM_XCC(mm->dev->xcc_mask);
+	int xcc_inst = inst + ffs(mm->dev->xcc_mask) - 1;
+
+	amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info);
+
+	cu_active_per_node = cu_info.cu_active_number / mm->dev->kfd->num_nodes;
+	if (cu_mask_count > cu_active_per_node)
+		cu_mask_count = cu_active_per_node;
+
+	/* Exceeding these bounds corrupts the stack and indicates a coding error.
+	 * Returning with no CU's enabled will hang the queue, which should be
+	 * attention grabbing.
+	 */
+	if (cu_info.num_shader_engines > KFD_MAX_NUM_SE) {
+		pr_err("Exceeded KFD_MAX_NUM_SE, chip reports %d\n", cu_info.num_shader_engines);
+		return;
+	}
+	if (cu_info.num_shader_arrays_per_engine > KFD_MAX_NUM_SH_PER_SE) {
+		pr_err("Exceeded KFD_MAX_NUM_SH, chip reports %d\n",
+			cu_info.num_shader_arrays_per_engine * cu_info.num_shader_engines);
+		return;
+	}
+
+	cu_bitmap_sh_mul = (KFD_GC_VERSION(mm->dev) >= IP_VERSION(11, 0, 0) &&
+			    KFD_GC_VERSION(mm->dev) < IP_VERSION(12, 0, 0)) ? 2 : 1;
+
+	/* Count active CUs per SH.
+	 *
+	 * Some CUs in an SH may be disabled.	HW expects disabled CUs to be
+	 * represented in the high bits of each SH's enable mask (the upper and lower
+	 * 16 bits of se_mask) and will take care of the actual distribution of
+	 * disabled CUs within each SH automatically.
+	 * Each half of se_mask must be filled only on bits 0-cu_per_sh[se][sh]-1.
+	 *
+	 * See note on Arcturus cu_bitmap layout in gfx_v9_0_get_cu_info.
+	 * See note on GFX11 cu_bitmap layout in gfx_v11_0_get_cu_info.
+	 */
+	for (se = 0; se < cu_info.num_shader_engines; se++)
+		for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++)
+			cu_per_sh[se][sh] = hweight32(
+				cu_info.cu_bitmap[xcc_inst][se % 4][sh + (se / 4) *
+				cu_bitmap_sh_mul]);
+
+	/* Symmetrically map cu_mask to all SEs & SHs:
+	 * se_mask programs up to 2 SH in the upper and lower 16 bits.
+	 *
+	 * Examples
+	 * Assuming 1 SH/SE, 4 SEs:
+	 * cu_mask[0] bit0 -> se_mask[0] bit0
+	 * cu_mask[0] bit1 -> se_mask[1] bit0
+	 * ...
+	 * cu_mask[0] bit4 -> se_mask[0] bit1
+	 * ...
+	 *
+	 * Assuming 2 SH/SE, 4 SEs
+	 * cu_mask[0] bit0 -> se_mask[0] bit0 (SE0,SH0,CU0)
+	 * cu_mask[0] bit1 -> se_mask[1] bit0 (SE1,SH0,CU0)
+	 * ...
+	 * cu_mask[0] bit4 -> se_mask[0] bit16 (SE0,SH1,CU0)
+	 * cu_mask[0] bit5 -> se_mask[1] bit16 (SE1,SH1,CU0)
+	 * ...
+	 * cu_mask[0] bit8 -> se_mask[0] bit1 (SE0,SH0,CU1)
+	 * ...
+	 *
+	 * For GFX 9.4.3, the following code only looks at a
+	 * subset of the cu_mask corresponding to the inst parameter.
+	 * If we have n XCCs under one GPU node
+	 * cu_mask[0] bit0 -> XCC0 se_mask[0] bit0 (XCC0,SE0,SH0,CU0)
+	 * cu_mask[0] bit1 -> XCC1 se_mask[0] bit0 (XCC1,SE0,SH0,CU0)
+	 * ..
+	 * cu_mask[0] bitn -> XCCn se_mask[0] bit0 (XCCn,SE0,SH0,CU0)
+	 * cu_mask[0] bit n+1 -> XCC0 se_mask[1] bit0 (XCC0,SE1,SH0,CU0)
+	 *
+	 * For example, if there are 6 XCCs under 1 KFD node, this code
+	 * running for each inst, will look at the bits as:
+	 * inst, inst + 6, inst + 12...
+	 *
+	 * First ensure all CUs are disabled, then enable user specified CUs.
+	 */
+	for (i = 0; i < cu_info.num_shader_engines; i++)
+		se_mask[i] = 0;
+
+	i = inst;
+	for (cu = 0; cu < 16; cu += cu_inc) {
+		for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) {
+			for (se = 0; se < cu_info.num_shader_engines; se++) {
+				if (cu_per_sh[se][sh] > cu) {
+					if (cu_mask[i / 32] & (en_mask << (i % 32)))
+						se_mask[se] |= en_mask << (cu + sh * 16);
+					i += inc;
+					if (i >= cu_mask_count)
+						return;
+				}
+			}
+		}
+	}
+}
+
+int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+		     uint32_t pipe_id, uint32_t queue_id,
+		     struct queue_properties *p, struct mm_struct *mms)
+{
+	return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd, pipe_id,
+					      queue_id, p->doorbell_off, 0);
+}
+
+int kfd_destroy_mqd_cp(struct mqd_manager *mm, void *mqd,
+		enum kfd_preempt_type type, unsigned int timeout,
+		uint32_t pipe_id, uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, mqd, type, timeout,
+						pipe_id, queue_id, 0);
+}
+
+void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
+	      struct kfd_mem_obj *mqd_mem_obj)
+{
+	if (mqd_mem_obj->gtt_mem) {
+		amdgpu_amdkfd_free_gtt_mem(mm->dev->adev, mqd_mem_obj->gtt_mem);
+		kfree(mqd_mem_obj);
+	} else {
+		kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+	}
+}
+
+bool kfd_is_occupied_cp(struct mqd_manager *mm, void *mqd,
+		 uint64_t queue_address, uint32_t pipe_id,
+		 uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->adev, queue_address,
+						pipe_id, queue_id, 0);
+}
+
+int kfd_load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		  uint32_t pipe_id, uint32_t queue_id,
+		  struct queue_properties *p, struct mm_struct *mms)
+{
+	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,
+						(uint32_t __user *)p->write_ptr,
+						mms);
+}
+
+/*
+ * preempt type here is ignored because there is only one way
+ * to preempt sdma queue
+ */
+int kfd_destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		     enum kfd_preempt_type type,
+		     unsigned int timeout, uint32_t pipe_id,
+		     uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);
+}
+
+bool kfd_is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+		      uint64_t queue_address, uint32_t pipe_id,
+		      uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);
+}
+
+uint64_t kfd_hiq_mqd_stride(struct kfd_node *dev)
+{
+	return dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
+}
+
+void kfd_get_hiq_xcc_mqd(struct kfd_node *dev, struct kfd_mem_obj *mqd_mem_obj,
+		     uint32_t virtual_xcc_id)
+{
+	uint64_t offset;
+
+	offset = kfd_hiq_mqd_stride(dev) * virtual_xcc_id;
+
+	mqd_mem_obj->gtt_mem = (virtual_xcc_id == 0) ?
+			dev->dqm->hiq_sdma_mqd.gtt_mem : NULL;
+	mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr + offset;
+	mqd_mem_obj->cpu_ptr = (uint32_t *)((uintptr_t)
+				dev->dqm->hiq_sdma_mqd.cpu_ptr + offset);
+}
+
+uint64_t kfd_mqd_stride(struct mqd_manager *mm,
+			struct queue_properties *q)
+{
+	return mm->mqd_size;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
new file mode 100644
index 000000000..57bf5e513
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_MQD_MANAGER_H_
+#define KFD_MQD_MANAGER_H_
+
+#include "kfd_priv.h"
+
+#define KFD_MAX_NUM_SE 8
+#define KFD_MAX_NUM_SH_PER_SE 2
+
+/**
+ * struct mqd_manager
+ *
+ * @init_mqd: Allocates the mqd buffer on local gpu memory and initialize it.
+ *
+ * @load_mqd: Loads the mqd to a concrete hqd slot. Used only for no cp
+ * scheduling mode.
+ *
+ * @update_mqd: Handles a update call for the MQD
+ *
+ * @destroy_mqd: Destroys the HQD slot and by that preempt the relevant queue.
+ * Used only for no cp scheduling.
+ *
+ * @free_mqd: Releases the mqd buffer from local gpu memory.
+ *
+ * @is_occupied: Checks if the relevant HQD slot is occupied.
+ *
+ * @get_wave_state: Retrieves context save state and optionally copies the
+ * control stack, if kept in the MQD, to the given userspace address.
+ *
+ * @mqd_mutex: Mqd manager mutex.
+ *
+ * @dev: The kfd device structure coupled with this module.
+ *
+ * MQD stands for Memory Queue Descriptor which represents the current queue
+ * state in the memory and initiate the HQD (Hardware Queue Descriptor) state.
+ * This structure is actually a base class for the different types of MQDs
+ * structures for the variant ASICs that should be supported in the future.
+ * This base class is also contains all the MQD specific operations.
+ * Another important thing to mention is that each queue has a MQD that keeps
+ * his state (or context) after each preemption or reassignment.
+ * Basically there are a instances of the mqd manager class per MQD type per
+ * ASIC. Currently the kfd driver supports only Kaveri so there are instances
+ * per KFD_MQD_TYPE for each device.
+ *
+ */
+extern int pipe_priority_map[];
+struct mqd_manager {
+	struct kfd_mem_obj*	(*allocate_mqd)(struct kfd_node *kfd,
+		struct queue_properties *q);
+
+	void	(*init_mqd)(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q);
+
+	int	(*load_mqd)(struct mqd_manager *mm, void *mqd,
+				uint32_t pipe_id, uint32_t queue_id,
+				struct queue_properties *p,
+				struct mm_struct *mms);
+
+	void	(*update_mqd)(struct mqd_manager *mm, void *mqd,
+				struct queue_properties *q,
+				struct mqd_update_info *minfo);
+
+	int	(*destroy_mqd)(struct mqd_manager *mm, void *mqd,
+				enum kfd_preempt_type type,
+				unsigned int timeout, uint32_t pipe_id,
+				uint32_t queue_id);
+
+	void	(*free_mqd)(struct mqd_manager *mm, void *mqd,
+				struct kfd_mem_obj *mqd_mem_obj);
+
+	bool	(*is_occupied)(struct mqd_manager *mm, void *mqd,
+				uint64_t queue_address,	uint32_t pipe_id,
+				uint32_t queue_id);
+
+	int	(*get_wave_state)(struct mqd_manager *mm, void *mqd,
+				  struct queue_properties *q,
+				  void __user *ctl_stack,
+				  u32 *ctl_stack_used_size,
+				  u32 *save_area_used_size);
+
+	void	(*get_checkpoint_info)(struct mqd_manager *mm, void *mqd, uint32_t *ctl_stack_size);
+
+	void	(*checkpoint_mqd)(struct mqd_manager *mm,
+				  void *mqd,
+				  void *mqd_dst,
+				  void *ctl_stack_dst);
+
+	void	(*restore_mqd)(struct mqd_manager *mm, void **mqd,
+				struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+				struct queue_properties *p,
+				const void *mqd_src,
+				const void *ctl_stack_src,
+				const u32 ctl_stack_size);
+
+#if defined(CONFIG_DEBUG_FS)
+	int	(*debugfs_show_mqd)(struct seq_file *m, void *data);
+#endif
+	uint32_t (*read_doorbell_id)(void *mqd);
+	uint64_t (*mqd_stride)(struct mqd_manager *mm,
+				struct queue_properties *p);
+
+	struct mutex	mqd_mutex;
+	struct kfd_node	*dev;
+	uint32_t mqd_size;
+};
+
+struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_node *dev,
+				struct queue_properties *q);
+
+struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_node *dev,
+					struct queue_properties *q);
+void free_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
+				struct kfd_mem_obj *mqd_mem_obj);
+
+void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
+		const uint32_t *cu_mask, uint32_t cu_mask_count,
+		uint32_t *se_mask, uint32_t inst);
+
+int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
+		uint32_t pipe_id, uint32_t queue_id,
+		struct queue_properties *p, struct mm_struct *mms);
+
+int kfd_destroy_mqd_cp(struct mqd_manager *mm, void *mqd,
+		enum kfd_preempt_type type, unsigned int timeout,
+		uint32_t pipe_id, uint32_t queue_id);
+
+void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
+		struct kfd_mem_obj *mqd_mem_obj);
+
+bool kfd_is_occupied_cp(struct mqd_manager *mm, void *mqd,
+		 uint64_t queue_address, uint32_t pipe_id,
+		 uint32_t queue_id);
+
+int kfd_load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		uint32_t pipe_id, uint32_t queue_id,
+		struct queue_properties *p, struct mm_struct *mms);
+
+int kfd_destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		enum kfd_preempt_type type, unsigned int timeout,
+		uint32_t pipe_id, uint32_t queue_id);
+
+bool kfd_is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+		uint64_t queue_address, uint32_t pipe_id,
+		uint32_t queue_id);
+
+void kfd_get_hiq_xcc_mqd(struct kfd_node *dev,
+		struct kfd_mem_obj *mqd_mem_obj, uint32_t virtual_xcc_id);
+
+uint64_t kfd_hiq_mqd_stride(struct kfd_node *dev);
+uint64_t kfd_mqd_stride(struct mqd_manager *mm,
+			struct queue_properties *q);
+#endif /* KFD_MQD_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
new file mode 100644
index 000000000..1a4a69943
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -0,0 +1,464 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/mm_types.h>
+
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+#include "cik_regs.h"
+#include "cik_structs.h"
+#include "oss/oss_2_4_sh_mask.h"
+
+static inline struct cik_mqd *get_mqd(void *mqd)
+{
+	return (struct cik_mqd *)mqd;
+}
+
+static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
+{
+	return (struct cik_sdma_rlc_registers *)mqd;
+}
+
+static void update_cu_mask(struct mqd_manager *mm, void *mqd,
+			struct mqd_update_info *minfo)
+{
+	struct cik_mqd *m;
+	uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
+
+	if (!minfo || !minfo->cu_mask.ptr)
+		return;
+
+	mqd_symmetrically_map_cu_mask(mm,
+		minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
+
+	m = get_mqd(mqd);
+	m->compute_static_thread_mgmt_se0 = se_mask[0];
+	m->compute_static_thread_mgmt_se1 = se_mask[1];
+	m->compute_static_thread_mgmt_se2 = se_mask[2];
+	m->compute_static_thread_mgmt_se3 = se_mask[3];
+
+	pr_debug("Update cu mask to %#x %#x %#x %#x\n",
+		m->compute_static_thread_mgmt_se0,
+		m->compute_static_thread_mgmt_se1,
+		m->compute_static_thread_mgmt_se2,
+		m->compute_static_thread_mgmt_se3);
+}
+
+static void set_priority(struct cik_mqd *m, struct queue_properties *q)
+{
+	m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
+	m->cp_hqd_queue_priority = q->priority;
+}
+
+static struct kfd_mem_obj *allocate_mqd(struct kfd_node *kfd,
+					struct queue_properties *q)
+{
+	struct kfd_mem_obj *mqd_mem_obj;
+
+	if (kfd_gtt_sa_allocate(kfd, sizeof(struct cik_mqd),
+			&mqd_mem_obj))
+		return NULL;
+
+	return mqd_mem_obj;
+}
+
+static void init_mqd(struct mqd_manager *mm, void **mqd,
+		struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+		struct queue_properties *q)
+{
+	uint64_t addr;
+	struct cik_mqd *m;
+
+	m = (struct cik_mqd *) mqd_mem_obj->cpu_ptr;
+	addr = mqd_mem_obj->gpu_addr;
+
+	memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256));
+
+	m->header = 0xC0310800;
+	m->compute_pipelinestat_enable = 1;
+	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+
+	/*
+	 * Make sure to use the last queue state saved on mqd when the cp
+	 * reassigns the queue, so when queue is switched on/off (e.g over
+	 * subscription or quantum timeout) the context will be consistent
+	 */
+	m->cp_hqd_persistent_state =
+				DEFAULT_CP_HQD_PERSISTENT_STATE | PRELOAD_REQ;
+
+	m->cp_mqd_control             = MQD_CONTROL_PRIV_STATE_EN;
+	m->cp_mqd_base_addr_lo        = lower_32_bits(addr);
+	m->cp_mqd_base_addr_hi        = upper_32_bits(addr);
+
+	m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS |
+				QUANTUM_DURATION(10);
+
+	/*
+	 * Pipe Priority
+	 * Identifies the pipe relative priority when this queue is connected
+	 * to the pipeline. The pipe priority is against the GFX pipe and HP3D.
+	 * In KFD we are using a fixed pipe priority set to CS_MEDIUM.
+	 * 0 = CS_LOW (typically below GFX)
+	 * 1 = CS_MEDIUM (typically between HP3D and GFX
+	 * 2 = CS_HIGH (typically above HP3D)
+	 */
+	set_priority(m, q);
+
+	if (q->format == KFD_QUEUE_FORMAT_AQL)
+		m->cp_hqd_iq_rptr = AQL_ENABLE;
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = addr;
+	mm->update_mqd(mm, m, q, NULL);
+}
+
+static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	struct cik_sdma_rlc_registers *m;
+
+	m = (struct cik_sdma_rlc_registers *) mqd_mem_obj->cpu_ptr;
+
+	memset(m, 0, sizeof(struct cik_sdma_rlc_registers));
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = mqd_mem_obj->gpu_addr;
+
+	mm->update_mqd(mm, m, q, NULL);
+}
+
+static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
+		    uint32_t queue_id, struct queue_properties *p,
+		    struct mm_struct *mms)
+{
+	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
+	uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1);
+
+	return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
+					  (uint32_t __user *)p->write_ptr,
+					  wptr_shift, wptr_mask, mms, 0);
+}
+
+static void __update_mqd(struct mqd_manager *mm, void *mqd,
+			struct queue_properties *q, struct mqd_update_info *minfo,
+			unsigned int atc_bit)
+{
+	struct cik_mqd *m;
+
+	m = get_mqd(mqd);
+	m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
+				DEFAULT_MIN_AVAIL_SIZE;
+	m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE;
+	if (atc_bit) {
+		m->cp_hqd_pq_control |= PQ_ATC_EN;
+		m->cp_hqd_ib_control |= IB_ATC_EN;
+	}
+
+	/*
+	 * Calculating queue size which is log base 2 of actual queue size -1
+	 * dwords and another -1 for ffs
+	 */
+	m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
+	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
+	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
+	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+	m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(q->doorbell_off);
+
+	m->cp_hqd_vmid = q->vmid;
+
+	if (q->format == KFD_QUEUE_FORMAT_AQL)
+		m->cp_hqd_pq_control |= NO_UPDATE_RPTR;
+
+	update_cu_mask(mm, mqd, minfo);
+	set_priority(m, q);
+
+	q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+static uint32_t read_doorbell_id(void *mqd)
+{
+	struct cik_mqd *m = (struct cik_mqd *)mqd;
+
+	return m->queue_doorbell_id0;
+}
+
+static void update_mqd(struct mqd_manager *mm, void *mqd,
+		       struct queue_properties *q,
+		       struct mqd_update_info *minfo)
+{
+	__update_mqd(mm, mqd, q, minfo, 0);
+}
+
+static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+			struct queue_properties *q,
+			struct mqd_update_info *minfo)
+{
+	struct cik_sdma_rlc_registers *m;
+
+	m = get_sdma_mqd(mqd);
+	m->sdma_rlc_rb_cntl = order_base_2(q->queue_size / 4)
+			<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+			q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
+			1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+			6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
+
+	m->sdma_rlc_rb_base = lower_32_bits(q->queue_address >> 8);
+	m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+	m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+	m->sdma_rlc_doorbell =
+		q->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;
+
+	m->sdma_rlc_virtual_addr = q->sdma_vm_addr;
+
+	m->sdma_engine_id = q->sdma_engine_id;
+	m->sdma_queue_id = q->sdma_queue_id;
+
+	q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
+{
+	struct cik_mqd *m;
+
+	m = get_mqd(mqd);
+
+	memcpy(mqd_dst, m, sizeof(struct cik_mqd));
+}
+
+static void restore_mqd(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *qp,
+			const void *mqd_src,
+			const void *ctl_stack_src, const u32 ctl_stack_size)
+{
+	uint64_t addr;
+	struct cik_mqd *m;
+
+	m = (struct cik_mqd *) mqd_mem_obj->cpu_ptr;
+	addr = mqd_mem_obj->gpu_addr;
+
+	memcpy(m, mqd_src, sizeof(*m));
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = addr;
+
+	m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(qp->doorbell_off);
+
+	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+			m->cp_hqd_pq_doorbell_control);
+
+	qp->is_active = 0;
+}
+
+static void checkpoint_mqd_sdma(struct mqd_manager *mm,
+				void *mqd,
+				void *mqd_dst,
+				void *ctl_stack_dst)
+{
+	struct cik_sdma_rlc_registers *m;
+
+	m = get_sdma_mqd(mqd);
+
+	memcpy(mqd_dst, m, sizeof(struct cik_sdma_rlc_registers));
+}
+
+static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
+				struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+				struct queue_properties *qp,
+				const void *mqd_src,
+				const void *ctl_stack_src, const u32 ctl_stack_size)
+{
+	uint64_t addr;
+	struct cik_sdma_rlc_registers *m;
+
+	m = (struct cik_sdma_rlc_registers *) mqd_mem_obj->cpu_ptr;
+	addr = mqd_mem_obj->gpu_addr;
+
+	memcpy(m, mqd_src, sizeof(*m));
+
+	m->sdma_rlc_doorbell =
+		qp->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = addr;
+
+	qp->is_active = 0;
+}
+
+/*
+ * HIQ MQD Implementation, concrete implementation for HIQ MQD implementation.
+ * The HIQ queue in Kaveri is using the same MQD structure as all the user mode
+ * queues but with different initial values.
+ */
+
+static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+		struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+		struct queue_properties *q)
+{
+	init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
+}
+
+static void update_mqd_hiq(struct mqd_manager *mm, void *mqd,
+			struct queue_properties *q,
+			struct mqd_update_info *minfo)
+{
+	struct cik_mqd *m;
+
+	m = get_mqd(mqd);
+	m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
+				DEFAULT_MIN_AVAIL_SIZE |
+				PRIV_STATE |
+				KMD_QUEUE;
+
+	/*
+	 * Calculating queue size which is log base 2 of actual queue
+	 * size -1 dwords
+	 */
+	m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
+	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
+	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
+	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+	m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(q->doorbell_off);
+
+	m->cp_hqd_vmid = q->vmid;
+
+	q->is_active = QUEUE_IS_ACTIVE(*q);
+
+	set_priority(m, q);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct cik_mqd), false);
+	return 0;
+}
+
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct cik_sdma_rlc_registers), false);
+	return 0;
+}
+
+#endif
+
+struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
+		struct kfd_node *dev)
+{
+	struct mqd_manager *mqd;
+
+	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+		return NULL;
+
+	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
+	if (!mqd)
+		return NULL;
+
+	mqd->dev = dev;
+
+	switch (type) {
+	case KFD_MQD_TYPE_CP:
+		mqd->allocate_mqd = allocate_mqd;
+		mqd->init_mqd = init_mqd;
+		mqd->free_mqd = kfd_free_mqd_cp;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd;
+		mqd->destroy_mqd = kfd_destroy_mqd_cp;
+		mqd->is_occupied = kfd_is_occupied_cp;
+		mqd->checkpoint_mqd = checkpoint_mqd;
+		mqd->restore_mqd = restore_mqd;
+		mqd->mqd_size = sizeof(struct cik_mqd);
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		break;
+	case KFD_MQD_TYPE_HIQ:
+		mqd->allocate_mqd = allocate_hiq_mqd;
+		mqd->init_mqd = init_mqd_hiq;
+		mqd->free_mqd = free_mqd_hiq_sdma;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd_hiq;
+		mqd->destroy_mqd = kfd_destroy_mqd_cp;
+		mqd->is_occupied = kfd_is_occupied_cp;
+		mqd->mqd_size = sizeof(struct cik_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		mqd->read_doorbell_id = read_doorbell_id;
+		break;
+	case KFD_MQD_TYPE_DIQ:
+		mqd->allocate_mqd = allocate_mqd;
+		mqd->init_mqd = init_mqd_hiq;
+		mqd->free_mqd = kfd_free_mqd_cp;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd_hiq;
+		mqd->destroy_mqd = kfd_destroy_mqd_cp;
+		mqd->is_occupied = kfd_is_occupied_cp;
+		mqd->mqd_size = sizeof(struct cik_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		break;
+	case KFD_MQD_TYPE_SDMA:
+		mqd->allocate_mqd = allocate_sdma_mqd;
+		mqd->init_mqd = init_mqd_sdma;
+		mqd->free_mqd = free_mqd_hiq_sdma;
+		mqd->load_mqd = kfd_load_mqd_sdma;
+		mqd->update_mqd = update_mqd_sdma;
+		mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+		mqd->is_occupied = kfd_is_occupied_sdma;
+		mqd->checkpoint_mqd = checkpoint_mqd_sdma;
+		mqd->restore_mqd = restore_mqd_sdma;
+		mqd->mqd_size = sizeof(struct cik_sdma_rlc_registers);
+		mqd->mqd_stride = kfd_mqd_stride;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+#endif
+		break;
+	default:
+		kfree(mqd);
+		return NULL;
+	}
+
+	return mqd;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
new file mode 100644
index 000000000..22cbfa1bd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -0,0 +1,531 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2018-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+#include "v10_structs.h"
+#include "gc/gc_10_1_0_offset.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+#include "amdgpu_amdkfd.h"
+
+static inline struct v10_compute_mqd *get_mqd(void *mqd)
+{
+	return (struct v10_compute_mqd *)mqd;
+}
+
+static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+	return (struct v10_sdma_mqd *)mqd;
+}
+
+static void update_cu_mask(struct mqd_manager *mm, void *mqd,
+			struct mqd_update_info *minfo)
+{
+	struct v10_compute_mqd *m;
+	uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
+
+	if (!minfo || !minfo->cu_mask.ptr)
+		return;
+
+	mqd_symmetrically_map_cu_mask(mm,
+		minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
+
+	m = get_mqd(mqd);
+	m->compute_static_thread_mgmt_se0 = se_mask[0];
+	m->compute_static_thread_mgmt_se1 = se_mask[1];
+	m->compute_static_thread_mgmt_se2 = se_mask[2];
+	m->compute_static_thread_mgmt_se3 = se_mask[3];
+
+	pr_debug("update cu mask to %#x %#x %#x %#x\n",
+		m->compute_static_thread_mgmt_se0,
+		m->compute_static_thread_mgmt_se1,
+		m->compute_static_thread_mgmt_se2,
+		m->compute_static_thread_mgmt_se3);
+}
+
+static void set_priority(struct v10_compute_mqd *m, struct queue_properties *q)
+{
+	m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
+	m->cp_hqd_queue_priority = q->priority;
+}
+
+static struct kfd_mem_obj *allocate_mqd(struct kfd_node *kfd,
+		struct queue_properties *q)
+{
+	struct kfd_mem_obj *mqd_mem_obj;
+
+	if (kfd_gtt_sa_allocate(kfd, sizeof(struct v10_compute_mqd),
+			&mqd_mem_obj))
+		return NULL;
+
+	return mqd_mem_obj;
+}
+
+static void init_mqd(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	uint64_t addr;
+	struct v10_compute_mqd *m;
+
+	m = (struct v10_compute_mqd *) mqd_mem_obj->cpu_ptr;
+	addr = mqd_mem_obj->gpu_addr;
+
+	memset(m, 0, sizeof(struct v10_compute_mqd));
+
+	m->header = 0xC0310800;
+	m->compute_pipelinestat_enable = 1;
+	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+
+	m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
+			0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+
+	m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
+
+	m->cp_mqd_base_addr_lo        = lower_32_bits(addr);
+	m->cp_mqd_base_addr_hi        = upper_32_bits(addr);
+
+	m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
+			1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
+			1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
+
+	/* Set cp_hqd_hq_scheduler0 bit 14 to 1 to have the CP set up the
+	 * DISPATCH_PTR.  This is required for the kfd debugger
+	 */
+	m->cp_hqd_hq_scheduler0 = 1 << 14;
+
+	if (q->format == KFD_QUEUE_FORMAT_AQL) {
+		m->cp_hqd_aql_control =
+			1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
+	}
+
+	if (mm->dev->kfd->cwsr_enabled) {
+		m->cp_hqd_persistent_state |=
+			(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
+		m->cp_hqd_ctx_save_base_addr_lo =
+			lower_32_bits(q->ctx_save_restore_area_address);
+		m->cp_hqd_ctx_save_base_addr_hi =
+			upper_32_bits(q->ctx_save_restore_area_address);
+		m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
+		m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
+		m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
+		m->cp_hqd_wg_state_offset = q->ctl_stack_size;
+	}
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = addr;
+	mm->update_mqd(mm, m, q, NULL);
+}
+
+static int load_mqd(struct mqd_manager *mm, void *mqd,
+			uint32_t pipe_id, uint32_t queue_id,
+			struct queue_properties *p, struct mm_struct *mms)
+{
+	int r = 0;
+	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
+
+	r = mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
+					  (uint32_t __user *)p->write_ptr,
+					  wptr_shift, 0, mms, 0);
+	return r;
+}
+
+static void update_mqd(struct mqd_manager *mm, void *mqd,
+			struct queue_properties *q,
+			struct mqd_update_info *minfo)
+{
+	struct v10_compute_mqd *m;
+
+	m = get_mqd(mqd);
+
+	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+	m->cp_hqd_pq_control |=
+			ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+	m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
+	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
+
+	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
+	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
+
+	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+	m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
+	m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
+
+	m->cp_hqd_pq_doorbell_control =
+		q->doorbell_off <<
+			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+			m->cp_hqd_pq_doorbell_control);
+
+	m->cp_hqd_ib_control = 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT;
+
+	/*
+	 * HW does not clamp this field correctly. Maximum EOP queue size
+	 * is constrained by per-SE EOP done signal count, which is 8-bit.
+	 * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
+	 * more than (EOP entry count - 1) so a queue size of 0x800 dwords
+	 * is safe, giving a maximum field value of 0xA.
+	 */
+	m->cp_hqd_eop_control = min(0xA,
+		ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1);
+	m->cp_hqd_eop_base_addr_lo =
+			lower_32_bits(q->eop_ring_buffer_address >> 8);
+	m->cp_hqd_eop_base_addr_hi =
+			upper_32_bits(q->eop_ring_buffer_address >> 8);
+
+	m->cp_hqd_iq_timer = 0;
+
+	m->cp_hqd_vmid = q->vmid;
+
+	if (q->format == KFD_QUEUE_FORMAT_AQL) {
+		/* GC 10 removed WPP_CLAMP from PQ Control */
+		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
+				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
+				1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT;
+		m->cp_hqd_pq_doorbell_control |=
+			1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
+	}
+	if (mm->dev->kfd->cwsr_enabled)
+		m->cp_hqd_ctx_save_control = 0;
+
+	update_cu_mask(mm, mqd, minfo);
+	set_priority(m, q);
+
+	q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+static uint32_t read_doorbell_id(void *mqd)
+{
+	struct v10_compute_mqd *m = (struct v10_compute_mqd *)mqd;
+
+	return m->queue_doorbell_id0;
+}
+
+static int get_wave_state(struct mqd_manager *mm, void *mqd,
+			  struct queue_properties *q,
+			  void __user *ctl_stack,
+			  u32 *ctl_stack_used_size,
+			  u32 *save_area_used_size)
+{
+	struct v10_compute_mqd *m;
+	struct kfd_context_save_area_header header;
+
+	m = get_mqd(mqd);
+
+	/* Control stack is written backwards, while workgroup context data
+	 * is written forwards. Both starts from m->cp_hqd_cntl_stack_size.
+	 * Current position is at m->cp_hqd_cntl_stack_offset and
+	 * m->cp_hqd_wg_state_offset, respectively.
+	 */
+	*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
+		m->cp_hqd_cntl_stack_offset;
+	*save_area_used_size = m->cp_hqd_wg_state_offset -
+		m->cp_hqd_cntl_stack_size;
+
+	/* Control stack is not copied to user mode for GFXv10 because
+	 * it's part of the context save area that is already
+	 * accessible to user mode
+	 */
+
+	header.wave_state.control_stack_size = *ctl_stack_used_size;
+	header.wave_state.wave_state_size = *save_area_used_size;
+
+	header.wave_state.wave_state_offset = m->cp_hqd_wg_state_offset;
+	header.wave_state.control_stack_offset = m->cp_hqd_cntl_stack_offset;
+
+	if (copy_to_user(ctl_stack, &header, sizeof(header.wave_state)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
+{
+	struct v10_compute_mqd *m;
+
+	m = get_mqd(mqd);
+
+	memcpy(mqd_dst, m, sizeof(struct v10_compute_mqd));
+}
+
+static void restore_mqd(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *qp,
+			const void *mqd_src,
+			const void *ctl_stack_src, const u32 ctl_stack_size)
+{
+	uint64_t addr;
+	struct v10_compute_mqd *m;
+
+	m = (struct v10_compute_mqd *) mqd_mem_obj->cpu_ptr;
+	addr = mqd_mem_obj->gpu_addr;
+
+	memcpy(m, mqd_src, sizeof(*m));
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = addr;
+
+	m->cp_hqd_pq_doorbell_control =
+		qp->doorbell_off <<
+			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+			m->cp_hqd_pq_doorbell_control);
+
+	qp->is_active = 0;
+}
+
+static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	struct v10_compute_mqd *m;
+
+	init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
+
+	m = get_mqd(*mqd);
+
+	m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
+			1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
+}
+
+static int destroy_hiq_mqd(struct mqd_manager *mm, void *mqd,
+			enum kfd_preempt_type type, unsigned int timeout,
+			uint32_t pipe_id, uint32_t queue_id)
+{
+	int err;
+	struct v10_compute_mqd *m;
+	u32 doorbell_off;
+
+	m = get_mqd(mqd);
+
+	doorbell_off = m->cp_hqd_pq_doorbell_control >>
+			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+
+	err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, 0);
+	if (err)
+		pr_debug("Destroy HIQ MQD failed: %d\n", err);
+
+	return err;
+}
+
+static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+		struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+		struct queue_properties *q)
+{
+	struct v10_sdma_mqd *m;
+
+	m = (struct v10_sdma_mqd *) mqd_mem_obj->cpu_ptr;
+
+	memset(m, 0, sizeof(struct v10_sdma_mqd));
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = mqd_mem_obj->gpu_addr;
+
+	mm->update_mqd(mm, m, q, NULL);
+}
+
+#define SDMA_RLC_DUMMY_DEFAULT 0xf
+
+static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+			struct queue_properties *q,
+			struct mqd_update_info *minfo)
+{
+	struct v10_sdma_mqd *m;
+
+	m = get_sdma_mqd(mqd);
+	m->sdmax_rlcx_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1)
+		<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+		q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
+		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+		6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
+
+	m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
+	m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+	m->sdmax_rlcx_doorbell_offset =
+		q->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+	m->sdma_engine_id = q->sdma_engine_id;
+	m->sdma_queue_id = q->sdma_queue_id;
+	m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
+
+	q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+static void checkpoint_mqd_sdma(struct mqd_manager *mm,
+				void *mqd,
+				void *mqd_dst,
+				void *ctl_stack_dst)
+{
+	struct v10_sdma_mqd *m;
+
+	m = get_sdma_mqd(mqd);
+
+	memcpy(mqd_dst, m, sizeof(struct v10_sdma_mqd));
+}
+
+static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
+			     struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			     struct queue_properties *qp,
+			     const void *mqd_src,
+			     const void *ctl_stack_src,
+			     const u32 ctl_stack_size)
+{
+	uint64_t addr;
+	struct v10_sdma_mqd *m;
+
+	m = (struct v10_sdma_mqd *) mqd_mem_obj->cpu_ptr;
+	addr = mqd_mem_obj->gpu_addr;
+
+	memcpy(m, mqd_src, sizeof(*m));
+
+	m->sdmax_rlcx_doorbell_offset =
+		qp->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = addr;
+
+	qp->is_active = 0;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct v10_compute_mqd), false);
+	return 0;
+}
+
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct v10_sdma_mqd), false);
+	return 0;
+}
+
+#endif
+
+struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
+		struct kfd_node *dev)
+{
+	struct mqd_manager *mqd;
+
+	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+		return NULL;
+
+	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
+	if (!mqd)
+		return NULL;
+
+	mqd->dev = dev;
+
+	switch (type) {
+	case KFD_MQD_TYPE_CP:
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		mqd->allocate_mqd = allocate_mqd;
+		mqd->init_mqd = init_mqd;
+		mqd->free_mqd = kfd_free_mqd_cp;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd;
+		mqd->destroy_mqd = kfd_destroy_mqd_cp;
+		mqd->is_occupied = kfd_is_occupied_cp;
+		mqd->mqd_size = sizeof(struct v10_compute_mqd);
+		mqd->get_wave_state = get_wave_state;
+		mqd->checkpoint_mqd = checkpoint_mqd;
+		mqd->restore_mqd = restore_mqd;
+		mqd->mqd_stride = kfd_mqd_stride;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		break;
+	case KFD_MQD_TYPE_HIQ:
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		mqd->allocate_mqd = allocate_hiq_mqd;
+		mqd->init_mqd = init_mqd_hiq;
+		mqd->free_mqd = free_mqd_hiq_sdma;
+		mqd->load_mqd = kfd_hiq_load_mqd_kiq;
+		mqd->update_mqd = update_mqd;
+		mqd->destroy_mqd = destroy_hiq_mqd;
+		mqd->is_occupied = kfd_is_occupied_cp;
+		mqd->mqd_size = sizeof(struct v10_compute_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		mqd->read_doorbell_id = read_doorbell_id;
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		break;
+	case KFD_MQD_TYPE_DIQ:
+		mqd->allocate_mqd = allocate_mqd;
+		mqd->init_mqd = init_mqd_hiq;
+		mqd->free_mqd = kfd_free_mqd_cp;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd;
+		mqd->destroy_mqd = kfd_destroy_mqd_cp;
+		mqd->is_occupied = kfd_is_occupied_cp;
+		mqd->mqd_size = sizeof(struct v10_compute_mqd);
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		break;
+	case KFD_MQD_TYPE_SDMA:
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		mqd->allocate_mqd = allocate_sdma_mqd;
+		mqd->init_mqd = init_mqd_sdma;
+		mqd->free_mqd = free_mqd_hiq_sdma;
+		mqd->load_mqd = kfd_load_mqd_sdma;
+		mqd->update_mqd = update_mqd_sdma;
+		mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+		mqd->is_occupied = kfd_is_occupied_sdma;
+		mqd->checkpoint_mqd = checkpoint_mqd_sdma;
+		mqd->restore_mqd = restore_mqd_sdma;
+		mqd->mqd_size = sizeof(struct v10_sdma_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+#endif
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		break;
+	default:
+		kfree(mqd);
+		return NULL;
+	}
+
+	return mqd;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
new file mode 100644
index 000000000..d722cbd31
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -0,0 +1,568 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+#include "v11_structs.h"
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "amdgpu_amdkfd.h"
+
+static inline struct v11_compute_mqd *get_mqd(void *mqd)
+{
+	return (struct v11_compute_mqd *)mqd;
+}
+
+static inline struct v11_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+	return (struct v11_sdma_mqd *)mqd;
+}
+
+static void update_cu_mask(struct mqd_manager *mm, void *mqd,
+			   struct mqd_update_info *minfo)
+{
+	struct v11_compute_mqd *m;
+	uint32_t se_mask[KFD_MAX_NUM_SE] = {0};
+	bool has_wa_flag = minfo && (minfo->update_flag & (UPDATE_FLAG_DBG_WA_ENABLE |
+			UPDATE_FLAG_DBG_WA_DISABLE));
+
+	if (!minfo || !(has_wa_flag || minfo->cu_mask.ptr))
+		return;
+
+	m = get_mqd(mqd);
+
+	if (has_wa_flag) {
+		uint32_t wa_mask = minfo->update_flag == UPDATE_FLAG_DBG_WA_ENABLE ?
+						0xffff : 0xffffffff;
+
+		m->compute_static_thread_mgmt_se0 = wa_mask;
+		m->compute_static_thread_mgmt_se1 = wa_mask;
+		m->compute_static_thread_mgmt_se2 = wa_mask;
+		m->compute_static_thread_mgmt_se3 = wa_mask;
+		m->compute_static_thread_mgmt_se4 = wa_mask;
+		m->compute_static_thread_mgmt_se5 = wa_mask;
+		m->compute_static_thread_mgmt_se6 = wa_mask;
+		m->compute_static_thread_mgmt_se7 = wa_mask;
+
+		return;
+	}
+
+	mqd_symmetrically_map_cu_mask(mm,
+		minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
+
+	m->compute_static_thread_mgmt_se0 = se_mask[0];
+	m->compute_static_thread_mgmt_se1 = se_mask[1];
+	m->compute_static_thread_mgmt_se2 = se_mask[2];
+	m->compute_static_thread_mgmt_se3 = se_mask[3];
+	m->compute_static_thread_mgmt_se4 = se_mask[4];
+	m->compute_static_thread_mgmt_se5 = se_mask[5];
+	m->compute_static_thread_mgmt_se6 = se_mask[6];
+	m->compute_static_thread_mgmt_se7 = se_mask[7];
+
+	pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
+		m->compute_static_thread_mgmt_se0,
+		m->compute_static_thread_mgmt_se1,
+		m->compute_static_thread_mgmt_se2,
+		m->compute_static_thread_mgmt_se3,
+		m->compute_static_thread_mgmt_se4,
+		m->compute_static_thread_mgmt_se5,
+		m->compute_static_thread_mgmt_se6,
+		m->compute_static_thread_mgmt_se7);
+}
+
+static void set_priority(struct v11_compute_mqd *m, struct queue_properties *q)
+{
+	m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
+	m->cp_hqd_queue_priority = q->priority;
+}
+
+static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
+		struct queue_properties *q)
+{
+	struct kfd_mem_obj *mqd_mem_obj;
+	int size;
+
+	/*
+	 * MES write to areas beyond MQD size. So allocate
+	 * 1 PAGE_SIZE memory for MQD is MES is enabled.
+	 */
+	if (node->kfd->shared_resources.enable_mes)
+		size = PAGE_SIZE;
+	else
+		size = sizeof(struct v11_compute_mqd);
+
+	if (kfd_gtt_sa_allocate(node, size, &mqd_mem_obj))
+		return NULL;
+
+	return mqd_mem_obj;
+}
+
+static void init_mqd(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	uint64_t addr;
+	struct v11_compute_mqd *m;
+	int size;
+	uint32_t wa_mask = q->is_dbg_wa ? 0xffff : 0xffffffff;
+
+	m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr;
+	addr = mqd_mem_obj->gpu_addr;
+
+	if (mm->dev->kfd->shared_resources.enable_mes)
+		size = PAGE_SIZE;
+	else
+		size = sizeof(struct v11_compute_mqd);
+
+	memset(m, 0, size);
+
+	m->header = 0xC0310800;
+	m->compute_pipelinestat_enable = 1;
+
+	m->compute_static_thread_mgmt_se0 = wa_mask;
+	m->compute_static_thread_mgmt_se1 = wa_mask;
+	m->compute_static_thread_mgmt_se2 = wa_mask;
+	m->compute_static_thread_mgmt_se3 = wa_mask;
+	m->compute_static_thread_mgmt_se4 = wa_mask;
+	m->compute_static_thread_mgmt_se5 = wa_mask;
+	m->compute_static_thread_mgmt_se6 = wa_mask;
+	m->compute_static_thread_mgmt_se7 = wa_mask;
+
+	m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
+			0x55 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+
+	m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
+
+	m->cp_mqd_base_addr_lo        = lower_32_bits(addr);
+	m->cp_mqd_base_addr_hi        = upper_32_bits(addr);
+
+	m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
+			1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
+			1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
+
+	/* Set cp_hqd_hq_scheduler0 bit 14 to 1 to have the CP set up the
+	 * DISPATCH_PTR.  This is required for the kfd debugger
+	 */
+	m->cp_hqd_hq_status0 = 1 << 14;
+
+	/*
+	 * GFX11 RS64 CPFW version >= 509 supports PCIe atomics support
+	 * acknowledgment.
+	 */
+	if (amdgpu_amdkfd_have_atomics_support(mm->dev->adev))
+		m->cp_hqd_hq_status0 |= 1 << 29;
+
+	if (q->format == KFD_QUEUE_FORMAT_AQL) {
+		m->cp_hqd_aql_control =
+			1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
+	}
+
+	if (mm->dev->kfd->cwsr_enabled) {
+		m->cp_hqd_persistent_state |=
+			(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
+		m->cp_hqd_ctx_save_base_addr_lo =
+			lower_32_bits(q->ctx_save_restore_area_address);
+		m->cp_hqd_ctx_save_base_addr_hi =
+			upper_32_bits(q->ctx_save_restore_area_address);
+		m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
+		m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
+		m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
+		m->cp_hqd_wg_state_offset = q->ctl_stack_size;
+	}
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = addr;
+	mm->update_mqd(mm, m, q, NULL);
+}
+
+static int load_mqd(struct mqd_manager *mm, void *mqd,
+			uint32_t pipe_id, uint32_t queue_id,
+			struct queue_properties *p, struct mm_struct *mms)
+{
+	int r = 0;
+	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
+
+	r = mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
+					  (uint32_t __user *)p->write_ptr,
+					  wptr_shift, 0, mms, 0);
+	return r;
+}
+
+static void update_mqd(struct mqd_manager *mm, void *mqd,
+		       struct queue_properties *q,
+		       struct mqd_update_info *minfo)
+{
+	struct v11_compute_mqd *m;
+
+	m = get_mqd(mqd);
+
+	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+	m->cp_hqd_pq_control |=
+			ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+	m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK;
+	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
+
+	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
+	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
+
+	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+	m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
+	m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
+
+	m->cp_hqd_pq_doorbell_control =
+		q->doorbell_off <<
+			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+			m->cp_hqd_pq_doorbell_control);
+
+	m->cp_hqd_ib_control = 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT;
+
+	/*
+	 * HW does not clamp this field correctly. Maximum EOP queue size
+	 * is constrained by per-SE EOP done signal count, which is 8-bit.
+	 * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
+	 * more than (EOP entry count - 1) so a queue size of 0x800 dwords
+	 * is safe, giving a maximum field value of 0xA.
+	 */
+	m->cp_hqd_eop_control = min(0xA,
+		ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1);
+	m->cp_hqd_eop_base_addr_lo =
+			lower_32_bits(q->eop_ring_buffer_address >> 8);
+	m->cp_hqd_eop_base_addr_hi =
+			upper_32_bits(q->eop_ring_buffer_address >> 8);
+
+	m->cp_hqd_iq_timer = 0;
+
+	m->cp_hqd_vmid = q->vmid;
+
+	if (q->format == KFD_QUEUE_FORMAT_AQL) {
+		/* GC 10 removed WPP_CLAMP from PQ Control */
+		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
+				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
+				1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT ;
+		m->cp_hqd_pq_doorbell_control |=
+			1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
+	}
+	if (mm->dev->kfd->cwsr_enabled)
+		m->cp_hqd_ctx_save_control = 0;
+
+	update_cu_mask(mm, mqd, minfo);
+	set_priority(m, q);
+
+	q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+static uint32_t read_doorbell_id(void *mqd)
+{
+	struct v11_compute_mqd *m = (struct v11_compute_mqd *)mqd;
+
+	return m->queue_doorbell_id0;
+}
+
+static int get_wave_state(struct mqd_manager *mm, void *mqd,
+			  struct queue_properties *q,
+			  void __user *ctl_stack,
+			  u32 *ctl_stack_used_size,
+			  u32 *save_area_used_size)
+{
+	struct v11_compute_mqd *m;
+	struct kfd_context_save_area_header header;
+
+	m = get_mqd(mqd);
+
+	/* Control stack is written backwards, while workgroup context data
+	 * is written forwards. Both starts from m->cp_hqd_cntl_stack_size.
+	 * Current position is at m->cp_hqd_cntl_stack_offset and
+	 * m->cp_hqd_wg_state_offset, respectively.
+	 */
+	*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
+		m->cp_hqd_cntl_stack_offset;
+	*save_area_used_size = m->cp_hqd_wg_state_offset -
+		m->cp_hqd_cntl_stack_size;
+
+	/* Control stack is not copied to user mode for GFXv11 because
+	 * it's part of the context save area that is already
+	 * accessible to user mode
+	 */
+	header.wave_state.control_stack_size = *ctl_stack_used_size;
+	header.wave_state.wave_state_size = *save_area_used_size;
+
+	header.wave_state.wave_state_offset = m->cp_hqd_wg_state_offset;
+	header.wave_state.control_stack_offset = m->cp_hqd_cntl_stack_offset;
+
+	if (copy_to_user(ctl_stack, &header, sizeof(header.wave_state)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
+{
+	struct v11_compute_mqd *m;
+
+	m = get_mqd(mqd);
+
+	memcpy(mqd_dst, m, sizeof(struct v11_compute_mqd));
+}
+
+static void restore_mqd(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *qp,
+			const void *mqd_src,
+			const void *ctl_stack_src, const u32 ctl_stack_size)
+{
+	uint64_t addr;
+	struct v11_compute_mqd *m;
+
+	m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr;
+	addr = mqd_mem_obj->gpu_addr;
+
+	memcpy(m, mqd_src, sizeof(*m));
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = addr;
+
+	m->cp_hqd_pq_doorbell_control =
+		qp->doorbell_off <<
+			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+			m->cp_hqd_pq_doorbell_control);
+
+	qp->is_active = 0;
+}
+
+
+static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	struct v11_compute_mqd *m;
+
+	init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
+
+	m = get_mqd(*mqd);
+
+	m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
+			1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
+}
+
+static int destroy_hiq_mqd(struct mqd_manager *mm, void *mqd,
+			enum kfd_preempt_type type, unsigned int timeout,
+			uint32_t pipe_id, uint32_t queue_id)
+{
+	int err;
+	struct v11_compute_mqd *m;
+	u32 doorbell_off;
+
+	m = get_mqd(mqd);
+
+	doorbell_off = m->cp_hqd_pq_doorbell_control >>
+			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+
+	err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, 0);
+	if (err)
+		pr_debug("Destroy HIQ MQD failed: %d\n", err);
+
+	return err;
+}
+
+static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+		struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+		struct queue_properties *q)
+{
+	struct v11_sdma_mqd *m;
+	int size;
+
+	m = (struct v11_sdma_mqd *) mqd_mem_obj->cpu_ptr;
+
+	if (mm->dev->kfd->shared_resources.enable_mes)
+		size = PAGE_SIZE;
+	else
+		size = sizeof(struct v11_sdma_mqd);
+
+	memset(m, 0, size);
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = mqd_mem_obj->gpu_addr;
+
+	mm->update_mqd(mm, m, q, NULL);
+}
+
+#define SDMA_RLC_DUMMY_DEFAULT 0xf
+
+static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		struct queue_properties *q,
+		struct mqd_update_info *minfo)
+{
+	struct v11_sdma_mqd *m;
+
+	m = get_sdma_mqd(mqd);
+	m->sdmax_rlcx_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1)
+		<< SDMA0_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
+		q->vmid << SDMA0_QUEUE0_RB_CNTL__RB_VMID__SHIFT |
+		1 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+		6 << SDMA0_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
+		1 << SDMA0_QUEUE0_RB_CNTL__F32_WPTR_POLL_ENABLE__SHIFT;
+
+	m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
+	m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+	m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
+	m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
+	m->sdmax_rlcx_doorbell_offset =
+		q->doorbell_off << SDMA0_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+	m->sdmax_rlcx_sched_cntl = (amdgpu_sdma_phase_quantum
+		<< SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM__SHIFT)
+		 & SDMA0_QUEUE0_SCHEDULE_CNTL__CONTEXT_QUANTUM_MASK;
+
+	m->sdma_engine_id = q->sdma_engine_id;
+	m->sdma_queue_id = q->sdma_queue_id;
+	m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
+
+	q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct v11_compute_mqd), false);
+	return 0;
+}
+
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct v11_sdma_mqd), false);
+	return 0;
+}
+
+#endif
+
+struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
+		struct kfd_node *dev)
+{
+	struct mqd_manager *mqd;
+
+	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+		return NULL;
+
+	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
+	if (!mqd)
+		return NULL;
+
+	mqd->dev = dev;
+
+	switch (type) {
+	case KFD_MQD_TYPE_CP:
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		mqd->allocate_mqd = allocate_mqd;
+		mqd->init_mqd = init_mqd;
+		mqd->free_mqd = kfd_free_mqd_cp;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd;
+		mqd->destroy_mqd = kfd_destroy_mqd_cp;
+		mqd->is_occupied = kfd_is_occupied_cp;
+		mqd->mqd_size = sizeof(struct v11_compute_mqd);
+		mqd->get_wave_state = get_wave_state;
+		mqd->mqd_stride = kfd_mqd_stride;
+		mqd->checkpoint_mqd = checkpoint_mqd;
+		mqd->restore_mqd = restore_mqd;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		break;
+	case KFD_MQD_TYPE_HIQ:
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		mqd->allocate_mqd = allocate_hiq_mqd;
+		mqd->init_mqd = init_mqd_hiq;
+		mqd->free_mqd = free_mqd_hiq_sdma;
+		mqd->load_mqd = kfd_hiq_load_mqd_kiq;
+		mqd->update_mqd = update_mqd;
+		mqd->destroy_mqd = destroy_hiq_mqd;
+		mqd->is_occupied = kfd_is_occupied_cp;
+		mqd->mqd_size = sizeof(struct v11_compute_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		mqd->read_doorbell_id = read_doorbell_id;
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		break;
+	case KFD_MQD_TYPE_DIQ:
+		mqd->allocate_mqd = allocate_mqd;
+		mqd->init_mqd = init_mqd_hiq;
+		mqd->free_mqd = kfd_free_mqd_cp;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd;
+		mqd->destroy_mqd = kfd_destroy_mqd_cp;
+		mqd->is_occupied = kfd_is_occupied_cp;
+		mqd->mqd_size = sizeof(struct v11_compute_mqd);
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		break;
+	case KFD_MQD_TYPE_SDMA:
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		mqd->allocate_mqd = allocate_sdma_mqd;
+		mqd->init_mqd = init_mqd_sdma;
+		mqd->free_mqd = free_mqd_hiq_sdma;
+		mqd->load_mqd = kfd_load_mqd_sdma;
+		mqd->update_mqd = update_mqd_sdma;
+		mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+		mqd->is_occupied = kfd_is_occupied_sdma;
+		mqd->checkpoint_mqd = checkpoint_mqd;
+		mqd->restore_mqd = restore_mqd;
+		mqd->mqd_size = sizeof(struct v11_sdma_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+#endif
+		/*
+		 * To allocate SDMA MQDs by generic functions
+		 * when MES is enabled.
+		 */
+		if (dev->kfd->shared_resources.enable_mes) {
+			mqd->allocate_mqd = allocate_mqd;
+			mqd->free_mqd = kfd_free_mqd_cp;
+		}
+		pr_debug("%s@%i\n", __func__, __LINE__);
+		break;
+	default:
+		kfree(mqd);
+		return NULL;
+	}
+
+	return mqd;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
new file mode 100644
index 000000000..42d881809
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -0,0 +1,921 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+#include "v9_structs.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+#include "sdma0/sdma0_4_0_sh_mask.h"
+#include "amdgpu_amdkfd.h"
+#include "kfd_device_queue_manager.h"
+
+static void update_mqd(struct mqd_manager *mm, void *mqd,
+		       struct queue_properties *q,
+		       struct mqd_update_info *minfo);
+
+static uint64_t mqd_stride_v9(struct mqd_manager *mm,
+				struct queue_properties *q)
+{
+	if (mm->dev->kfd->cwsr_enabled &&
+	    q->type == KFD_QUEUE_TYPE_COMPUTE)
+		return ALIGN(q->ctl_stack_size, PAGE_SIZE) +
+			ALIGN(sizeof(struct v9_mqd), PAGE_SIZE);
+
+	return mm->mqd_size;
+}
+
+static inline struct v9_mqd *get_mqd(void *mqd)
+{
+	return (struct v9_mqd *)mqd;
+}
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+	return (struct v9_sdma_mqd *)mqd;
+}
+
+static void update_cu_mask(struct mqd_manager *mm, void *mqd,
+			struct mqd_update_info *minfo, uint32_t inst)
+{
+	struct v9_mqd *m;
+	uint32_t se_mask[KFD_MAX_NUM_SE] = {0};
+
+	if (!minfo || !minfo->cu_mask.ptr)
+		return;
+
+	mqd_symmetrically_map_cu_mask(mm,
+		minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, inst);
+
+	m = get_mqd(mqd);
+
+	m->compute_static_thread_mgmt_se0 = se_mask[0];
+	m->compute_static_thread_mgmt_se1 = se_mask[1];
+	m->compute_static_thread_mgmt_se2 = se_mask[2];
+	m->compute_static_thread_mgmt_se3 = se_mask[3];
+	if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3)) {
+		m->compute_static_thread_mgmt_se4 = se_mask[4];
+		m->compute_static_thread_mgmt_se5 = se_mask[5];
+		m->compute_static_thread_mgmt_se6 = se_mask[6];
+		m->compute_static_thread_mgmt_se7 = se_mask[7];
+
+		pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n",
+			m->compute_static_thread_mgmt_se0,
+			m->compute_static_thread_mgmt_se1,
+			m->compute_static_thread_mgmt_se2,
+			m->compute_static_thread_mgmt_se3,
+			m->compute_static_thread_mgmt_se4,
+			m->compute_static_thread_mgmt_se5,
+			m->compute_static_thread_mgmt_se6,
+			m->compute_static_thread_mgmt_se7);
+	} else {
+		pr_debug("inst: %u, update cu mask to %#x %#x %#x %#x\n",
+			inst, m->compute_static_thread_mgmt_se0,
+			m->compute_static_thread_mgmt_se1,
+			m->compute_static_thread_mgmt_se2,
+			m->compute_static_thread_mgmt_se3);
+	}
+}
+
+static void set_priority(struct v9_mqd *m, struct queue_properties *q)
+{
+	m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
+	m->cp_hqd_queue_priority = q->priority;
+}
+
+static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
+		struct queue_properties *q)
+{
+	int retval;
+	struct kfd_mem_obj *mqd_mem_obj = NULL;
+
+	/* For V9 only, due to a HW bug, the control stack of a user mode
+	 * compute queue needs to be allocated just behind the page boundary
+	 * of its regular MQD buffer. So we allocate an enlarged MQD buffer:
+	 * the first page of the buffer serves as the regular MQD buffer
+	 * purpose and the remaining is for control stack. Although the two
+	 * parts are in the same buffer object, they need different memory
+	 * types: MQD part needs UC (uncached) as usual, while control stack
+	 * needs NC (non coherent), which is different from the UC type which
+	 * is used when control stack is allocated in user space.
+	 *
+	 * Because of all those, we use the gtt allocation function instead
+	 * of sub-allocation function for this enlarged MQD buffer. Moreover,
+	 * in order to achieve two memory types in a single buffer object, we
+	 * pass a special bo flag AMDGPU_GEM_CREATE_CP_MQD_GFX9 to instruct
+	 * amdgpu memory functions to do so.
+	 */
+	if (node->kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
+		mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
+		if (!mqd_mem_obj)
+			return NULL;
+		retval = amdgpu_amdkfd_alloc_gtt_mem(node->adev,
+			(ALIGN(q->ctl_stack_size, PAGE_SIZE) +
+			ALIGN(sizeof(struct v9_mqd), PAGE_SIZE)) *
+			NUM_XCC(node->xcc_mask),
+			&(mqd_mem_obj->gtt_mem),
+			&(mqd_mem_obj->gpu_addr),
+			(void *)&(mqd_mem_obj->cpu_ptr), true);
+
+		if (retval) {
+			kfree(mqd_mem_obj);
+			return NULL;
+		}
+	} else {
+		retval = kfd_gtt_sa_allocate(node, sizeof(struct v9_mqd),
+				&mqd_mem_obj);
+		if (retval)
+			return NULL;
+	}
+
+	return mqd_mem_obj;
+}
+
+static void init_mqd(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	uint64_t addr;
+	struct v9_mqd *m;
+
+	m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
+	addr = mqd_mem_obj->gpu_addr;
+
+	memset(m, 0, sizeof(struct v9_mqd));
+
+	m->header = 0xC0310800;
+	m->compute_pipelinestat_enable = 1;
+	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF;
+
+	m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
+			0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+
+	m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
+
+	m->cp_mqd_base_addr_lo        = lower_32_bits(addr);
+	m->cp_mqd_base_addr_hi        = upper_32_bits(addr);
+
+	m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
+			1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
+			1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
+
+	/* Set cp_hqd_hq_scheduler0 bit 14 to 1 to have the CP set up the
+	 * DISPATCH_PTR.  This is required for the kfd debugger
+	 */
+	m->cp_hqd_hq_status0 = 1 << 14;
+
+	if (q->format == KFD_QUEUE_FORMAT_AQL)
+		m->cp_hqd_aql_control =
+			1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
+
+	if (q->tba_addr) {
+		m->compute_pgm_rsrc2 |=
+			(1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
+	}
+
+	if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address) {
+		m->cp_hqd_persistent_state |=
+			(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
+		m->cp_hqd_ctx_save_base_addr_lo =
+			lower_32_bits(q->ctx_save_restore_area_address);
+		m->cp_hqd_ctx_save_base_addr_hi =
+			upper_32_bits(q->ctx_save_restore_area_address);
+		m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
+		m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
+		m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
+		m->cp_hqd_wg_state_offset = q->ctl_stack_size;
+	}
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = addr;
+	update_mqd(mm, m, q, NULL);
+}
+
+static int load_mqd(struct mqd_manager *mm, void *mqd,
+			uint32_t pipe_id, uint32_t queue_id,
+			struct queue_properties *p, struct mm_struct *mms)
+{
+	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
+
+	return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
+					  (uint32_t __user *)p->write_ptr,
+					  wptr_shift, 0, mms, 0);
+}
+
+static void update_mqd(struct mqd_manager *mm, void *mqd,
+			struct queue_properties *q,
+			struct mqd_update_info *minfo)
+{
+	struct v9_mqd *m;
+
+	m = get_mqd(mqd);
+
+	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+	m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
+	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
+
+	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
+	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
+
+	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+	m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
+	m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
+
+	m->cp_hqd_pq_doorbell_control =
+		q->doorbell_off <<
+			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+			m->cp_hqd_pq_doorbell_control);
+
+	m->cp_hqd_ib_control =
+		3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
+		1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT;
+
+	/*
+	 * HW does not clamp this field correctly. Maximum EOP queue size
+	 * is constrained by per-SE EOP done signal count, which is 8-bit.
+	 * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
+	 * more than (EOP entry count - 1) so a queue size of 0x800 dwords
+	 * is safe, giving a maximum field value of 0xA.
+	 *
+	 * Also, do calculation only if EOP is used (size > 0), otherwise
+	 * the order_base_2 calculation provides incorrect result.
+	 *
+	 */
+	m->cp_hqd_eop_control = q->eop_ring_buffer_size ?
+		min(0xA, order_base_2(q->eop_ring_buffer_size / 4) - 1) : 0;
+
+	m->cp_hqd_eop_base_addr_lo =
+			lower_32_bits(q->eop_ring_buffer_address >> 8);
+	m->cp_hqd_eop_base_addr_hi =
+			upper_32_bits(q->eop_ring_buffer_address >> 8);
+
+	m->cp_hqd_iq_timer = 0;
+
+	m->cp_hqd_vmid = q->vmid;
+
+	if (q->format == KFD_QUEUE_FORMAT_AQL) {
+		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
+				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
+				1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT |
+				1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT;
+		m->cp_hqd_pq_doorbell_control |= 1 <<
+			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
+	}
+	if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address)
+		m->cp_hqd_ctx_save_control = 0;
+
+	if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3))
+		update_cu_mask(mm, mqd, minfo, 0);
+	set_priority(m, q);
+
+	q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+
+static uint32_t read_doorbell_id(void *mqd)
+{
+	struct v9_mqd *m = (struct v9_mqd *)mqd;
+
+	return m->queue_doorbell_id0;
+}
+
+static int get_wave_state(struct mqd_manager *mm, void *mqd,
+			  struct queue_properties *q,
+			  void __user *ctl_stack,
+			  u32 *ctl_stack_used_size,
+			  u32 *save_area_used_size)
+{
+	struct v9_mqd *m;
+	struct kfd_context_save_area_header header;
+
+	/* Control stack is located one page after MQD. */
+	void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
+
+	m = get_mqd(mqd);
+
+	*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
+		m->cp_hqd_cntl_stack_offset;
+	*save_area_used_size = m->cp_hqd_wg_state_offset -
+		m->cp_hqd_cntl_stack_size;
+
+	header.wave_state.control_stack_size = *ctl_stack_used_size;
+	header.wave_state.wave_state_size = *save_area_used_size;
+
+	header.wave_state.wave_state_offset = m->cp_hqd_wg_state_offset;
+	header.wave_state.control_stack_offset = m->cp_hqd_cntl_stack_offset;
+
+	if (copy_to_user(ctl_stack, &header, sizeof(header.wave_state)))
+		return -EFAULT;
+
+	if (copy_to_user(ctl_stack + m->cp_hqd_cntl_stack_offset,
+				mqd_ctl_stack + m->cp_hqd_cntl_stack_offset,
+				*ctl_stack_used_size))
+		return -EFAULT;
+
+	return 0;
+}
+
+static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size)
+{
+	struct v9_mqd *m = get_mqd(mqd);
+
+	*ctl_stack_size = m->cp_hqd_cntl_stack_size;
+}
+
+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
+{
+	struct v9_mqd *m;
+	/* Control stack is located one page after MQD. */
+	void *ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
+
+	m = get_mqd(mqd);
+
+	memcpy(mqd_dst, m, sizeof(struct v9_mqd));
+	memcpy(ctl_stack_dst, ctl_stack, m->cp_hqd_cntl_stack_size);
+}
+
+static void restore_mqd(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *qp,
+			const void *mqd_src,
+			const void *ctl_stack_src, u32 ctl_stack_size)
+{
+	uint64_t addr;
+	struct v9_mqd *m;
+	void *ctl_stack;
+
+	m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr;
+	addr = mqd_mem_obj->gpu_addr;
+
+	memcpy(m, mqd_src, sizeof(*m));
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = addr;
+
+	/* Control stack is located one page after MQD. */
+	ctl_stack = (void *)((uintptr_t)*mqd + PAGE_SIZE);
+	memcpy(ctl_stack, ctl_stack_src, ctl_stack_size);
+
+	m->cp_hqd_pq_doorbell_control =
+		qp->doorbell_off <<
+			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+				m->cp_hqd_pq_doorbell_control);
+
+	qp->is_active = 0;
+}
+
+static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	struct v9_mqd *m;
+
+	init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
+
+	m = get_mqd(*mqd);
+
+	m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
+			1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
+}
+
+static int destroy_hiq_mqd(struct mqd_manager *mm, void *mqd,
+			enum kfd_preempt_type type, unsigned int timeout,
+			uint32_t pipe_id, uint32_t queue_id)
+{
+	int err;
+	struct v9_mqd *m;
+	u32 doorbell_off;
+
+	m = get_mqd(mqd);
+
+	doorbell_off = m->cp_hqd_pq_doorbell_control >>
+			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+	err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, 0);
+	if (err)
+		pr_debug("Destroy HIQ MQD failed: %d\n", err);
+
+	return err;
+}
+
+static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+		struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+		struct queue_properties *q)
+{
+	struct v9_sdma_mqd *m;
+
+	m = (struct v9_sdma_mqd *) mqd_mem_obj->cpu_ptr;
+
+	memset(m, 0, sizeof(struct v9_sdma_mqd));
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = mqd_mem_obj->gpu_addr;
+
+	mm->update_mqd(mm, m, q, NULL);
+}
+
+#define SDMA_RLC_DUMMY_DEFAULT 0xf
+
+static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+			struct queue_properties *q,
+			struct mqd_update_info *minfo)
+{
+	struct v9_sdma_mqd *m;
+
+	m = get_sdma_mqd(mqd);
+	m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4)
+		<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+		q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
+		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+		6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
+
+	m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
+	m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+	m->sdmax_rlcx_doorbell_offset =
+		q->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+	m->sdma_engine_id = q->sdma_engine_id;
+	m->sdma_queue_id = q->sdma_queue_id;
+	m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
+
+	q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+static void checkpoint_mqd_sdma(struct mqd_manager *mm,
+				void *mqd,
+				void *mqd_dst,
+				void *ctl_stack_dst)
+{
+	struct v9_sdma_mqd *m;
+
+	m = get_sdma_mqd(mqd);
+
+	memcpy(mqd_dst, m, sizeof(struct v9_sdma_mqd));
+}
+
+static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
+			     struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			     struct queue_properties *qp,
+			     const void *mqd_src,
+			     const void *ctl_stack_src, const u32 ctl_stack_size)
+{
+	uint64_t addr;
+	struct v9_sdma_mqd *m;
+
+	m = (struct v9_sdma_mqd *) mqd_mem_obj->cpu_ptr;
+	addr = mqd_mem_obj->gpu_addr;
+
+	memcpy(m, mqd_src, sizeof(*m));
+
+	m->sdmax_rlcx_doorbell_offset =
+		qp->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = addr;
+
+	qp->is_active = 0;
+}
+
+static void init_mqd_hiq_v9_4_3(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	struct v9_mqd *m;
+	int xcc = 0;
+	struct kfd_mem_obj xcc_mqd_mem_obj;
+	uint64_t xcc_gart_addr = 0;
+
+	memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
+
+	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
+		kfd_get_hiq_xcc_mqd(mm->dev, &xcc_mqd_mem_obj, xcc);
+
+		init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);
+
+		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
+					1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
+					1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
+		m->cp_mqd_stride_size = kfd_hiq_mqd_stride(mm->dev);
+		if (xcc == 0) {
+			/* Set no_update_rptr = 0 in Master XCC */
+			m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK;
+
+			/* Set the MQD pointer and gart address to XCC0 MQD */
+			*mqd = m;
+			*gart_addr = xcc_gart_addr;
+		}
+	}
+}
+
+static int hiq_load_mqd_kiq_v9_4_3(struct mqd_manager *mm, void *mqd,
+			uint32_t pipe_id, uint32_t queue_id,
+			struct queue_properties *p, struct mm_struct *mms)
+{
+	uint32_t xcc_mask = mm->dev->xcc_mask;
+	int xcc_id, err, inst = 0;
+	void *xcc_mqd;
+	uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
+
+	for_each_inst(xcc_id, xcc_mask) {
+		xcc_mqd = mqd + hiq_mqd_size * inst;
+		err = mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, xcc_mqd,
+						     pipe_id, queue_id,
+						     p->doorbell_off, xcc_id);
+		if (err) {
+			pr_debug("Failed to load HIQ MQD for XCC: %d\n", inst);
+			break;
+		}
+		++inst;
+	}
+
+	return err;
+}
+
+static int destroy_hiq_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
+			enum kfd_preempt_type type, unsigned int timeout,
+			uint32_t pipe_id, uint32_t queue_id)
+{
+	uint32_t xcc_mask = mm->dev->xcc_mask;
+	int xcc_id, err, inst = 0;
+	uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
+	struct v9_mqd *m;
+	u32 doorbell_off;
+
+	for_each_inst(xcc_id, xcc_mask) {
+		m = get_mqd(mqd + hiq_mqd_size * inst);
+
+		doorbell_off = m->cp_hqd_pq_doorbell_control >>
+				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+
+		err = amdgpu_amdkfd_unmap_hiq(mm->dev->adev, doorbell_off, xcc_id);
+		if (err) {
+			pr_debug("Destroy HIQ MQD failed for xcc: %d\n", inst);
+			break;
+		}
+		++inst;
+	}
+
+	return err;
+}
+
+static void get_xcc_mqd(struct kfd_mem_obj *mqd_mem_obj,
+			       struct kfd_mem_obj *xcc_mqd_mem_obj,
+			       uint64_t offset)
+{
+	xcc_mqd_mem_obj->gtt_mem = (offset == 0) ?
+					mqd_mem_obj->gtt_mem : NULL;
+	xcc_mqd_mem_obj->gpu_addr = mqd_mem_obj->gpu_addr + offset;
+	xcc_mqd_mem_obj->cpu_ptr = (uint32_t *)((uintptr_t)mqd_mem_obj->cpu_ptr
+						+ offset);
+}
+
+static void init_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	struct v9_mqd *m;
+	int xcc = 0;
+	struct kfd_mem_obj xcc_mqd_mem_obj;
+	uint64_t xcc_gart_addr = 0;
+	uint64_t xcc_ctx_save_restore_area_address;
+	uint64_t offset = mm->mqd_stride(mm, q);
+	uint32_t local_xcc_start = mm->dev->dqm->current_logical_xcc_start++;
+
+	memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
+	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
+		get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset*xcc);
+
+		init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);
+
+		m->cp_mqd_stride_size = offset;
+
+		/*
+		 * Update the CWSR address for each XCC if CWSR is enabled
+		 * and CWSR area is allocated in thunk
+		 */
+		if (mm->dev->kfd->cwsr_enabled &&
+		    q->ctx_save_restore_area_address) {
+			xcc_ctx_save_restore_area_address =
+				q->ctx_save_restore_area_address +
+				(xcc * q->ctx_save_restore_area_size);
+
+			m->cp_hqd_ctx_save_base_addr_lo =
+				lower_32_bits(xcc_ctx_save_restore_area_address);
+			m->cp_hqd_ctx_save_base_addr_hi =
+				upper_32_bits(xcc_ctx_save_restore_area_address);
+		}
+
+		if (q->format == KFD_QUEUE_FORMAT_AQL) {
+			m->compute_tg_chunk_size = 1;
+			m->compute_current_logic_xcc_id =
+					(local_xcc_start + xcc) %
+					NUM_XCC(mm->dev->xcc_mask);
+
+			switch (xcc) {
+			case 0:
+				/* Master XCC */
+				m->cp_hqd_pq_control &=
+					~CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK;
+				break;
+			default:
+				break;
+			}
+		} else {
+			/* PM4 Queue */
+			m->compute_current_logic_xcc_id = 0;
+			m->compute_tg_chunk_size = 0;
+			m->pm4_target_xcc_in_xcp = q->pm4_target_xcc;
+		}
+
+		if (xcc == 0) {
+			/* Set the MQD pointer and gart address to XCC0 MQD */
+			*mqd = m;
+			*gart_addr = xcc_gart_addr;
+		}
+	}
+}
+
+static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
+		      struct queue_properties *q, struct mqd_update_info *minfo)
+{
+	struct v9_mqd *m;
+	int xcc = 0;
+	uint64_t size = mm->mqd_stride(mm, q);
+
+	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
+		m = get_mqd(mqd + size * xcc);
+		update_mqd(mm, m, q, minfo);
+
+		update_cu_mask(mm, mqd, minfo, xcc);
+
+		if (q->format == KFD_QUEUE_FORMAT_AQL) {
+			switch (xcc) {
+			case 0:
+				/* Master XCC */
+				m->cp_hqd_pq_control &=
+					~CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK;
+				break;
+			default:
+				break;
+			}
+			m->compute_tg_chunk_size = 1;
+		} else {
+			/* PM4 Queue */
+			m->compute_current_logic_xcc_id = 0;
+			m->compute_tg_chunk_size = 0;
+			m->pm4_target_xcc_in_xcp = q->pm4_target_xcc;
+		}
+	}
+}
+
+static int destroy_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
+		   enum kfd_preempt_type type, unsigned int timeout,
+		   uint32_t pipe_id, uint32_t queue_id)
+{
+	uint32_t xcc_mask = mm->dev->xcc_mask;
+	int xcc_id, err, inst = 0;
+	void *xcc_mqd;
+	struct v9_mqd *m;
+	uint64_t mqd_offset;
+
+	m = get_mqd(mqd);
+	mqd_offset = m->cp_mqd_stride_size;
+
+	for_each_inst(xcc_id, xcc_mask) {
+		xcc_mqd = mqd + mqd_offset * inst;
+		err = mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, xcc_mqd,
+						    type, timeout, pipe_id,
+						    queue_id, xcc_id);
+		if (err) {
+			pr_debug("Destroy MQD failed for xcc: %d\n", inst);
+			break;
+		}
+		++inst;
+	}
+
+	return err;
+}
+
+static int load_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
+			uint32_t pipe_id, uint32_t queue_id,
+			struct queue_properties *p, struct mm_struct *mms)
+{
+	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
+	uint32_t xcc_mask = mm->dev->xcc_mask;
+	int xcc_id, err, inst = 0;
+	void *xcc_mqd;
+	uint64_t mqd_stride_size = mm->mqd_stride(mm, p);
+
+	for_each_inst(xcc_id, xcc_mask) {
+		xcc_mqd = mqd + mqd_stride_size * inst;
+		err = mm->dev->kfd2kgd->hqd_load(
+			mm->dev->adev, xcc_mqd, pipe_id, queue_id,
+			(uint32_t __user *)p->write_ptr, wptr_shift, 0, mms,
+			xcc_id);
+		if (err) {
+			pr_debug("Load MQD failed for xcc: %d\n", inst);
+			break;
+		}
+		++inst;
+	}
+
+	return err;
+}
+
+static int get_wave_state_v9_4_3(struct mqd_manager *mm, void *mqd,
+				 struct queue_properties *q,
+				 void __user *ctl_stack,
+				 u32 *ctl_stack_used_size,
+				 u32 *save_area_used_size)
+{
+	int xcc, err = 0;
+	void *xcc_mqd;
+	void __user *xcc_ctl_stack;
+	uint64_t mqd_stride_size = mm->mqd_stride(mm, q);
+	u32 tmp_ctl_stack_used_size = 0, tmp_save_area_used_size = 0;
+
+	for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
+		xcc_mqd = mqd + mqd_stride_size * xcc;
+		xcc_ctl_stack = (void __user *)((uintptr_t)ctl_stack +
+					q->ctx_save_restore_area_size * xcc);
+
+		err = get_wave_state(mm, xcc_mqd, q, xcc_ctl_stack,
+				     &tmp_ctl_stack_used_size,
+				     &tmp_save_area_used_size);
+		if (err)
+			break;
+
+		/*
+		 * Set the ctl_stack_used_size and save_area_used_size to
+		 * ctl_stack_used_size and save_area_used_size of XCC 0 when
+		 * passing the info the user-space.
+		 * For multi XCC, user-space would have to look at the header
+		 * info of each Control stack area to determine the control
+		 * stack size and save area used.
+		 */
+		if (xcc == 0) {
+			*ctl_stack_used_size = tmp_ctl_stack_used_size;
+			*save_area_used_size = tmp_save_area_used_size;
+		}
+	}
+
+	return err;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct v9_mqd), false);
+	return 0;
+}
+
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct v9_sdma_mqd), false);
+	return 0;
+}
+
+#endif
+
+struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
+		struct kfd_node *dev)
+{
+	struct mqd_manager *mqd;
+
+	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+		return NULL;
+
+	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
+	if (!mqd)
+		return NULL;
+
+	mqd->dev = dev;
+
+	switch (type) {
+	case KFD_MQD_TYPE_CP:
+		mqd->allocate_mqd = allocate_mqd;
+		mqd->free_mqd = kfd_free_mqd_cp;
+		mqd->is_occupied = kfd_is_occupied_cp;
+		mqd->get_checkpoint_info = get_checkpoint_info;
+		mqd->checkpoint_mqd = checkpoint_mqd;
+		mqd->restore_mqd = restore_mqd;
+		mqd->mqd_size = sizeof(struct v9_mqd);
+		mqd->mqd_stride = mqd_stride_v9;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) {
+			mqd->init_mqd = init_mqd_v9_4_3;
+			mqd->load_mqd = load_mqd_v9_4_3;
+			mqd->update_mqd = update_mqd_v9_4_3;
+			mqd->destroy_mqd = destroy_mqd_v9_4_3;
+			mqd->get_wave_state = get_wave_state_v9_4_3;
+		} else {
+			mqd->init_mqd = init_mqd;
+			mqd->load_mqd = load_mqd;
+			mqd->update_mqd = update_mqd;
+			mqd->destroy_mqd = kfd_destroy_mqd_cp;
+			mqd->get_wave_state = get_wave_state;
+		}
+		break;
+	case KFD_MQD_TYPE_HIQ:
+		mqd->allocate_mqd = allocate_hiq_mqd;
+		mqd->free_mqd = free_mqd_hiq_sdma;
+		mqd->update_mqd = update_mqd;
+		mqd->is_occupied = kfd_is_occupied_cp;
+		mqd->mqd_size = sizeof(struct v9_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		mqd->read_doorbell_id = read_doorbell_id;
+		if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) {
+			mqd->init_mqd = init_mqd_hiq_v9_4_3;
+			mqd->load_mqd = hiq_load_mqd_kiq_v9_4_3;
+			mqd->destroy_mqd = destroy_hiq_mqd_v9_4_3;
+		} else {
+			mqd->init_mqd = init_mqd_hiq;
+			mqd->load_mqd = kfd_hiq_load_mqd_kiq;
+			mqd->destroy_mqd = destroy_hiq_mqd;
+		}
+		break;
+	case KFD_MQD_TYPE_DIQ:
+		mqd->allocate_mqd = allocate_mqd;
+		mqd->init_mqd = init_mqd_hiq;
+		mqd->free_mqd = kfd_free_mqd_cp;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd;
+		mqd->destroy_mqd = kfd_destroy_mqd_cp;
+		mqd->is_occupied = kfd_is_occupied_cp;
+		mqd->mqd_size = sizeof(struct v9_mqd);
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		break;
+	case KFD_MQD_TYPE_SDMA:
+		mqd->allocate_mqd = allocate_sdma_mqd;
+		mqd->init_mqd = init_mqd_sdma;
+		mqd->free_mqd = free_mqd_hiq_sdma;
+		mqd->load_mqd = kfd_load_mqd_sdma;
+		mqd->update_mqd = update_mqd_sdma;
+		mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+		mqd->is_occupied = kfd_is_occupied_sdma;
+		mqd->checkpoint_mqd = checkpoint_mqd_sdma;
+		mqd->restore_mqd = restore_mqd_sdma;
+		mqd->mqd_size = sizeof(struct v9_sdma_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+#endif
+		break;
+	default:
+		kfree(mqd);
+		return NULL;
+	}
+
+	return mqd;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
new file mode 100644
index 000000000..3e1a574d4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -0,0 +1,523 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/mm_types.h>
+
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+#include "vi_structs.h"
+#include "gca/gfx_8_0_sh_mask.h"
+#include "gca/gfx_8_0_enum.h"
+#include "oss/oss_3_0_sh_mask.h"
+
+#define CP_MQD_CONTROL__PRIV_STATE__SHIFT 0x8
+
+static inline struct vi_mqd *get_mqd(void *mqd)
+{
+	return (struct vi_mqd *)mqd;
+}
+
+static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+	return (struct vi_sdma_mqd *)mqd;
+}
+
+static void update_cu_mask(struct mqd_manager *mm, void *mqd,
+			struct mqd_update_info *minfo)
+{
+	struct vi_mqd *m;
+	uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
+
+	if (!minfo || !minfo->cu_mask.ptr)
+		return;
+
+	mqd_symmetrically_map_cu_mask(mm,
+		minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask, 0);
+
+	m = get_mqd(mqd);
+	m->compute_static_thread_mgmt_se0 = se_mask[0];
+	m->compute_static_thread_mgmt_se1 = se_mask[1];
+	m->compute_static_thread_mgmt_se2 = se_mask[2];
+	m->compute_static_thread_mgmt_se3 = se_mask[3];
+
+	pr_debug("Update cu mask to %#x %#x %#x %#x\n",
+		m->compute_static_thread_mgmt_se0,
+		m->compute_static_thread_mgmt_se1,
+		m->compute_static_thread_mgmt_se2,
+		m->compute_static_thread_mgmt_se3);
+}
+
+static void set_priority(struct vi_mqd *m, struct queue_properties *q)
+{
+	m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
+	m->cp_hqd_queue_priority = q->priority;
+}
+
+static struct kfd_mem_obj *allocate_mqd(struct kfd_node *kfd,
+					struct queue_properties *q)
+{
+	struct kfd_mem_obj *mqd_mem_obj;
+
+	if (kfd_gtt_sa_allocate(kfd, sizeof(struct vi_mqd),
+			&mqd_mem_obj))
+		return NULL;
+
+	return mqd_mem_obj;
+}
+
+static void init_mqd(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	uint64_t addr;
+	struct vi_mqd *m;
+
+	m = (struct vi_mqd *) mqd_mem_obj->cpu_ptr;
+	addr = mqd_mem_obj->gpu_addr;
+
+	memset(m, 0, sizeof(struct vi_mqd));
+
+	m->header = 0xC0310800;
+	m->compute_pipelinestat_enable = 1;
+	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+
+	m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
+			0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+
+	m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT |
+			MTYPE_UC << CP_MQD_CONTROL__MTYPE__SHIFT;
+
+	m->cp_mqd_base_addr_lo        = lower_32_bits(addr);
+	m->cp_mqd_base_addr_hi        = upper_32_bits(addr);
+
+	m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
+			1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
+			1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
+
+	set_priority(m, q);
+	m->cp_hqd_eop_rptr = 1 << CP_HQD_EOP_RPTR__INIT_FETCHER__SHIFT;
+
+	if (q->format == KFD_QUEUE_FORMAT_AQL)
+		m->cp_hqd_iq_rptr = 1;
+
+	if (q->tba_addr) {
+		m->compute_tba_lo = lower_32_bits(q->tba_addr >> 8);
+		m->compute_tba_hi = upper_32_bits(q->tba_addr >> 8);
+		m->compute_tma_lo = lower_32_bits(q->tma_addr >> 8);
+		m->compute_tma_hi = upper_32_bits(q->tma_addr >> 8);
+		m->compute_pgm_rsrc2 |=
+			(1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
+	}
+
+	if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address) {
+		m->cp_hqd_persistent_state |=
+			(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
+		m->cp_hqd_ctx_save_base_addr_lo =
+			lower_32_bits(q->ctx_save_restore_area_address);
+		m->cp_hqd_ctx_save_base_addr_hi =
+			upper_32_bits(q->ctx_save_restore_area_address);
+		m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
+		m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
+		m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
+		m->cp_hqd_wg_state_offset = q->ctl_stack_size;
+	}
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = addr;
+	mm->update_mqd(mm, m, q, NULL);
+}
+
+static int load_mqd(struct mqd_manager *mm, void *mqd,
+			uint32_t pipe_id, uint32_t queue_id,
+			struct queue_properties *p, struct mm_struct *mms)
+{
+	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
+	uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1);
+
+	return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
+					  (uint32_t __user *)p->write_ptr,
+					  wptr_shift, wptr_mask, mms, 0);
+}
+
+static void __update_mqd(struct mqd_manager *mm, void *mqd,
+			struct queue_properties *q, struct mqd_update_info *minfo,
+			unsigned int mtype, unsigned int atc_bit)
+{
+	struct vi_mqd *m;
+
+	m = get_mqd(mqd);
+
+	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT |
+			atc_bit << CP_HQD_PQ_CONTROL__PQ_ATC__SHIFT |
+			mtype << CP_HQD_PQ_CONTROL__MTYPE__SHIFT;
+	m->cp_hqd_pq_control |=	order_base_2(q->queue_size / 4) - 1;
+	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
+
+	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
+	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
+
+	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+	m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
+	m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
+
+	m->cp_hqd_pq_doorbell_control =
+		q->doorbell_off <<
+			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+			m->cp_hqd_pq_doorbell_control);
+
+	m->cp_hqd_eop_control = atc_bit << CP_HQD_EOP_CONTROL__EOP_ATC__SHIFT |
+			mtype << CP_HQD_EOP_CONTROL__MTYPE__SHIFT;
+
+	m->cp_hqd_ib_control = atc_bit << CP_HQD_IB_CONTROL__IB_ATC__SHIFT |
+			3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
+			mtype << CP_HQD_IB_CONTROL__MTYPE__SHIFT;
+
+	/*
+	 * HW does not clamp this field correctly. Maximum EOP queue size
+	 * is constrained by per-SE EOP done signal count, which is 8-bit.
+	 * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
+	 * more than (EOP entry count - 1) so a queue size of 0x800 dwords
+	 * is safe, giving a maximum field value of 0xA.
+	 */
+	m->cp_hqd_eop_control |= min(0xA,
+		order_base_2(q->eop_ring_buffer_size / 4) - 1);
+	m->cp_hqd_eop_base_addr_lo =
+			lower_32_bits(q->eop_ring_buffer_address >> 8);
+	m->cp_hqd_eop_base_addr_hi =
+			upper_32_bits(q->eop_ring_buffer_address >> 8);
+
+	m->cp_hqd_iq_timer = atc_bit << CP_HQD_IQ_TIMER__IQ_ATC__SHIFT |
+			mtype << CP_HQD_IQ_TIMER__MTYPE__SHIFT;
+
+	m->cp_hqd_vmid = q->vmid;
+
+	if (q->format == KFD_QUEUE_FORMAT_AQL) {
+		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
+				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
+	}
+
+	if (mm->dev->kfd->cwsr_enabled && q->ctx_save_restore_area_address)
+		m->cp_hqd_ctx_save_control =
+			atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT |
+			mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT;
+
+	update_cu_mask(mm, mqd, minfo);
+	set_priority(m, q);
+
+	q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+static uint32_t read_doorbell_id(void *mqd)
+{
+	struct vi_mqd *m = (struct vi_mqd *)mqd;
+
+	return m->queue_doorbell_id0;
+}
+
+static void update_mqd(struct mqd_manager *mm, void *mqd,
+		       struct queue_properties *q,
+		       struct mqd_update_info *minfo)
+{
+	__update_mqd(mm, mqd, q, minfo, MTYPE_UC, 0);
+}
+
+static int get_wave_state(struct mqd_manager *mm, void *mqd,
+			  struct queue_properties *q,
+			  void __user *ctl_stack,
+			  u32 *ctl_stack_used_size,
+			  u32 *save_area_used_size)
+{
+	struct vi_mqd *m;
+
+	m = get_mqd(mqd);
+
+	*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
+		m->cp_hqd_cntl_stack_offset;
+	*save_area_used_size = m->cp_hqd_wg_state_offset -
+		m->cp_hqd_cntl_stack_size;
+
+	/* Control stack is not copied to user mode for GFXv8 because
+	 * it's part of the context save area that is already
+	 * accessible to user mode
+	 */
+
+	return 0;
+}
+
+static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size)
+{
+	/* Control stack is stored in user mode */
+	*ctl_stack_size = 0;
+}
+
+static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
+{
+	struct vi_mqd *m;
+
+	m = get_mqd(mqd);
+
+	memcpy(mqd_dst, m, sizeof(struct vi_mqd));
+}
+
+static void restore_mqd(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *qp,
+			const void *mqd_src,
+			const void *ctl_stack_src, const u32 ctl_stack_size)
+{
+	uint64_t addr;
+	struct vi_mqd *m;
+
+	m = (struct vi_mqd *) mqd_mem_obj->cpu_ptr;
+	addr = mqd_mem_obj->gpu_addr;
+
+	memcpy(m, mqd_src, sizeof(*m));
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = addr;
+
+	m->cp_hqd_pq_doorbell_control =
+		qp->doorbell_off <<
+			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+			m->cp_hqd_pq_doorbell_control);
+
+	qp->is_active = 0;
+}
+
+static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	struct vi_mqd *m;
+
+	init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
+
+	m = get_mqd(*mqd);
+
+	m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
+			1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
+}
+
+static void update_mqd_hiq(struct mqd_manager *mm, void *mqd,
+			struct queue_properties *q,
+			struct mqd_update_info *minfo)
+{
+	__update_mqd(mm, mqd, q, minfo, MTYPE_UC, 0);
+}
+
+static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+		struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+		struct queue_properties *q)
+{
+	struct vi_sdma_mqd *m;
+
+	m = (struct vi_sdma_mqd *) mqd_mem_obj->cpu_ptr;
+
+	memset(m, 0, sizeof(struct vi_sdma_mqd));
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = mqd_mem_obj->gpu_addr;
+
+	mm->update_mqd(mm, m, q, NULL);
+}
+
+static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+			struct queue_properties *q,
+			struct mqd_update_info *minfo)
+{
+	struct vi_sdma_mqd *m;
+
+	m = get_sdma_mqd(mqd);
+	m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4)
+		<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+		q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
+		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+		6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
+
+	m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
+	m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+	m->sdmax_rlcx_doorbell =
+		q->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;
+
+	m->sdmax_rlcx_virtual_addr = q->sdma_vm_addr;
+
+	m->sdma_engine_id = q->sdma_engine_id;
+	m->sdma_queue_id = q->sdma_queue_id;
+
+	q->is_active = QUEUE_IS_ACTIVE(*q);
+}
+
+static void checkpoint_mqd_sdma(struct mqd_manager *mm,
+				void *mqd,
+				void *mqd_dst,
+				void *ctl_stack_dst)
+{
+	struct vi_sdma_mqd *m;
+
+	m = get_sdma_mqd(mqd);
+
+	memcpy(mqd_dst, m, sizeof(struct vi_sdma_mqd));
+}
+
+static void restore_mqd_sdma(struct mqd_manager *mm, void **mqd,
+			     struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+			     struct queue_properties *qp,
+			     const void *mqd_src,
+			     const void *ctl_stack_src, const u32 ctl_stack_size)
+{
+	uint64_t addr;
+	struct vi_sdma_mqd *m;
+
+	m = (struct vi_sdma_mqd *) mqd_mem_obj->cpu_ptr;
+	addr = mqd_mem_obj->gpu_addr;
+
+	memcpy(m, mqd_src, sizeof(*m));
+
+	m->sdmax_rlcx_doorbell =
+		qp->doorbell_off << SDMA0_RLC0_DOORBELL__OFFSET__SHIFT;
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = addr;
+
+	qp->is_active = 0;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct vi_mqd), false);
+	return 0;
+}
+
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct vi_sdma_mqd), false);
+	return 0;
+}
+
+#endif
+
+struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
+		struct kfd_node *dev)
+{
+	struct mqd_manager *mqd;
+
+	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+		return NULL;
+
+	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
+	if (!mqd)
+		return NULL;
+
+	mqd->dev = dev;
+
+	switch (type) {
+	case KFD_MQD_TYPE_CP:
+		mqd->allocate_mqd = allocate_mqd;
+		mqd->init_mqd = init_mqd;
+		mqd->free_mqd = kfd_free_mqd_cp;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd;
+		mqd->destroy_mqd = kfd_destroy_mqd_cp;
+		mqd->is_occupied = kfd_is_occupied_cp;
+		mqd->get_wave_state = get_wave_state;
+		mqd->get_checkpoint_info = get_checkpoint_info;
+		mqd->checkpoint_mqd = checkpoint_mqd;
+		mqd->restore_mqd = restore_mqd;
+		mqd->mqd_size = sizeof(struct vi_mqd);
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		break;
+	case KFD_MQD_TYPE_HIQ:
+		mqd->allocate_mqd = allocate_hiq_mqd;
+		mqd->init_mqd = init_mqd_hiq;
+		mqd->free_mqd = free_mqd_hiq_sdma;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd_hiq;
+		mqd->destroy_mqd = kfd_destroy_mqd_cp;
+		mqd->is_occupied = kfd_is_occupied_cp;
+		mqd->mqd_size = sizeof(struct vi_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		mqd->read_doorbell_id = read_doorbell_id;
+		break;
+	case KFD_MQD_TYPE_DIQ:
+		mqd->allocate_mqd = allocate_mqd;
+		mqd->init_mqd = init_mqd_hiq;
+		mqd->free_mqd = kfd_free_mqd_cp;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd_hiq;
+		mqd->destroy_mqd = kfd_destroy_mqd_cp;
+		mqd->is_occupied = kfd_is_occupied_cp;
+		mqd->mqd_size = sizeof(struct vi_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		break;
+	case KFD_MQD_TYPE_SDMA:
+		mqd->allocate_mqd = allocate_sdma_mqd;
+		mqd->init_mqd = init_mqd_sdma;
+		mqd->free_mqd = free_mqd_hiq_sdma;
+		mqd->load_mqd = kfd_load_mqd_sdma;
+		mqd->update_mqd = update_mqd_sdma;
+		mqd->destroy_mqd = kfd_destroy_mqd_sdma;
+		mqd->is_occupied = kfd_is_occupied_sdma;
+		mqd->checkpoint_mqd = checkpoint_mqd_sdma;
+		mqd->restore_mqd = restore_mqd_sdma;
+		mqd->mqd_size = sizeof(struct vi_sdma_mqd);
+		mqd->mqd_stride = kfd_mqd_stride;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+#endif
+		break;
+	default:
+		kfree(mqd);
+		return NULL;
+	}
+
+	return mqd;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
new file mode 100644
index 000000000..401096c10
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -0,0 +1,493 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include "kfd_device_queue_manager.h"
+#include "kfd_kernel_queue.h"
+#include "kfd_priv.h"
+
+static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
+				unsigned int buffer_size_bytes)
+{
+	unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);
+
+	WARN((temp * sizeof(uint32_t)) > buffer_size_bytes,
+	     "Runlist IB overflow");
+	*wptr = temp;
+}
+
+static void pm_calc_rlib_size(struct packet_manager *pm,
+				unsigned int *rlib_size,
+				bool *over_subscription)
+{
+	unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
+	unsigned int map_queue_size;
+	unsigned int max_proc_per_quantum = 1;
+	struct kfd_node *dev = pm->dqm->dev;
+
+	process_count = pm->dqm->processes_count;
+	queue_count = pm->dqm->active_queue_count;
+	compute_queue_count = pm->dqm->active_cp_queue_count;
+	gws_queue_count = pm->dqm->gws_queue_count;
+
+	/* check if there is over subscription
+	 * Note: the arbitration between the number of VMIDs and
+	 * hws_max_conc_proc has been done in
+	 * kgd2kfd_device_init().
+	 */
+	*over_subscription = false;
+
+	if (dev->max_proc_per_quantum > 1)
+		max_proc_per_quantum = dev->max_proc_per_quantum;
+
+	if ((process_count > max_proc_per_quantum) ||
+	    compute_queue_count > get_cp_queues_num(pm->dqm) ||
+	    gws_queue_count > 1) {
+		*over_subscription = true;
+		pr_debug("Over subscribed runlist\n");
+	}
+
+	map_queue_size = pm->pmf->map_queues_size;
+	/* calculate run list ib allocation size */
+	*rlib_size = process_count * pm->pmf->map_process_size +
+		     queue_count * map_queue_size;
+
+	/*
+	 * Increase the allocation size in case we need a chained run list
+	 * when over subscription
+	 */
+	if (*over_subscription)
+		*rlib_size += pm->pmf->runlist_size;
+
+	pr_debug("runlist ib size %d\n", *rlib_size);
+}
+
+static int pm_allocate_runlist_ib(struct packet_manager *pm,
+				unsigned int **rl_buffer,
+				uint64_t *rl_gpu_buffer,
+				unsigned int *rl_buffer_size,
+				bool *is_over_subscription)
+{
+	int retval;
+
+	if (WARN_ON(pm->allocated))
+		return -EINVAL;
+
+	pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
+
+	mutex_lock(&pm->lock);
+
+	retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
+					&pm->ib_buffer_obj);
+
+	if (retval) {
+		pr_err("Failed to allocate runlist IB\n");
+		goto out;
+	}
+
+	*(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
+	*rl_gpu_buffer = pm->ib_buffer_obj->gpu_addr;
+
+	memset(*rl_buffer, 0, *rl_buffer_size);
+	pm->allocated = true;
+
+out:
+	mutex_unlock(&pm->lock);
+	return retval;
+}
+
+static int pm_create_runlist_ib(struct packet_manager *pm,
+				struct list_head *queues,
+				uint64_t *rl_gpu_addr,
+				size_t *rl_size_bytes)
+{
+	unsigned int alloc_size_bytes;
+	unsigned int *rl_buffer, rl_wptr, i;
+	int retval, processes_mapped;
+	struct device_process_node *cur;
+	struct qcm_process_device *qpd;
+	struct queue *q;
+	struct kernel_queue *kq;
+	bool is_over_subscription;
+
+	rl_wptr = retval = processes_mapped = 0;
+
+	retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
+				&alloc_size_bytes, &is_over_subscription);
+	if (retval)
+		return retval;
+
+	*rl_size_bytes = alloc_size_bytes;
+	pm->ib_size_bytes = alloc_size_bytes;
+
+	pr_debug("Building runlist ib process count: %d queues count %d\n",
+		pm->dqm->processes_count, pm->dqm->active_queue_count);
+
+	/* build the run list ib packet */
+	list_for_each_entry(cur, queues, list) {
+		qpd = cur->qpd;
+		/* build map process packet */
+		if (processes_mapped >= pm->dqm->processes_count) {
+			pr_debug("Not enough space left in runlist IB\n");
+			pm_release_ib(pm);
+			return -ENOMEM;
+		}
+
+		retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd);
+		if (retval)
+			return retval;
+
+		processes_mapped++;
+		inc_wptr(&rl_wptr, pm->pmf->map_process_size,
+				alloc_size_bytes);
+
+		list_for_each_entry(kq, &qpd->priv_queue_list, list) {
+			if (!kq->queue->properties.is_active)
+				continue;
+
+			pr_debug("static_queue, mapping kernel q %d, is debug status %d\n",
+				kq->queue->queue, qpd->is_debug);
+
+			retval = pm->pmf->map_queues(pm,
+						&rl_buffer[rl_wptr],
+						kq->queue,
+						qpd->is_debug);
+			if (retval)
+				return retval;
+
+			inc_wptr(&rl_wptr,
+				pm->pmf->map_queues_size,
+				alloc_size_bytes);
+		}
+
+		list_for_each_entry(q, &qpd->queues_list, list) {
+			if (!q->properties.is_active)
+				continue;
+
+			pr_debug("static_queue, mapping user queue %d, is debug status %d\n",
+				q->queue, qpd->is_debug);
+
+			retval = pm->pmf->map_queues(pm,
+						&rl_buffer[rl_wptr],
+						q,
+						qpd->is_debug);
+
+			if (retval)
+				return retval;
+
+			inc_wptr(&rl_wptr,
+				pm->pmf->map_queues_size,
+				alloc_size_bytes);
+		}
+	}
+
+	pr_debug("Finished map process and queues to runlist\n");
+
+	if (is_over_subscription) {
+		if (!pm->is_over_subscription)
+			pr_warn("Runlist is getting oversubscribed. Expect reduced ROCm performance.\n");
+		retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
+					*rl_gpu_addr,
+					alloc_size_bytes / sizeof(uint32_t),
+					true);
+	}
+	pm->is_over_subscription = is_over_subscription;
+
+	for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
+		pr_debug("0x%2X ", rl_buffer[i]);
+	pr_debug("\n");
+
+	return retval;
+}
+
+int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
+{
+	switch (dqm->dev->adev->asic_type) {
+	case CHIP_KAVERI:
+	case CHIP_HAWAII:
+		/* PM4 packet structures on CIK are the same as on VI */
+	case CHIP_CARRIZO:
+	case CHIP_TONGA:
+	case CHIP_FIJI:
+	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
+		pm->pmf = &kfd_vi_pm_funcs;
+		break;
+	default:
+		if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2) ||
+		    KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))
+			pm->pmf = &kfd_aldebaran_pm_funcs;
+		else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1))
+			pm->pmf = &kfd_v9_pm_funcs;
+		else {
+			WARN(1, "Unexpected ASIC family %u",
+			     dqm->dev->adev->asic_type);
+			return -EINVAL;
+		}
+	}
+
+	pm->dqm = dqm;
+	mutex_init(&pm->lock);
+	pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
+	if (!pm->priv_queue) {
+		mutex_destroy(&pm->lock);
+		return -ENOMEM;
+	}
+	pm->allocated = false;
+
+	return 0;
+}
+
+void pm_uninit(struct packet_manager *pm, bool hanging)
+{
+	mutex_destroy(&pm->lock);
+	kernel_queue_uninit(pm->priv_queue, hanging);
+	pm->priv_queue = NULL;
+}
+
+int pm_send_set_resources(struct packet_manager *pm,
+				struct scheduling_resources *res)
+{
+	uint32_t *buffer, size;
+	int retval = 0;
+
+	size = pm->pmf->set_resources_size;
+	mutex_lock(&pm->lock);
+	kq_acquire_packet_buffer(pm->priv_queue,
+					size / sizeof(uint32_t),
+					(unsigned int **)&buffer);
+	if (!buffer) {
+		pr_err("Failed to allocate buffer on kernel queue\n");
+		retval = -ENOMEM;
+		goto out;
+	}
+
+	retval = pm->pmf->set_resources(pm, buffer, res);
+	if (!retval)
+		kq_submit_packet(pm->priv_queue);
+	else
+		kq_rollback_packet(pm->priv_queue);
+
+out:
+	mutex_unlock(&pm->lock);
+
+	return retval;
+}
+
+int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
+{
+	uint64_t rl_gpu_ib_addr;
+	uint32_t *rl_buffer;
+	size_t rl_ib_size, packet_size_dwords;
+	int retval;
+
+	retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
+					&rl_ib_size);
+	if (retval)
+		goto fail_create_runlist_ib;
+
+	pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);
+
+	packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
+	mutex_lock(&pm->lock);
+
+	retval = kq_acquire_packet_buffer(pm->priv_queue,
+					packet_size_dwords, &rl_buffer);
+	if (retval)
+		goto fail_acquire_packet_buffer;
+
+	retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
+					rl_ib_size / sizeof(uint32_t), false);
+	if (retval)
+		goto fail_create_runlist;
+
+	kq_submit_packet(pm->priv_queue);
+
+	mutex_unlock(&pm->lock);
+
+	return retval;
+
+fail_create_runlist:
+	kq_rollback_packet(pm->priv_queue);
+fail_acquire_packet_buffer:
+	mutex_unlock(&pm->lock);
+fail_create_runlist_ib:
+	pm_release_ib(pm);
+	return retval;
+}
+
+int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
+			uint64_t fence_value)
+{
+	uint32_t *buffer, size;
+	int retval = 0;
+
+	if (WARN_ON(!fence_address))
+		return -EFAULT;
+
+	size = pm->pmf->query_status_size;
+	mutex_lock(&pm->lock);
+	kq_acquire_packet_buffer(pm->priv_queue,
+			size / sizeof(uint32_t), (unsigned int **)&buffer);
+	if (!buffer) {
+		pr_err("Failed to allocate buffer on kernel queue\n");
+		retval = -ENOMEM;
+		goto out;
+	}
+
+	retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
+	if (!retval)
+		kq_submit_packet(pm->priv_queue);
+	else
+		kq_rollback_packet(pm->priv_queue);
+
+out:
+	mutex_unlock(&pm->lock);
+	return retval;
+}
+
+int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
+{
+	int retval = 0;
+	uint32_t *buffer, size;
+
+	size = pm->pmf->set_grace_period_size;
+
+	mutex_lock(&pm->lock);
+
+	if (size) {
+		kq_acquire_packet_buffer(pm->priv_queue,
+			size / sizeof(uint32_t),
+			(unsigned int **)&buffer);
+
+		if (!buffer) {
+			pr_err("Failed to allocate buffer on kernel queue\n");
+			retval = -ENOMEM;
+			goto out;
+		}
+
+		retval = pm->pmf->set_grace_period(pm, buffer, grace_period);
+		if (!retval)
+			kq_submit_packet(pm->priv_queue);
+		else
+			kq_rollback_packet(pm->priv_queue);
+	}
+
+out:
+	mutex_unlock(&pm->lock);
+	return retval;
+}
+
+int pm_send_unmap_queue(struct packet_manager *pm,
+			enum kfd_unmap_queues_filter filter,
+			uint32_t filter_param, bool reset)
+{
+	uint32_t *buffer, size;
+	int retval = 0;
+
+	size = pm->pmf->unmap_queues_size;
+	mutex_lock(&pm->lock);
+	kq_acquire_packet_buffer(pm->priv_queue,
+			size / sizeof(uint32_t), (unsigned int **)&buffer);
+	if (!buffer) {
+		pr_err("Failed to allocate buffer on kernel queue\n");
+		retval = -ENOMEM;
+		goto out;
+	}
+
+	retval = pm->pmf->unmap_queues(pm, buffer, filter, filter_param, reset);
+	if (!retval)
+		kq_submit_packet(pm->priv_queue);
+	else
+		kq_rollback_packet(pm->priv_queue);
+
+out:
+	mutex_unlock(&pm->lock);
+	return retval;
+}
+
+void pm_release_ib(struct packet_manager *pm)
+{
+	mutex_lock(&pm->lock);
+	if (pm->allocated) {
+		kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);
+		pm->allocated = false;
+	}
+	mutex_unlock(&pm->lock);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+int pm_debugfs_runlist(struct seq_file *m, void *data)
+{
+	struct packet_manager *pm = data;
+
+	mutex_lock(&pm->lock);
+
+	if (!pm->allocated) {
+		seq_puts(m, "  No active runlist\n");
+		goto out;
+	}
+
+	seq_hex_dump(m, "  ", DUMP_PREFIX_OFFSET, 32, 4,
+		     pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false);
+
+out:
+	mutex_unlock(&pm->lock);
+	return 0;
+}
+
+int pm_debugfs_hang_hws(struct packet_manager *pm)
+{
+	uint32_t *buffer, size;
+	int r = 0;
+
+	if (!pm->priv_queue)
+		return -EAGAIN;
+
+	size = pm->pmf->query_status_size;
+	mutex_lock(&pm->lock);
+	kq_acquire_packet_buffer(pm->priv_queue,
+			size / sizeof(uint32_t), (unsigned int **)&buffer);
+	if (!buffer) {
+		pr_err("Failed to allocate buffer on kernel queue\n");
+		r = -ENOMEM;
+		goto out;
+	}
+	memset(buffer, 0x55, size);
+	kq_submit_packet(pm->priv_queue);
+
+	pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
+		buffer[0], buffer[1], buffer[2], buffer[3],
+		buffer[4], buffer[5], buffer[6]);
+out:
+	mutex_unlock(&pm->lock);
+	return r;
+}
+
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
new file mode 100644
index 000000000..1a03173e2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -0,0 +1,437 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_kernel_queue.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers_ai.h"
+#include "kfd_pm4_headers_aldebaran.h"
+#include "kfd_pm4_opcodes.h"
+#include "gc/gc_10_1_0_sh_mask.h"
+
+static int pm_map_process_v9(struct packet_manager *pm,
+		uint32_t *buffer, struct qcm_process_device *qpd)
+{
+	struct pm4_mes_map_process *packet;
+	uint64_t vm_page_table_base_addr = qpd->page_table_base;
+	struct kfd_node *kfd = pm->dqm->dev;
+	struct kfd_process_device *pdd =
+			container_of(qpd, struct kfd_process_device, qpd);
+
+	packet = (struct pm4_mes_map_process *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_map_process));
+	packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
+					sizeof(struct pm4_mes_map_process));
+	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
+	packet->bitfields2.process_quantum = 10;
+	packet->bitfields2.pasid = qpd->pqm->process->pasid;
+	packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
+	packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
+	packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0;
+	packet->bitfields14.num_oac = qpd->num_oac;
+	packet->bitfields14.sdma_enable = 1;
+	packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
+
+	if (kfd->dqm->trap_debug_vmid && pdd->process->debug_trap_enabled &&
+			pdd->process->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED) {
+		packet->bitfields2.debug_vmid = kfd->dqm->trap_debug_vmid;
+		packet->bitfields2.new_debug = 1;
+	}
+
+	packet->sh_mem_config = qpd->sh_mem_config;
+	packet->sh_mem_bases = qpd->sh_mem_bases;
+	if (qpd->tba_addr) {
+		packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
+		/* On GFX9, unlike GFX10, bit TRAP_EN of SQ_SHADER_TBA_HI is
+		 * not defined, so setting it won't do any harm.
+		 */
+		packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8)
+				| 1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT;
+
+		packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
+		packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
+	}
+
+	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
+	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
+
+	packet->vm_context_page_table_base_addr_lo32 =
+			lower_32_bits(vm_page_table_base_addr);
+	packet->vm_context_page_table_base_addr_hi32 =
+			upper_32_bits(vm_page_table_base_addr);
+
+	return 0;
+}
+
+static int pm_map_process_aldebaran(struct packet_manager *pm,
+		uint32_t *buffer, struct qcm_process_device *qpd)
+{
+	struct pm4_mes_map_process_aldebaran *packet;
+	uint64_t vm_page_table_base_addr = qpd->page_table_base;
+	struct kfd_dev *kfd = pm->dqm->dev->kfd;
+	struct kfd_process_device *pdd =
+			container_of(qpd, struct kfd_process_device, qpd);
+	int i;
+
+	packet = (struct pm4_mes_map_process_aldebaran *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_map_process_aldebaran));
+	packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
+			sizeof(struct pm4_mes_map_process_aldebaran));
+	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
+	packet->bitfields2.process_quantum = 10;
+	packet->bitfields2.pasid = qpd->pqm->process->pasid;
+	packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
+	packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
+	packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0;
+	packet->bitfields14.num_oac = qpd->num_oac;
+	packet->bitfields14.sdma_enable = 1;
+	packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
+	packet->spi_gdbg_per_vmid_cntl = pdd->spi_dbg_override |
+						pdd->spi_dbg_launch_mode;
+
+	if (pdd->process->debug_trap_enabled) {
+		for (i = 0; i < kfd->device_info.num_of_watch_points; i++)
+			packet->tcp_watch_cntl[i] = pdd->watch_points[i];
+
+		packet->bitfields2.single_memops =
+				!!(pdd->process->dbg_flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP);
+	}
+
+	packet->sh_mem_config = qpd->sh_mem_config;
+	packet->sh_mem_bases = qpd->sh_mem_bases;
+	if (qpd->tba_addr) {
+		packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
+		packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
+		packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
+		packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
+	}
+
+	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
+	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
+
+	packet->vm_context_page_table_base_addr_lo32 =
+			lower_32_bits(vm_page_table_base_addr);
+	packet->vm_context_page_table_base_addr_hi32 =
+			upper_32_bits(vm_page_table_base_addr);
+
+	return 0;
+}
+
+static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
+			uint64_t ib, size_t ib_size_in_dwords, bool chain)
+{
+	struct pm4_mes_runlist *packet;
+
+	int concurrent_proc_cnt = 0;
+	struct kfd_node *kfd = pm->dqm->dev;
+
+	/* Determine the number of processes to map together to HW:
+	 * it can not exceed the number of VMIDs available to the
+	 * scheduler, and it is determined by the smaller of the number
+	 * of processes in the runlist and kfd module parameter
+	 * hws_max_conc_proc.
+	 * Note: the arbitration between the number of VMIDs and
+	 * hws_max_conc_proc has been done in
+	 * kgd2kfd_device_init().
+	 */
+	concurrent_proc_cnt = min(pm->dqm->processes_count,
+			kfd->max_proc_per_quantum);
+
+	packet = (struct pm4_mes_runlist *)buffer;
+
+	memset(buffer, 0, sizeof(struct pm4_mes_runlist));
+	packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
+						sizeof(struct pm4_mes_runlist));
+
+	packet->bitfields4.ib_size = ib_size_in_dwords;
+	packet->bitfields4.chain = chain ? 1 : 0;
+	packet->bitfields4.offload_polling = 0;
+	packet->bitfields4.chained_runlist_idle_disable = chain ? 1 : 0;
+	packet->bitfields4.valid = 1;
+	packet->bitfields4.process_cnt = concurrent_proc_cnt;
+	packet->ordinal2 = lower_32_bits(ib);
+	packet->ib_base_hi = upper_32_bits(ib);
+
+	return 0;
+}
+
+static int pm_set_resources_v9(struct packet_manager *pm, uint32_t *buffer,
+				struct scheduling_resources *res)
+{
+	struct pm4_mes_set_resources *packet;
+
+	packet = (struct pm4_mes_set_resources *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_set_resources));
+
+	packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES,
+					sizeof(struct pm4_mes_set_resources));
+
+	packet->bitfields2.queue_type =
+			queue_type__mes_set_resources__hsa_interface_queue_hiq;
+	packet->bitfields2.vmid_mask = res->vmid_mask;
+	packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
+	packet->bitfields7.oac_mask = res->oac_mask;
+	packet->bitfields8.gds_heap_base = res->gds_heap_base;
+	packet->bitfields8.gds_heap_size = res->gds_heap_size;
+
+	packet->gws_mask_lo = lower_32_bits(res->gws_mask);
+	packet->gws_mask_hi = upper_32_bits(res->gws_mask);
+
+	packet->queue_mask_lo = lower_32_bits(res->queue_mask);
+	packet->queue_mask_hi = upper_32_bits(res->queue_mask);
+
+	return 0;
+}
+
+static inline bool pm_use_ext_eng(struct kfd_dev *dev)
+{
+	return dev->adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 2, 0);
+}
+
+static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
+		struct queue *q, bool is_static)
+{
+	struct pm4_mes_map_queues *packet;
+	bool use_static = is_static;
+
+	packet = (struct pm4_mes_map_queues *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
+
+	packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
+					sizeof(struct pm4_mes_map_queues));
+	packet->bitfields2.num_queues = 1;
+	packet->bitfields2.queue_sel =
+		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
+
+	packet->bitfields2.engine_sel =
+		engine_sel__mes_map_queues__compute_vi;
+	packet->bitfields2.gws_control_queue = q->gws ? 1 : 0;
+	packet->bitfields2.extended_engine_sel =
+		extended_engine_sel__mes_map_queues__legacy_engine_sel;
+	packet->bitfields2.queue_type =
+		queue_type__mes_map_queues__normal_compute_vi;
+
+	switch (q->properties.type) {
+	case KFD_QUEUE_TYPE_COMPUTE:
+		if (use_static)
+			packet->bitfields2.queue_type =
+		queue_type__mes_map_queues__normal_latency_static_queue_vi;
+		break;
+	case KFD_QUEUE_TYPE_DIQ:
+		packet->bitfields2.queue_type =
+			queue_type__mes_map_queues__debug_interface_queue_vi;
+		break;
+	case KFD_QUEUE_TYPE_SDMA:
+	case KFD_QUEUE_TYPE_SDMA_XGMI:
+		use_static = false; /* no static queues under SDMA */
+		if (q->properties.sdma_engine_id < 2 &&
+		    !pm_use_ext_eng(q->device->kfd))
+			packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
+				engine_sel__mes_map_queues__sdma0_vi;
+		else {
+			/*
+			 * For GFX9.4.3, SDMA engine id can be greater than 8.
+			 * For such cases, set extended_engine_sel to 2 and
+			 * ensure engine_sel lies between 0-7.
+			 */
+			if (q->properties.sdma_engine_id >= 8)
+				packet->bitfields2.extended_engine_sel =
+					extended_engine_sel__mes_map_queues__sdma8_to_15_sel;
+			else
+				packet->bitfields2.extended_engine_sel =
+					extended_engine_sel__mes_map_queues__sdma0_to_7_sel;
+
+			packet->bitfields2.engine_sel = q->properties.sdma_engine_id % 8;
+		}
+		break;
+	default:
+		WARN(1, "queue type %d", q->properties.type);
+		return -EINVAL;
+	}
+	packet->bitfields3.doorbell_offset =
+			q->properties.doorbell_off;
+
+	packet->mqd_addr_lo =
+			lower_32_bits(q->gart_mqd_addr);
+
+	packet->mqd_addr_hi =
+			upper_32_bits(q->gart_mqd_addr);
+
+	packet->wptr_addr_lo =
+			lower_32_bits((uint64_t)q->properties.write_ptr);
+
+	packet->wptr_addr_hi =
+			upper_32_bits((uint64_t)q->properties.write_ptr);
+
+	return 0;
+}
+
+static int pm_set_grace_period_v9(struct packet_manager *pm,
+		uint32_t *buffer,
+		uint32_t grace_period)
+{
+	struct pm4_mec_write_data_mmio *packet;
+	uint32_t reg_offset = 0;
+	uint32_t reg_data = 0;
+
+	pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
+			pm->dqm->dev->adev,
+			pm->dqm->wait_times,
+			grace_period,
+			&reg_offset,
+			&reg_data);
+
+	if (grace_period == USE_DEFAULT_GRACE_PERIOD)
+		reg_data = pm->dqm->wait_times;
+
+	packet = (struct pm4_mec_write_data_mmio *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio));
+
+	packet->header.u32All = pm_build_pm4_header(IT_WRITE_DATA,
+					sizeof(struct pm4_mec_write_data_mmio));
+
+	packet->bitfields2.dst_sel  = dst_sel___write_data__mem_mapped_register;
+	packet->bitfields2.addr_incr =
+			addr_incr___write_data__do_not_increment_address;
+
+	packet->bitfields3.dst_mmreg_addr = reg_offset;
+
+	packet->data = reg_data;
+
+	return 0;
+}
+
+static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
+			enum kfd_unmap_queues_filter filter,
+			uint32_t filter_param, bool reset)
+{
+	struct pm4_mes_unmap_queues *packet;
+
+	packet = (struct pm4_mes_unmap_queues *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
+
+	packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
+					sizeof(struct pm4_mes_unmap_queues));
+
+	packet->bitfields2.extended_engine_sel =
+				pm_use_ext_eng(pm->dqm->dev->kfd) ?
+		extended_engine_sel__mes_unmap_queues__sdma0_to_7_sel :
+		extended_engine_sel__mes_unmap_queues__legacy_engine_sel;
+
+	packet->bitfields2.engine_sel =
+		engine_sel__mes_unmap_queues__compute;
+
+	if (reset)
+		packet->bitfields2.action =
+			action__mes_unmap_queues__reset_queues;
+	else
+		packet->bitfields2.action =
+			action__mes_unmap_queues__preempt_queues;
+
+	switch (filter) {
+	case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
+		packet->bitfields2.queue_sel =
+			queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
+		packet->bitfields3a.pasid = filter_param;
+		break;
+	case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
+		packet->bitfields2.queue_sel =
+			queue_sel__mes_unmap_queues__unmap_all_queues;
+		break;
+	case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
+		/* in this case, we do not preempt static queues */
+		packet->bitfields2.queue_sel =
+			queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
+		break;
+	default:
+		WARN(1, "filter %d", filter);
+		return -EINVAL;
+	}
+
+	return 0;
+
+}
+
+static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
+			uint64_t fence_address,	uint64_t fence_value)
+{
+	struct pm4_mes_query_status *packet;
+
+	packet = (struct pm4_mes_query_status *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_query_status));
+
+
+	packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
+					sizeof(struct pm4_mes_query_status));
+
+	packet->bitfields2.context_id = 0;
+	packet->bitfields2.interrupt_sel =
+			interrupt_sel__mes_query_status__completion_status;
+	packet->bitfields2.command =
+			command__mes_query_status__fence_only_after_write_ack;
+
+	packet->addr_hi = upper_32_bits((uint64_t)fence_address);
+	packet->addr_lo = lower_32_bits((uint64_t)fence_address);
+	packet->data_hi = upper_32_bits((uint64_t)fence_value);
+	packet->data_lo = lower_32_bits((uint64_t)fence_value);
+
+	return 0;
+}
+
+const struct packet_manager_funcs kfd_v9_pm_funcs = {
+	.map_process		= pm_map_process_v9,
+	.runlist		= pm_runlist_v9,
+	.set_resources		= pm_set_resources_v9,
+	.map_queues		= pm_map_queues_v9,
+	.unmap_queues		= pm_unmap_queues_v9,
+	.set_grace_period       = pm_set_grace_period_v9,
+	.query_status		= pm_query_status_v9,
+	.release_mem		= NULL,
+	.map_process_size	= sizeof(struct pm4_mes_map_process),
+	.runlist_size		= sizeof(struct pm4_mes_runlist),
+	.set_resources_size	= sizeof(struct pm4_mes_set_resources),
+	.map_queues_size	= sizeof(struct pm4_mes_map_queues),
+	.unmap_queues_size	= sizeof(struct pm4_mes_unmap_queues),
+	.set_grace_period_size  = sizeof(struct pm4_mec_write_data_mmio),
+	.query_status_size	= sizeof(struct pm4_mes_query_status),
+	.release_mem_size	= 0,
+};
+
+const struct packet_manager_funcs kfd_aldebaran_pm_funcs = {
+	.map_process		= pm_map_process_aldebaran,
+	.runlist		= pm_runlist_v9,
+	.set_resources		= pm_set_resources_v9,
+	.map_queues		= pm_map_queues_v9,
+	.unmap_queues		= pm_unmap_queues_v9,
+	.set_grace_period       = pm_set_grace_period_v9,
+	.query_status		= pm_query_status_v9,
+	.release_mem		= NULL,
+	.map_process_size	= sizeof(struct pm4_mes_map_process_aldebaran),
+	.runlist_size		= sizeof(struct pm4_mes_runlist),
+	.set_resources_size	= sizeof(struct pm4_mes_set_resources),
+	.map_queues_size	= sizeof(struct pm4_mes_map_queues),
+	.unmap_queues_size	= sizeof(struct pm4_mes_unmap_queues),
+	.set_grace_period_size  = sizeof(struct pm4_mec_write_data_mmio),
+	.query_status_size	= sizeof(struct pm4_mes_query_status),
+	.release_mem_size	= 0,
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
new file mode 100644
index 000000000..c1199d06d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
@@ -0,0 +1,317 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_kernel_queue.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers_vi.h"
+#include "kfd_pm4_opcodes.h"
+
+unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size)
+{
+	union PM4_MES_TYPE_3_HEADER header;
+
+	header.u32All = 0;
+	header.opcode = opcode;
+	header.count = packet_size / 4 - 2;
+	header.type = PM4_TYPE_3;
+
+	return header.u32All;
+}
+
+static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer,
+				struct qcm_process_device *qpd)
+{
+	struct pm4_mes_map_process *packet;
+
+	packet = (struct pm4_mes_map_process *)buffer;
+
+	memset(buffer, 0, sizeof(struct pm4_mes_map_process));
+
+	packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
+					sizeof(struct pm4_mes_map_process));
+	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
+	packet->bitfields2.process_quantum = 10;
+	packet->bitfields2.pasid = qpd->pqm->process->pasid;
+	packet->bitfields3.page_table_base = qpd->page_table_base;
+	packet->bitfields10.gds_size = qpd->gds_size;
+	packet->bitfields10.num_gws = qpd->num_gws;
+	packet->bitfields10.num_oac = qpd->num_oac;
+	packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
+
+	packet->sh_mem_config = qpd->sh_mem_config;
+	packet->sh_mem_bases = qpd->sh_mem_bases;
+	packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base;
+	packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit;
+
+	packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base;
+
+	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
+	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
+
+	return 0;
+}
+
+static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer,
+			uint64_t ib, size_t ib_size_in_dwords, bool chain)
+{
+	struct pm4_mes_runlist *packet;
+	int concurrent_proc_cnt = 0;
+	struct kfd_node *kfd = pm->dqm->dev;
+
+	if (WARN_ON(!ib))
+		return -EFAULT;
+
+	/* Determine the number of processes to map together to HW:
+	 * it can not exceed the number of VMIDs available to the
+	 * scheduler, and it is determined by the smaller of the number
+	 * of processes in the runlist and kfd module parameter
+	 * hws_max_conc_proc.
+	 * Note: the arbitration between the number of VMIDs and
+	 * hws_max_conc_proc has been done in
+	 * kgd2kfd_device_init().
+	 */
+	concurrent_proc_cnt = min(pm->dqm->processes_count,
+			kfd->max_proc_per_quantum);
+
+	packet = (struct pm4_mes_runlist *)buffer;
+
+	memset(buffer, 0, sizeof(struct pm4_mes_runlist));
+	packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
+						sizeof(struct pm4_mes_runlist));
+
+	packet->bitfields4.ib_size = ib_size_in_dwords;
+	packet->bitfields4.chain = chain ? 1 : 0;
+	packet->bitfields4.offload_polling = 0;
+	packet->bitfields4.valid = 1;
+	packet->bitfields4.process_cnt = concurrent_proc_cnt;
+	packet->ordinal2 = lower_32_bits(ib);
+	packet->bitfields3.ib_base_hi = upper_32_bits(ib);
+
+	return 0;
+}
+
+static int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
+			       struct scheduling_resources *res)
+{
+	struct pm4_mes_set_resources *packet;
+
+	packet = (struct pm4_mes_set_resources *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_set_resources));
+
+	packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES,
+					sizeof(struct pm4_mes_set_resources));
+
+	packet->bitfields2.queue_type =
+			queue_type__mes_set_resources__hsa_interface_queue_hiq;
+	packet->bitfields2.vmid_mask = res->vmid_mask;
+	packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
+	packet->bitfields7.oac_mask = res->oac_mask;
+	packet->bitfields8.gds_heap_base = res->gds_heap_base;
+	packet->bitfields8.gds_heap_size = res->gds_heap_size;
+
+	packet->gws_mask_lo = lower_32_bits(res->gws_mask);
+	packet->gws_mask_hi = upper_32_bits(res->gws_mask);
+
+	packet->queue_mask_lo = lower_32_bits(res->queue_mask);
+	packet->queue_mask_hi = upper_32_bits(res->queue_mask);
+
+	return 0;
+}
+
+static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
+		struct queue *q, bool is_static)
+{
+	struct pm4_mes_map_queues *packet;
+	bool use_static = is_static;
+
+	packet = (struct pm4_mes_map_queues *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
+
+	packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
+					sizeof(struct pm4_mes_map_queues));
+	packet->bitfields2.num_queues = 1;
+	packet->bitfields2.queue_sel =
+		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
+
+	packet->bitfields2.engine_sel =
+		engine_sel__mes_map_queues__compute_vi;
+	packet->bitfields2.queue_type =
+		queue_type__mes_map_queues__normal_compute_vi;
+
+	switch (q->properties.type) {
+	case KFD_QUEUE_TYPE_COMPUTE:
+		if (use_static)
+			packet->bitfields2.queue_type =
+		queue_type__mes_map_queues__normal_latency_static_queue_vi;
+		break;
+	case KFD_QUEUE_TYPE_DIQ:
+		packet->bitfields2.queue_type =
+			queue_type__mes_map_queues__debug_interface_queue_vi;
+		break;
+	case KFD_QUEUE_TYPE_SDMA:
+	case KFD_QUEUE_TYPE_SDMA_XGMI:
+		packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
+				engine_sel__mes_map_queues__sdma0_vi;
+		use_static = false; /* no static queues under SDMA */
+		break;
+	default:
+		WARN(1, "queue type %d", q->properties.type);
+		return -EINVAL;
+	}
+	packet->bitfields3.doorbell_offset =
+			q->properties.doorbell_off;
+
+	packet->mqd_addr_lo =
+			lower_32_bits(q->gart_mqd_addr);
+
+	packet->mqd_addr_hi =
+			upper_32_bits(q->gart_mqd_addr);
+
+	packet->wptr_addr_lo =
+			lower_32_bits((uint64_t)q->properties.write_ptr);
+
+	packet->wptr_addr_hi =
+			upper_32_bits((uint64_t)q->properties.write_ptr);
+
+	return 0;
+}
+
+static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
+			enum kfd_unmap_queues_filter filter,
+			uint32_t filter_param, bool reset)
+{
+	struct pm4_mes_unmap_queues *packet;
+
+	packet = (struct pm4_mes_unmap_queues *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
+
+	packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
+					sizeof(struct pm4_mes_unmap_queues));
+
+	packet->bitfields2.engine_sel =
+			engine_sel__mes_unmap_queues__compute;
+
+	if (reset)
+		packet->bitfields2.action =
+			action__mes_unmap_queues__reset_queues;
+	else
+		packet->bitfields2.action =
+			action__mes_unmap_queues__preempt_queues;
+
+	switch (filter) {
+	case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
+		packet->bitfields2.queue_sel =
+			queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
+		packet->bitfields3a.pasid = filter_param;
+		break;
+	case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
+		packet->bitfields2.queue_sel =
+			queue_sel__mes_unmap_queues__unmap_all_queues;
+		break;
+	case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
+		/* in this case, we do not preempt static queues */
+		packet->bitfields2.queue_sel =
+			queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
+		break;
+	default:
+		WARN(1, "filter %d", filter);
+		return -EINVAL;
+	}
+
+	return 0;
+
+}
+
+static int pm_query_status_vi(struct packet_manager *pm, uint32_t *buffer,
+			uint64_t fence_address,	uint64_t fence_value)
+{
+	struct pm4_mes_query_status *packet;
+
+	packet = (struct pm4_mes_query_status *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_query_status));
+
+	packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
+					sizeof(struct pm4_mes_query_status));
+
+	packet->bitfields2.context_id = 0;
+	packet->bitfields2.interrupt_sel =
+			interrupt_sel__mes_query_status__completion_status;
+	packet->bitfields2.command =
+			command__mes_query_status__fence_only_after_write_ack;
+
+	packet->addr_hi = upper_32_bits((uint64_t)fence_address);
+	packet->addr_lo = lower_32_bits((uint64_t)fence_address);
+	packet->data_hi = upper_32_bits((uint64_t)fence_value);
+	packet->data_lo = lower_32_bits((uint64_t)fence_value);
+
+	return 0;
+}
+
+static int pm_release_mem_vi(uint64_t gpu_addr, uint32_t *buffer)
+{
+	struct pm4_mec_release_mem *packet;
+
+	packet = (struct pm4_mec_release_mem *)buffer;
+	memset(buffer, 0, sizeof(*packet));
+
+	packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
+						 sizeof(*packet));
+
+	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
+	packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
+	packet->bitfields2.tcl1_action_ena = 1;
+	packet->bitfields2.tc_action_ena = 1;
+	packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
+	packet->bitfields2.atc = 0;
+
+	packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
+	packet->bitfields3.int_sel =
+		int_sel___release_mem__send_interrupt_after_write_confirm;
+
+	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
+	packet->address_hi = upper_32_bits(gpu_addr);
+
+	packet->data_lo = 0;
+
+	return 0;
+}
+
+const struct packet_manager_funcs kfd_vi_pm_funcs = {
+	.map_process		= pm_map_process_vi,
+	.runlist		= pm_runlist_vi,
+	.set_resources		= pm_set_resources_vi,
+	.map_queues		= pm_map_queues_vi,
+	.unmap_queues		= pm_unmap_queues_vi,
+	.set_grace_period	= NULL,
+	.query_status		= pm_query_status_vi,
+	.release_mem		= pm_release_mem_vi,
+	.map_process_size	= sizeof(struct pm4_mes_map_process),
+	.runlist_size		= sizeof(struct pm4_mes_runlist),
+	.set_resources_size	= sizeof(struct pm4_mes_set_resources),
+	.map_queues_size	= sizeof(struct pm4_mes_map_queues),
+	.unmap_queues_size	= sizeof(struct pm4_mes_unmap_queues),
+	.set_grace_period_size	= 0,
+	.query_status_size	= sizeof(struct pm4_mes_query_status),
+	.release_mem_size	= sizeof(struct pm4_mec_release_mem)
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
new file mode 100644
index 000000000..e3b250918
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/types.h>
+#include "kfd_priv.h"
+#include "amdgpu_ids.h"
+
+static unsigned int pasid_bits = 16;
+static bool pasids_allocated; /* = false */
+
+bool kfd_set_pasid_limit(unsigned int new_limit)
+{
+	if (new_limit < 2)
+		return false;
+
+	if (new_limit < (1U << pasid_bits)) {
+		if (pasids_allocated)
+			/* We've already allocated user PASIDs, too late to
+			 * change the limit
+			 */
+			return false;
+
+		while (new_limit < (1U << pasid_bits))
+			pasid_bits--;
+	}
+
+	return true;
+}
+
+unsigned int kfd_get_pasid_limit(void)
+{
+	return 1U << pasid_bits;
+}
+
+u32 kfd_pasid_alloc(void)
+{
+	int r = amdgpu_pasid_alloc(pasid_bits);
+
+	if (r > 0) {
+		pasids_allocated = true;
+		return r;
+	}
+
+	return 0;
+}
+
+void kfd_pasid_free(u32 pasid)
+{
+	amdgpu_pasid_free(pasid);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
new file mode 100644
index 000000000..7274edfd3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_PM4_HEADERS_H_
+#define KFD_PM4_HEADERS_H_
+
+#ifndef PM4_MES_HEADER_DEFINED
+#define PM4_MES_HEADER_DEFINED
+union PM4_MES_TYPE_3_HEADER {
+	struct {
+		/* reserved */
+		uint32_t reserved1:8;
+		/* IT opcode */
+		uint32_t opcode:8;
+		/* number of DWORDs - 1 in the information body */
+		uint32_t count:14;
+		/* packet identifier. It should be 3 for type 3 packets */
+		uint32_t type:2;
+	};
+	uint32_t u32all;
+};
+#endif /* PM4_MES_HEADER_DEFINED */
+
+
+/*--------------------MES_MAP_PROCESS-------------------- */
+
+#ifndef PM4_MES_MAP_PROCESS_DEFINED
+#define PM4_MES_MAP_PROCESS_DEFINED
+
+struct pm4_map_process {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;	/* header */
+		uint32_t ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t pasid:16;
+			uint32_t reserved1:8;
+			uint32_t diq_enable:1;
+			uint32_t process_quantum:7;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	union {
+		struct {
+			uint32_t page_table_base:28;
+			uint32_t reserved3:4;
+		} bitfields3;
+		uint32_t ordinal3;
+	};
+
+	uint32_t sh_mem_bases;
+	uint32_t sh_mem_ape1_base;
+	uint32_t sh_mem_ape1_limit;
+	uint32_t sh_mem_config;
+	uint32_t gds_addr_lo;
+	uint32_t gds_addr_hi;
+
+	union {
+		struct {
+			uint32_t num_gws:6;
+			uint32_t reserved4:2;
+			uint32_t num_oac:4;
+			uint32_t reserved5:4;
+			uint32_t gds_size:6;
+			uint32_t num_queues:10;
+		} bitfields10;
+		uint32_t ordinal10;
+	};
+
+};
+#endif
+
+#ifndef PM4_MES_MAP_PROCESS_DEFINED_KV_SCRATCH
+#define PM4_MES_MAP_PROCESS_DEFINED_KV_SCRATCH
+
+struct pm4_map_process_scratch_kv {
+	union {
+		union PM4_MES_TYPE_3_HEADER   header; /* header */
+		uint32_t            ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t pasid:16;
+			uint32_t reserved1:8;
+			uint32_t diq_enable:1;
+			uint32_t process_quantum:7;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	union {
+		struct {
+			uint32_t page_table_base:28;
+			uint32_t reserved2:4;
+		} bitfields3;
+		uint32_t ordinal3;
+	};
+
+	uint32_t reserved3;
+	uint32_t sh_mem_bases;
+	uint32_t sh_mem_config;
+	uint32_t sh_mem_ape1_base;
+	uint32_t sh_mem_ape1_limit;
+	uint32_t sh_hidden_private_base_vmid;
+	uint32_t reserved4;
+	uint32_t reserved5;
+	uint32_t gds_addr_lo;
+	uint32_t gds_addr_hi;
+
+	union {
+		struct {
+			uint32_t num_gws:6;
+			uint32_t reserved6:2;
+			uint32_t num_oac:4;
+			uint32_t reserved7:4;
+			uint32_t gds_size:6;
+			uint32_t num_queues:10;
+		} bitfields14;
+		uint32_t ordinal14;
+	};
+
+	uint32_t completion_signal_lo32;
+uint32_t completion_signal_hi32;
+};
+#endif
+
+enum {
+	CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014
+};
+
+#endif /* KFD_PM4_HEADERS_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
new file mode 100644
index 000000000..8b6b2bd5c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
@@ -0,0 +1,659 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2016-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef F32_MES_PM4_PACKETS_H
+#define F32_MES_PM4_PACKETS_H
+
+#ifndef PM4_MES_HEADER_DEFINED
+#define PM4_MES_HEADER_DEFINED
+union PM4_MES_TYPE_3_HEADER {
+	struct {
+		uint32_t reserved1 : 8; /* < reserved */
+		uint32_t opcode    : 8; /* < IT opcode */
+		uint32_t count     : 14;/* < number of DWORDs - 1 in the
+					 *   information body.
+					 */
+		uint32_t type      : 2; /* < packet identifier.
+					 *   It should be 3 for type 3 packets
+					 */
+	};
+	uint32_t u32All;
+};
+#endif /* PM4_MES_HEADER_DEFINED */
+
+/*--------------------MES_SET_RESOURCES--------------------*/
+
+#ifndef PM4_MES_SET_RESOURCES_DEFINED
+#define PM4_MES_SET_RESOURCES_DEFINED
+enum mes_set_resources_queue_type_enum {
+	queue_type__mes_set_resources__kernel_interface_queue_kiq = 0,
+	queue_type__mes_set_resources__hsa_interface_queue_hiq = 1,
+	queue_type__mes_set_resources__hsa_debug_interface_queue = 4
+};
+
+
+struct pm4_mes_set_resources {
+	union {
+		union PM4_MES_TYPE_3_HEADER	header;		/* header */
+		uint32_t			ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t vmid_mask:16;
+			uint32_t unmap_latency:8;
+			uint32_t reserved1:5;
+			enum mes_set_resources_queue_type_enum queue_type:3;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	uint32_t queue_mask_lo;
+	uint32_t queue_mask_hi;
+	uint32_t gws_mask_lo;
+	uint32_t gws_mask_hi;
+
+	union {
+		struct {
+			uint32_t oac_mask:16;
+			uint32_t reserved2:16;
+		} bitfields7;
+		uint32_t ordinal7;
+	};
+
+	union {
+		struct {
+		uint32_t gds_heap_base:10;
+		uint32_t reserved3:1;
+		uint32_t gds_heap_size:10;
+		uint32_t reserved4:11;
+		} bitfields8;
+		uint32_t ordinal8;
+	};
+
+};
+#endif
+
+/*--------------------MES_RUN_LIST--------------------*/
+
+#ifndef PM4_MES_RUN_LIST_DEFINED
+#define PM4_MES_RUN_LIST_DEFINED
+
+struct pm4_mes_runlist {
+	union {
+		union PM4_MES_TYPE_3_HEADER header; /* header */
+		uint32_t ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t reserved1:2;
+			uint32_t ib_base_lo:30;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	uint32_t ib_base_hi;
+
+	union {
+		struct {
+			uint32_t ib_size:20;
+			uint32_t chain:1;
+			uint32_t offload_polling:1;
+			uint32_t chained_runlist_idle_disable:1;
+			uint32_t valid:1;
+			uint32_t process_cnt:4;
+			uint32_t reserved3:4;
+		} bitfields4;
+		uint32_t ordinal4;
+	};
+
+};
+#endif
+
+/*--------------------MES_MAP_PROCESS--------------------*/
+
+#ifndef PM4_MES_MAP_PROCESS_DEFINED
+#define PM4_MES_MAP_PROCESS_DEFINED
+
+struct pm4_mes_map_process {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;	/* header */
+		uint32_t ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t pasid:16;
+			uint32_t reserved1:2;
+			uint32_t debug_vmid:4;
+			uint32_t new_debug:1;
+			uint32_t reserved2:1;
+			uint32_t diq_enable:1;
+			uint32_t process_quantum:7;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	uint32_t vm_context_page_table_base_addr_lo32;
+
+	uint32_t vm_context_page_table_base_addr_hi32;
+
+	uint32_t sh_mem_bases;
+
+	uint32_t sh_mem_config;
+
+	uint32_t sq_shader_tba_lo;
+
+	uint32_t sq_shader_tba_hi;
+
+	uint32_t sq_shader_tma_lo;
+
+	uint32_t sq_shader_tma_hi;
+
+	uint32_t reserved6;
+
+	uint32_t gds_addr_lo;
+
+	uint32_t gds_addr_hi;
+
+	union {
+		struct {
+			uint32_t num_gws:7;
+			uint32_t sdma_enable:1;
+			uint32_t num_oac:4;
+			uint32_t gds_size_hi:4;
+			uint32_t gds_size:6;
+			uint32_t num_queues:10;
+		} bitfields14;
+		uint32_t ordinal14;
+	};
+
+	uint32_t completion_signal_lo;
+
+	uint32_t completion_signal_hi;
+
+};
+
+#endif
+
+/*--------------------MES_MAP_PROCESS_VM--------------------*/
+
+#ifndef PM4_MES_MAP_PROCESS_VM_DEFINED
+#define PM4_MES_MAP_PROCESS_VM_DEFINED
+
+struct PM4_MES_MAP_PROCESS_VM {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;	/* header */
+		uint32_t ordinal1;
+	};
+
+	uint32_t reserved1;
+
+	uint32_t vm_context_cntl;
+
+	uint32_t reserved2;
+
+	uint32_t vm_context_page_table_end_addr_lo32;
+
+	uint32_t vm_context_page_table_end_addr_hi32;
+
+	uint32_t vm_context_page_table_start_addr_lo32;
+
+	uint32_t vm_context_page_table_start_addr_hi32;
+
+	uint32_t reserved3;
+
+	uint32_t reserved4;
+
+	uint32_t reserved5;
+
+	uint32_t reserved6;
+
+	uint32_t reserved7;
+
+	uint32_t reserved8;
+
+	uint32_t completion_signal_lo32;
+
+	uint32_t completion_signal_hi32;
+
+};
+#endif
+
+/*--------------------MES_MAP_QUEUES--------------------*/
+
+#ifndef PM4_MES_MAP_QUEUES_VI_DEFINED
+#define PM4_MES_MAP_QUEUES_VI_DEFINED
+enum mes_map_queues_queue_sel_enum {
+	queue_sel__mes_map_queues__map_to_specified_queue_slots_vi = 0,
+queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi = 1
+};
+
+enum mes_map_queues_queue_type_enum {
+	queue_type__mes_map_queues__normal_compute_vi = 0,
+	queue_type__mes_map_queues__debug_interface_queue_vi = 1,
+	queue_type__mes_map_queues__normal_latency_static_queue_vi = 2,
+queue_type__mes_map_queues__low_latency_static_queue_vi = 3
+};
+
+enum mes_map_queues_engine_sel_enum {
+	engine_sel__mes_map_queues__compute_vi = 0,
+	engine_sel__mes_map_queues__sdma0_vi = 2,
+	engine_sel__mes_map_queues__sdma1_vi = 3
+};
+
+enum mes_map_queues_extended_engine_sel_enum {
+	extended_engine_sel__mes_map_queues__legacy_engine_sel = 0,
+	extended_engine_sel__mes_map_queues__sdma0_to_7_sel  = 1,
+	extended_engine_sel__mes_map_queues__sdma8_to_15_sel = 2
+};
+
+struct pm4_mes_map_queues {
+	union {
+		union PM4_MES_TYPE_3_HEADER   header;            /* header */
+		uint32_t            ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t reserved1:2;
+			enum mes_map_queues_extended_engine_sel_enum extended_engine_sel:2;
+			enum mes_map_queues_queue_sel_enum queue_sel:2;
+			uint32_t reserved5:6;
+			uint32_t gws_control_queue:1;
+			uint32_t reserved2:8;
+			enum mes_map_queues_queue_type_enum queue_type:3;
+			uint32_t reserved3:2;
+			enum mes_map_queues_engine_sel_enum engine_sel:3;
+			uint32_t num_queues:3;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	union {
+		struct {
+			uint32_t reserved3:1;
+			uint32_t check_disable:1;
+			uint32_t doorbell_offset:26;
+			uint32_t reserved4:4;
+		} bitfields3;
+		uint32_t ordinal3;
+	};
+
+	uint32_t mqd_addr_lo;
+	uint32_t mqd_addr_hi;
+	uint32_t wptr_addr_lo;
+	uint32_t wptr_addr_hi;
+};
+#endif
+
+/*--------------------MES_QUERY_STATUS--------------------*/
+
+#ifndef PM4_MES_QUERY_STATUS_DEFINED
+#define PM4_MES_QUERY_STATUS_DEFINED
+enum mes_query_status_interrupt_sel_enum {
+	interrupt_sel__mes_query_status__completion_status = 0,
+	interrupt_sel__mes_query_status__process_status = 1,
+	interrupt_sel__mes_query_status__queue_status = 2
+};
+
+enum mes_query_status_command_enum {
+	command__mes_query_status__interrupt_only = 0,
+	command__mes_query_status__fence_only_immediate = 1,
+	command__mes_query_status__fence_only_after_write_ack = 2,
+	command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3
+};
+
+enum mes_query_status_engine_sel_enum {
+	engine_sel__mes_query_status__compute = 0,
+	engine_sel__mes_query_status__sdma0_queue = 2,
+	engine_sel__mes_query_status__sdma1_queue = 3
+};
+
+struct pm4_mes_query_status {
+	union {
+		union PM4_MES_TYPE_3_HEADER   header;            /* header */
+		uint32_t            ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t context_id:28;
+			enum mes_query_status_interrupt_sel_enum	interrupt_sel:2;
+			enum mes_query_status_command_enum command:2;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	union {
+		struct {
+			uint32_t pasid:16;
+			uint32_t reserved1:16;
+		} bitfields3a;
+		struct {
+			uint32_t reserved2:2;
+			uint32_t doorbell_offset:26;
+			enum mes_query_status_engine_sel_enum engine_sel:3;
+			uint32_t reserved3:1;
+		} bitfields3b;
+		uint32_t ordinal3;
+	};
+
+	uint32_t addr_lo;
+	uint32_t addr_hi;
+	uint32_t data_lo;
+	uint32_t data_hi;
+};
+#endif
+
+/*--------------------MES_UNMAP_QUEUES--------------------*/
+
+#ifndef PM4_MES_UNMAP_QUEUES_DEFINED
+#define PM4_MES_UNMAP_QUEUES_DEFINED
+enum mes_unmap_queues_action_enum {
+	action__mes_unmap_queues__preempt_queues = 0,
+	action__mes_unmap_queues__reset_queues = 1,
+	action__mes_unmap_queues__disable_process_queues = 2,
+	action__mes_unmap_queues__reserved = 3
+};
+
+enum mes_unmap_queues_queue_sel_enum {
+	queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
+	queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
+	queue_sel__mes_unmap_queues__unmap_all_queues = 2,
+	queue_sel__mes_unmap_queues__unmap_all_non_static_queues = 3
+};
+
+enum mes_unmap_queues_engine_sel_enum {
+	engine_sel__mes_unmap_queues__compute = 0,
+	engine_sel__mes_unmap_queues__sdma0 = 2,
+	engine_sel__mes_unmap_queues__sdmal = 3
+};
+
+enum mes_unmap_queues_extended_engine_sel_enum {
+	extended_engine_sel__mes_unmap_queues__legacy_engine_sel = 0,
+	extended_engine_sel__mes_unmap_queues__sdma0_to_7_sel = 1
+};
+
+struct pm4_mes_unmap_queues {
+	union {
+		union PM4_MES_TYPE_3_HEADER   header;            /* header */
+		uint32_t            ordinal1;
+	};
+
+	union {
+		struct {
+			enum mes_unmap_queues_action_enum action:2;
+			enum mes_unmap_queues_extended_engine_sel_enum extended_engine_sel:2;
+			enum mes_unmap_queues_queue_sel_enum queue_sel:2;
+			uint32_t reserved2:20;
+			enum mes_unmap_queues_engine_sel_enum engine_sel:3;
+			uint32_t num_queues:3;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	union {
+		struct {
+			uint32_t pasid:16;
+			uint32_t reserved3:16;
+		} bitfields3a;
+		struct {
+			uint32_t reserved4:2;
+			uint32_t doorbell_offset0:26;
+			int32_t reserved5:4;
+		} bitfields3b;
+		uint32_t ordinal3;
+	};
+
+	union {
+	struct {
+			uint32_t reserved6:2;
+			uint32_t doorbell_offset1:26;
+			uint32_t reserved7:4;
+		} bitfields4;
+		uint32_t ordinal4;
+	};
+
+	union {
+		struct {
+			uint32_t reserved8:2;
+			uint32_t doorbell_offset2:26;
+			uint32_t reserved9:4;
+		} bitfields5;
+		uint32_t ordinal5;
+	};
+
+	union {
+		struct {
+			uint32_t reserved10:2;
+			uint32_t doorbell_offset3:26;
+			uint32_t reserved11:4;
+		} bitfields6;
+		uint32_t ordinal6;
+	};
+};
+#endif
+
+#ifndef PM4_MEC_RELEASE_MEM_DEFINED
+#define PM4_MEC_RELEASE_MEM_DEFINED
+
+enum mec_release_mem_event_index_enum {
+	event_index__mec_release_mem__end_of_pipe = 5,
+	event_index__mec_release_mem__shader_done = 6
+};
+
+enum mec_release_mem_cache_policy_enum {
+	cache_policy__mec_release_mem__lru = 0,
+	cache_policy__mec_release_mem__stream = 1
+};
+
+enum mec_release_mem_pq_exe_status_enum {
+	pq_exe_status__mec_release_mem__default = 0,
+	pq_exe_status__mec_release_mem__phase_update = 1
+};
+
+enum mec_release_mem_dst_sel_enum {
+	dst_sel__mec_release_mem__memory_controller = 0,
+	dst_sel__mec_release_mem__tc_l2 = 1,
+	dst_sel__mec_release_mem__queue_write_pointer_register = 2,
+	dst_sel__mec_release_mem__queue_write_pointer_poll_mask_bit = 3
+};
+
+enum mec_release_mem_int_sel_enum {
+	int_sel__mec_release_mem__none = 0,
+	int_sel__mec_release_mem__send_interrupt_only = 1,
+	int_sel__mec_release_mem__send_interrupt_after_write_confirm = 2,
+	int_sel__mec_release_mem__send_data_after_write_confirm = 3,
+	int_sel__mec_release_mem__unconditionally_send_int_ctxid = 4,
+	int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_32_bit_compare = 5,
+	int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_64_bit_compare = 6
+};
+
+enum mec_release_mem_data_sel_enum {
+	data_sel__mec_release_mem__none = 0,
+	data_sel__mec_release_mem__send_32_bit_low = 1,
+	data_sel__mec_release_mem__send_64_bit_data = 2,
+	data_sel__mec_release_mem__send_gpu_clock_counter = 3,
+	data_sel__mec_release_mem__send_cp_perfcounter_hi_lo = 4,
+	data_sel__mec_release_mem__store_gds_data_to_memory = 5
+};
+
+struct pm4_mec_release_mem {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;     /*header */
+		unsigned int ordinal1;
+	};
+
+	union {
+		struct {
+			unsigned int event_type:6;
+			unsigned int reserved1:2;
+			enum mec_release_mem_event_index_enum event_index:4;
+			unsigned int tcl1_vol_action_ena:1;
+			unsigned int tc_vol_action_ena:1;
+			unsigned int reserved2:1;
+			unsigned int tc_wb_action_ena:1;
+			unsigned int tcl1_action_ena:1;
+			unsigned int tc_action_ena:1;
+			uint32_t reserved3:1;
+			uint32_t tc_nc_action_ena:1;
+			uint32_t tc_wc_action_ena:1;
+			uint32_t tc_md_action_ena:1;
+			uint32_t reserved4:3;
+			enum mec_release_mem_cache_policy_enum cache_policy:2;
+			uint32_t reserved5:2;
+			enum mec_release_mem_pq_exe_status_enum pq_exe_status:1;
+			uint32_t reserved6:2;
+		} bitfields2;
+		unsigned int ordinal2;
+	};
+
+	union {
+		struct {
+			uint32_t reserved7:16;
+			enum mec_release_mem_dst_sel_enum dst_sel:2;
+			uint32_t reserved8:6;
+			enum mec_release_mem_int_sel_enum int_sel:3;
+			uint32_t reserved9:2;
+			enum mec_release_mem_data_sel_enum data_sel:3;
+		} bitfields3;
+		unsigned int ordinal3;
+	};
+
+	union {
+		struct {
+			uint32_t reserved10:2;
+			unsigned int address_lo_32b:30;
+		} bitfields4;
+		struct {
+			uint32_t reserved11:3;
+			uint32_t address_lo_64b:29;
+		} bitfields4b;
+		uint32_t reserved12;
+		unsigned int ordinal4;
+	};
+
+	union {
+		uint32_t address_hi;
+		uint32_t reserved13;
+		uint32_t ordinal5;
+	};
+
+	union {
+		uint32_t data_lo;
+		uint32_t cmp_data_lo;
+		struct {
+			uint32_t dw_offset:16;
+			uint32_t num_dwords:16;
+		} bitfields6c;
+		uint32_t reserved14;
+		uint32_t ordinal6;
+	};
+
+	union {
+		uint32_t data_hi;
+		uint32_t cmp_data_hi;
+		uint32_t reserved15;
+		uint32_t reserved16;
+		uint32_t ordinal7;
+	};
+
+	uint32_t int_ctxid;
+
+};
+
+#endif
+
+#ifndef PM4_MEC_WRITE_DATA_DEFINED
+#define PM4_MEC_WRITE_DATA_DEFINED
+
+enum WRITE_DATA_dst_sel_enum {
+	dst_sel___write_data__mem_mapped_register = 0,
+	dst_sel___write_data__tc_l2 = 2,
+	dst_sel___write_data__gds = 3,
+	dst_sel___write_data__memory = 5,
+	dst_sel___write_data__memory_mapped_adc_persistent_state = 6,
+};
+
+enum WRITE_DATA_addr_incr_enum {
+	addr_incr___write_data__increment_address = 0,
+	addr_incr___write_data__do_not_increment_address = 1
+};
+
+enum WRITE_DATA_wr_confirm_enum {
+	wr_confirm___write_data__do_not_wait_for_write_confirmation = 0,
+	wr_confirm___write_data__wait_for_write_confirmation = 1
+};
+
+enum WRITE_DATA_cache_policy_enum {
+	cache_policy___write_data__lru = 0,
+	cache_policy___write_data__stream = 1
+};
+
+
+struct pm4_mec_write_data_mmio {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;     /*header */
+		unsigned int ordinal1;
+	};
+
+	union {
+		struct {
+			unsigned int reserved1:8;
+			unsigned int dst_sel:4;
+			unsigned int reserved2:4;
+			unsigned int addr_incr:1;
+			unsigned int reserved3:2;
+			unsigned int resume_vf:1;
+			unsigned int wr_confirm:1;
+			unsigned int reserved4:4;
+			unsigned int cache_policy:2;
+			unsigned int reserved5:5;
+		} bitfields2;
+		unsigned int ordinal2;
+	};
+
+	union {
+		struct {
+			unsigned int dst_mmreg_addr:18;
+			unsigned int reserved6:14;
+		} bitfields3;
+		unsigned int ordinal3;
+	};
+
+	uint32_t reserved7;
+
+	uint32_t data;
+
+};
+
+#endif
+
+enum {
+	CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014
+};
+#endif
+
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h
new file mode 100644
index 000000000..38f5cb6a2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_aldebaran.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2020-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*--------------------MES_MAP_PROCESS (PER DEBUG VMID)--------------------*/
+
+#ifndef PM4_MES_MAP_PROCESS_PER_DEBUG_VMID_DEFINED
+#define PM4_MES_MAP_PROCESS_PER_DEBUG_VMID_DEFINED
+
+struct pm4_mes_map_process_aldebaran {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;	/* header */
+		uint32_t ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t pasid:16;	    /* 0 - 15  */
+			uint32_t single_memops:1;   /* 16      */
+			uint32_t reserved1:1;	    /* 17      */
+			uint32_t debug_vmid:4;	    /* 18 - 21 */
+			uint32_t new_debug:1;	    /* 22      */
+			uint32_t tmz:1;		    /* 23      */
+			uint32_t diq_enable:1;      /* 24      */
+			uint32_t process_quantum:7; /* 25 - 31 */
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	uint32_t vm_context_page_table_base_addr_lo32;
+
+	uint32_t vm_context_page_table_base_addr_hi32;
+
+	uint32_t sh_mem_bases;
+
+	uint32_t sh_mem_config;
+
+	uint32_t sq_shader_tba_lo;
+
+	uint32_t sq_shader_tba_hi;
+
+	uint32_t sq_shader_tma_lo;
+
+	uint32_t sq_shader_tma_hi;
+
+	uint32_t reserved6;
+
+	uint32_t gds_addr_lo;
+
+	uint32_t gds_addr_hi;
+
+	union {
+		struct {
+			uint32_t num_gws:7;
+			uint32_t sdma_enable:1;
+			uint32_t num_oac:4;
+			uint32_t gds_size_hi:4;
+			uint32_t gds_size:6;
+			uint32_t num_queues:10;
+		} bitfields14;
+		uint32_t ordinal14;
+	};
+
+	uint32_t spi_gdbg_per_vmid_cntl;
+
+	uint32_t tcp_watch_cntl[4];
+
+	uint32_t completion_signal_lo;
+
+	uint32_t completion_signal_hi;
+
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
new file mode 100644
index 000000000..8147395c0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
@@ -0,0 +1,506 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef F32_MES_PM4_PACKETS_H
+#define F32_MES_PM4_PACKETS_H
+
+#ifndef PM4_MES_HEADER_DEFINED
+#define PM4_MES_HEADER_DEFINED
+union PM4_MES_TYPE_3_HEADER {
+	struct {
+		uint32_t reserved1 : 8; /* < reserved */
+		uint32_t opcode    : 8; /* < IT opcode */
+		uint32_t count     : 14;/* < Number of DWORDS - 1 in the
+					 *   information body
+					 */
+		uint32_t type      : 2; /* < packet identifier
+					 *   It should be 3 for type 3 packets
+					 */
+	};
+	uint32_t u32All;
+};
+#endif /* PM4_MES_HEADER_DEFINED */
+
+/*--------------------MES_SET_RESOURCES--------------------*/
+
+#ifndef PM4_MES_SET_RESOURCES_DEFINED
+#define PM4_MES_SET_RESOURCES_DEFINED
+enum mes_set_resources_queue_type_enum {
+	queue_type__mes_set_resources__kernel_interface_queue_kiq = 0,
+	queue_type__mes_set_resources__hsa_interface_queue_hiq = 1,
+	queue_type__mes_set_resources__hsa_debug_interface_queue = 4
+};
+
+
+struct pm4_mes_set_resources {
+	union {
+		union PM4_MES_TYPE_3_HEADER	header;		/* header */
+		uint32_t			ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t vmid_mask:16;
+			uint32_t unmap_latency:8;
+			uint32_t reserved1:5;
+			enum mes_set_resources_queue_type_enum queue_type:3;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	uint32_t queue_mask_lo;
+	uint32_t queue_mask_hi;
+	uint32_t gws_mask_lo;
+	uint32_t gws_mask_hi;
+
+	union {
+		struct {
+			uint32_t oac_mask:16;
+			uint32_t reserved2:16;
+		} bitfields7;
+		uint32_t ordinal7;
+	};
+
+	union {
+		struct {
+		uint32_t gds_heap_base:6;
+		uint32_t reserved3:5;
+		uint32_t gds_heap_size:6;
+		uint32_t reserved4:15;
+		} bitfields8;
+		uint32_t ordinal8;
+	};
+
+};
+#endif
+
+/*--------------------MES_RUN_LIST--------------------*/
+
+#ifndef PM4_MES_RUN_LIST_DEFINED
+#define PM4_MES_RUN_LIST_DEFINED
+
+struct pm4_mes_runlist {
+	union {
+		union PM4_MES_TYPE_3_HEADER   header;            /* header */
+		uint32_t            ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t reserved1:2;
+			uint32_t ib_base_lo:30;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	union {
+		struct {
+			uint32_t ib_base_hi:16;
+			uint32_t reserved2:16;
+		} bitfields3;
+		uint32_t ordinal3;
+	};
+
+	union {
+		struct {
+			uint32_t ib_size:20;
+			uint32_t chain:1;
+			uint32_t offload_polling:1;
+			uint32_t reserved2:1;
+			uint32_t valid:1;
+			uint32_t process_cnt:4;
+			uint32_t reserved3:4;
+		} bitfields4;
+		uint32_t ordinal4;
+	};
+
+};
+#endif
+
+/*--------------------MES_MAP_PROCESS--------------------*/
+
+#ifndef PM4_MES_MAP_PROCESS_DEFINED
+#define PM4_MES_MAP_PROCESS_DEFINED
+
+struct pm4_mes_map_process {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;	/* header */
+		uint32_t ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t pasid:16;
+			uint32_t reserved1:8;
+			uint32_t diq_enable:1;
+			uint32_t process_quantum:7;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	union {
+		struct {
+			uint32_t page_table_base:28;
+			uint32_t reserved3:4;
+		} bitfields3;
+		uint32_t ordinal3;
+	};
+
+	uint32_t reserved;
+
+	uint32_t sh_mem_bases;
+	uint32_t sh_mem_config;
+	uint32_t sh_mem_ape1_base;
+	uint32_t sh_mem_ape1_limit;
+
+	uint32_t sh_hidden_private_base_vmid;
+
+	uint32_t reserved2;
+	uint32_t reserved3;
+
+	uint32_t gds_addr_lo;
+	uint32_t gds_addr_hi;
+
+	union {
+		struct {
+			uint32_t num_gws:6;
+			uint32_t reserved4:2;
+			uint32_t num_oac:4;
+			uint32_t reserved5:4;
+			uint32_t gds_size:6;
+			uint32_t num_queues:10;
+		} bitfields10;
+		uint32_t ordinal10;
+	};
+
+	uint32_t completion_signal_lo;
+	uint32_t completion_signal_hi;
+
+};
+
+#endif
+
+/*--------------------MES_MAP_QUEUES--------------------*/
+
+#ifndef PM4_MES_MAP_QUEUES_VI_DEFINED
+#define PM4_MES_MAP_QUEUES_VI_DEFINED
+enum mes_map_queues_queue_sel_vi_enum {
+	queue_sel__mes_map_queues__map_to_specified_queue_slots_vi = 0,
+queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi = 1
+};
+
+enum mes_map_queues_queue_type_vi_enum {
+	queue_type__mes_map_queues__normal_compute_vi = 0,
+	queue_type__mes_map_queues__debug_interface_queue_vi = 1,
+	queue_type__mes_map_queues__normal_latency_static_queue_vi = 2,
+queue_type__mes_map_queues__low_latency_static_queue_vi = 3
+};
+
+enum mes_map_queues_engine_sel_vi_enum {
+	engine_sel__mes_map_queues__compute_vi = 0,
+	engine_sel__mes_map_queues__sdma0_vi = 2,
+	engine_sel__mes_map_queues__sdma1_vi = 3
+};
+
+
+struct pm4_mes_map_queues {
+	union {
+		union PM4_MES_TYPE_3_HEADER   header;            /* header */
+		uint32_t            ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t reserved1:4;
+			enum mes_map_queues_queue_sel_vi_enum queue_sel:2;
+			uint32_t reserved2:15;
+			enum mes_map_queues_queue_type_vi_enum queue_type:3;
+			uint32_t reserved3:2;
+			enum mes_map_queues_engine_sel_vi_enum engine_sel:3;
+			uint32_t num_queues:3;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	union {
+		struct {
+			uint32_t reserved3:1;
+			uint32_t check_disable:1;
+			uint32_t doorbell_offset:21;
+			uint32_t reserved4:3;
+			uint32_t queue:6;
+		} bitfields3;
+		uint32_t ordinal3;
+	};
+
+	uint32_t mqd_addr_lo;
+	uint32_t mqd_addr_hi;
+	uint32_t wptr_addr_lo;
+	uint32_t wptr_addr_hi;
+};
+#endif
+
+/*--------------------MES_QUERY_STATUS--------------------*/
+
+#ifndef PM4_MES_QUERY_STATUS_DEFINED
+#define PM4_MES_QUERY_STATUS_DEFINED
+enum mes_query_status_interrupt_sel_enum {
+	interrupt_sel__mes_query_status__completion_status = 0,
+	interrupt_sel__mes_query_status__process_status = 1,
+	interrupt_sel__mes_query_status__queue_status = 2
+};
+
+enum mes_query_status_command_enum {
+	command__mes_query_status__interrupt_only = 0,
+	command__mes_query_status__fence_only_immediate = 1,
+	command__mes_query_status__fence_only_after_write_ack = 2,
+	command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3
+};
+
+enum mes_query_status_engine_sel_enum {
+	engine_sel__mes_query_status__compute = 0,
+	engine_sel__mes_query_status__sdma0_queue = 2,
+	engine_sel__mes_query_status__sdma1_queue = 3
+};
+
+struct pm4_mes_query_status {
+	union {
+		union PM4_MES_TYPE_3_HEADER   header;            /* header */
+		uint32_t            ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t context_id:28;
+			enum mes_query_status_interrupt_sel_enum
+				interrupt_sel:2;
+			enum mes_query_status_command_enum command:2;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	union {
+		struct {
+			uint32_t pasid:16;
+			uint32_t reserved1:16;
+		} bitfields3a;
+		struct {
+			uint32_t reserved2:2;
+			uint32_t doorbell_offset:21;
+			uint32_t reserved3:2;
+			enum mes_query_status_engine_sel_enum engine_sel:3;
+			uint32_t reserved4:4;
+		} bitfields3b;
+		uint32_t ordinal3;
+	};
+
+	uint32_t addr_lo;
+	uint32_t addr_hi;
+	uint32_t data_lo;
+	uint32_t data_hi;
+};
+#endif
+
+/*--------------------MES_UNMAP_QUEUES--------------------*/
+
+#ifndef PM4_MES_UNMAP_QUEUES_DEFINED
+#define PM4_MES_UNMAP_QUEUES_DEFINED
+enum mes_unmap_queues_action_enum {
+	action__mes_unmap_queues__preempt_queues = 0,
+	action__mes_unmap_queues__reset_queues = 1,
+	action__mes_unmap_queues__disable_process_queues = 2,
+	action__mes_unmap_queues__reserved = 3
+};
+
+enum mes_unmap_queues_queue_sel_enum {
+	queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
+	queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
+	queue_sel__mes_unmap_queues__unmap_all_queues = 2,
+	queue_sel__mes_unmap_queues__unmap_all_non_static_queues = 3
+};
+
+enum mes_unmap_queues_engine_sel_enum {
+	engine_sel__mes_unmap_queues__compute = 0,
+	engine_sel__mes_unmap_queues__sdma0 = 2,
+	engine_sel__mes_unmap_queues__sdmal = 3
+};
+
+struct pm4_mes_unmap_queues {
+	union {
+		union PM4_MES_TYPE_3_HEADER   header;            /* header */
+		uint32_t            ordinal1;
+	};
+
+	union {
+		struct {
+			enum mes_unmap_queues_action_enum action:2;
+			uint32_t reserved1:2;
+			enum mes_unmap_queues_queue_sel_enum queue_sel:2;
+			uint32_t reserved2:20;
+			enum mes_unmap_queues_engine_sel_enum engine_sel:3;
+			uint32_t num_queues:3;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	union {
+		struct {
+			uint32_t pasid:16;
+			uint32_t reserved3:16;
+		} bitfields3a;
+		struct {
+			uint32_t reserved4:2;
+			uint32_t doorbell_offset0:21;
+			uint32_t reserved5:9;
+		} bitfields3b;
+		uint32_t ordinal3;
+	};
+
+	union {
+	struct {
+			uint32_t reserved6:2;
+			uint32_t doorbell_offset1:21;
+			uint32_t reserved7:9;
+		} bitfields4;
+		uint32_t ordinal4;
+	};
+
+	union {
+		struct {
+			uint32_t reserved8:2;
+			uint32_t doorbell_offset2:21;
+			uint32_t reserved9:9;
+		} bitfields5;
+		uint32_t ordinal5;
+	};
+
+	union {
+		struct {
+			uint32_t reserved10:2;
+			uint32_t doorbell_offset3:21;
+			uint32_t reserved11:9;
+		} bitfields6;
+		uint32_t ordinal6;
+	};
+};
+#endif
+
+#ifndef PM4_MEC_RELEASE_MEM_DEFINED
+#define PM4_MEC_RELEASE_MEM_DEFINED
+enum RELEASE_MEM_event_index_enum {
+	event_index___release_mem__end_of_pipe = 5,
+	event_index___release_mem__shader_done = 6
+};
+
+enum RELEASE_MEM_cache_policy_enum {
+	cache_policy___release_mem__lru = 0,
+	cache_policy___release_mem__stream = 1,
+	cache_policy___release_mem__bypass = 2
+};
+
+enum RELEASE_MEM_dst_sel_enum {
+	dst_sel___release_mem__memory_controller = 0,
+	dst_sel___release_mem__tc_l2 = 1,
+	dst_sel___release_mem__queue_write_pointer_register = 2,
+	dst_sel___release_mem__queue_write_pointer_poll_mask_bit = 3
+};
+
+enum RELEASE_MEM_int_sel_enum {
+	int_sel___release_mem__none = 0,
+	int_sel___release_mem__send_interrupt_only = 1,
+	int_sel___release_mem__send_interrupt_after_write_confirm = 2,
+	int_sel___release_mem__send_data_after_write_confirm = 3
+};
+
+enum RELEASE_MEM_data_sel_enum {
+	data_sel___release_mem__none = 0,
+	data_sel___release_mem__send_32_bit_low = 1,
+	data_sel___release_mem__send_64_bit_data = 2,
+	data_sel___release_mem__send_gpu_clock_counter = 3,
+	data_sel___release_mem__send_cp_perfcounter_hi_lo = 4,
+	data_sel___release_mem__store_gds_data_to_memory = 5
+};
+
+struct pm4_mec_release_mem {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;     /*header */
+		unsigned int ordinal1;
+	};
+
+	union {
+		struct {
+			unsigned int event_type:6;
+			unsigned int reserved1:2;
+			enum RELEASE_MEM_event_index_enum event_index:4;
+			unsigned int tcl1_vol_action_ena:1;
+			unsigned int tc_vol_action_ena:1;
+			unsigned int reserved2:1;
+			unsigned int tc_wb_action_ena:1;
+			unsigned int tcl1_action_ena:1;
+			unsigned int tc_action_ena:1;
+			unsigned int reserved3:6;
+			unsigned int atc:1;
+			enum RELEASE_MEM_cache_policy_enum cache_policy:2;
+			unsigned int reserved4:5;
+		} bitfields2;
+		unsigned int ordinal2;
+	};
+
+	union {
+		struct {
+			unsigned int reserved5:16;
+			enum RELEASE_MEM_dst_sel_enum dst_sel:2;
+			unsigned int reserved6:6;
+			enum RELEASE_MEM_int_sel_enum int_sel:3;
+			unsigned int reserved7:2;
+			enum RELEASE_MEM_data_sel_enum data_sel:3;
+		} bitfields3;
+		unsigned int ordinal3;
+	};
+
+	union {
+		struct {
+			unsigned int reserved8:2;
+			unsigned int address_lo_32b:30;
+		} bitfields4;
+		struct {
+			unsigned int reserved9:3;
+			unsigned int address_lo_64b:29;
+		} bitfields5;
+		unsigned int ordinal4;
+	};
+
+	unsigned int address_hi;
+
+	unsigned int data_lo;
+
+	unsigned int data_hi;
+};
+#endif
+
+enum {
+	CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014
+};
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h
new file mode 100644
index 000000000..5bfd0f9cb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+
+#ifndef KFD_PM4_OPCODES_H
+#define KFD_PM4_OPCODES_H
+
+enum it_opcode_type {
+	IT_NOP                               = 0x10,
+	IT_SET_BASE                          = 0x11,
+	IT_CLEAR_STATE                       = 0x12,
+	IT_INDEX_BUFFER_SIZE                 = 0x13,
+	IT_DISPATCH_DIRECT                   = 0x15,
+	IT_DISPATCH_INDIRECT                 = 0x16,
+	IT_ATOMIC_GDS                        = 0x1D,
+	IT_OCCLUSION_QUERY                   = 0x1F,
+	IT_SET_PREDICATION                   = 0x20,
+	IT_REG_RMW                           = 0x21,
+	IT_COND_EXEC                         = 0x22,
+	IT_PRED_EXEC                         = 0x23,
+	IT_DRAW_INDIRECT                     = 0x24,
+	IT_DRAW_INDEX_INDIRECT               = 0x25,
+	IT_INDEX_BASE                        = 0x26,
+	IT_DRAW_INDEX_2                      = 0x27,
+	IT_CONTEXT_CONTROL                   = 0x28,
+	IT_INDEX_TYPE                        = 0x2A,
+	IT_DRAW_INDIRECT_MULTI               = 0x2C,
+	IT_DRAW_INDEX_AUTO                   = 0x2D,
+	IT_NUM_INSTANCES                     = 0x2F,
+	IT_DRAW_INDEX_MULTI_AUTO             = 0x30,
+	IT_INDIRECT_BUFFER_CNST              = 0x33,
+	IT_STRMOUT_BUFFER_UPDATE             = 0x34,
+	IT_DRAW_INDEX_OFFSET_2               = 0x35,
+	IT_DRAW_PREAMBLE                     = 0x36,
+	IT_WRITE_DATA                        = 0x37,
+	IT_DRAW_INDEX_INDIRECT_MULTI         = 0x38,
+	IT_MEM_SEMAPHORE                     = 0x39,
+	IT_COPY_DW                           = 0x3B,
+	IT_WAIT_REG_MEM                      = 0x3C,
+	IT_INDIRECT_BUFFER                   = 0x3F,
+	IT_COPY_DATA                         = 0x40,
+	IT_PFP_SYNC_ME                       = 0x42,
+	IT_SURFACE_SYNC                      = 0x43,
+	IT_COND_WRITE                        = 0x45,
+	IT_EVENT_WRITE                       = 0x46,
+	IT_EVENT_WRITE_EOP                   = 0x47,
+	IT_EVENT_WRITE_EOS                   = 0x48,
+	IT_RELEASE_MEM                       = 0x49,
+	IT_PREAMBLE_CNTL                     = 0x4A,
+	IT_DMA_DATA                          = 0x50,
+	IT_ACQUIRE_MEM                       = 0x58,
+	IT_REWIND                            = 0x59,
+	IT_LOAD_UCONFIG_REG                  = 0x5E,
+	IT_LOAD_SH_REG                       = 0x5F,
+	IT_LOAD_CONFIG_REG                   = 0x60,
+	IT_LOAD_CONTEXT_REG                  = 0x61,
+	IT_SET_CONFIG_REG                    = 0x68,
+	IT_SET_CONTEXT_REG                   = 0x69,
+	IT_SET_CONTEXT_REG_INDIRECT          = 0x73,
+	IT_SET_SH_REG                        = 0x76,
+	IT_SET_SH_REG_OFFSET                 = 0x77,
+	IT_SET_QUEUE_REG                     = 0x78,
+	IT_SET_UCONFIG_REG                   = 0x79,
+	IT_SCRATCH_RAM_WRITE                 = 0x7D,
+	IT_SCRATCH_RAM_READ                  = 0x7E,
+	IT_LOAD_CONST_RAM                    = 0x80,
+	IT_WRITE_CONST_RAM                   = 0x81,
+	IT_DUMP_CONST_RAM                    = 0x83,
+	IT_INCREMENT_CE_COUNTER              = 0x84,
+	IT_INCREMENT_DE_COUNTER              = 0x85,
+	IT_WAIT_ON_CE_COUNTER                = 0x86,
+	IT_WAIT_ON_DE_COUNTER_DIFF           = 0x88,
+	IT_SWITCH_BUFFER                     = 0x8B,
+	IT_SET_RESOURCES                     = 0xA0,
+	IT_MAP_PROCESS                       = 0xA1,
+	IT_MAP_QUEUES                        = 0xA2,
+	IT_UNMAP_QUEUES                      = 0xA3,
+	IT_QUERY_STATUS                      = 0xA4,
+	IT_RUN_LIST                          = 0xA5,
+};
+
+#define PM4_TYPE_0 0
+#define PM4_TYPE_2 2
+#define PM4_TYPE_3 3
+
+#endif /* KFD_PM4_OPCODES_H */
+
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
new file mode 100644
index 000000000..3287a3961
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -0,0 +1,1526 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef KFD_PRIV_H_INCLUDED
+#define KFD_PRIV_H_INCLUDED
+
+#include <linux/hashtable.h>
+#include <linux/mmu_notifier.h>
+#include <linux/memremap.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/atomic.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include <linux/kfd_ioctl.h>
+#include <linux/idr.h>
+#include <linux/kfifo.h>
+#include <linux/seq_file.h>
+#include <linux/kref.h>
+#include <linux/sysfs.h>
+#include <linux/device_cgroup.h>
+#include <drm/drm_file.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_device.h>
+#include <drm/drm_ioctl.h>
+#include <kgd_kfd_interface.h>
+#include <linux/swap.h>
+
+#include "amd_shared.h"
+#include "amdgpu.h"
+
+#define KFD_MAX_RING_ENTRY_SIZE	8
+
+#define KFD_SYSFS_FILE_MODE 0444
+
+/* GPU ID hash width in bits */
+#define KFD_GPU_ID_HASH_WIDTH 16
+
+/* Use upper bits of mmap offset to store KFD driver specific information.
+ * BITS[63:62] - Encode MMAP type
+ * BITS[61:46] - Encode gpu_id. To identify to which GPU the offset belongs to
+ * BITS[45:0]  - MMAP offset value
+ *
+ * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
+ *  defines are w.r.t to PAGE_SIZE
+ */
+#define KFD_MMAP_TYPE_SHIFT	62
+#define KFD_MMAP_TYPE_MASK	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_DOORBELL	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_EVENTS	(0x2ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_RESERVED_MEM	(0x1ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_MMIO	(0x0ULL << KFD_MMAP_TYPE_SHIFT)
+
+#define KFD_MMAP_GPU_ID_SHIFT 46
+#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
+				<< KFD_MMAP_GPU_ID_SHIFT)
+#define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
+				& KFD_MMAP_GPU_ID_MASK)
+#define KFD_MMAP_GET_GPU_ID(offset)    ((offset & KFD_MMAP_GPU_ID_MASK) \
+				>> KFD_MMAP_GPU_ID_SHIFT)
+
+/*
+ * When working with cp scheduler we should assign the HIQ manually or via
+ * the amdgpu driver to a fixed hqd slot, here are the fixed HIQ hqd slot
+ * definitions for Kaveri. In Kaveri only the first ME queues participates
+ * in the cp scheduling taking that in mind we set the HIQ slot in the
+ * second ME.
+ */
+#define KFD_CIK_HIQ_PIPE 4
+#define KFD_CIK_HIQ_QUEUE 0
+
+/* Macro for allocating structures */
+#define kfd_alloc_struct(ptr_to_struct)	\
+	((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL))
+
+#define KFD_MAX_NUM_OF_PROCESSES 512
+#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
+
+/*
+ * Size of the per-process TBA+TMA buffer: 2 pages
+ *
+ * The first page is the TBA used for the CWSR ISA code. The second
+ * page is used as TMA for user-mode trap handler setup in daisy-chain mode.
+ */
+#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)
+#define KFD_CWSR_TMA_OFFSET PAGE_SIZE
+
+#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE		\
+	(KFD_MAX_NUM_OF_PROCESSES *			\
+			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
+
+#define KFD_KERNEL_QUEUE_SIZE 2048
+
+#define KFD_UNMAP_LATENCY_MS	(4000)
+
+#define KFD_MAX_SDMA_QUEUES	128
+
+/*
+ * 512 = 0x200
+ * The doorbell index distance between SDMA RLC (2*i) and (2*i+1) in the
+ * same SDMA engine on SOC15, which has 8-byte doorbells for SDMA.
+ * 512 8-byte doorbell distance (i.e. one page away) ensures that SDMA RLC
+ * (2*i+1) doorbells (in terms of the lower 12 bit address) lie exactly in
+ * the OFFSET and SIZE set in registers like BIF_SDMA0_DOORBELL_RANGE.
+ */
+#define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512
+
+/**
+ * enum kfd_ioctl_flags - KFD ioctl flags
+ * Various flags that can be set in &amdkfd_ioctl_desc.flags to control how
+ * userspace can use a given ioctl.
+ */
+enum kfd_ioctl_flags {
+	/*
+	 * @KFD_IOC_FLAG_CHECKPOINT_RESTORE:
+	 * Certain KFD ioctls such as AMDKFD_IOC_CRIU_OP can potentially
+	 * perform privileged operations and load arbitrary data into MQDs and
+	 * eventually HQD registers when the queue is mapped by HWS. In order to
+	 * prevent this we should perform additional security checks.
+	 *
+	 * This is equivalent to callers with the CHECKPOINT_RESTORE capability.
+	 *
+	 * Note: Since earlier versions of docker do not support CHECKPOINT_RESTORE,
+	 * we also allow ioctls with SYS_ADMIN capability.
+	 */
+	KFD_IOC_FLAG_CHECKPOINT_RESTORE = BIT(0),
+};
+/*
+ * Kernel module parameter to specify maximum number of supported queues per
+ * device
+ */
+extern int max_num_of_queues_per_device;
+
+
+/* Kernel module parameter to specify the scheduling policy */
+extern int sched_policy;
+
+/*
+ * Kernel module parameter to specify the maximum process
+ * number per HW scheduler
+ */
+extern int hws_max_conc_proc;
+
+extern int cwsr_enable;
+
+/*
+ * Kernel module parameter to specify whether to send sigterm to HSA process on
+ * unhandled exception
+ */
+extern int send_sigterm;
+
+/*
+ * This kernel module is used to simulate large bar machine on non-large bar
+ * enabled machines.
+ */
+extern int debug_largebar;
+
+/* Set sh_mem_config.retry_disable on GFX v9 */
+extern int amdgpu_noretry;
+
+/* Halt if HWS hang is detected */
+extern int halt_if_hws_hang;
+
+/* Whether MEC FW support GWS barriers */
+extern bool hws_gws_support;
+
+/* Queue preemption timeout in ms */
+extern int queue_preemption_timeout_ms;
+
+/*
+ * Don't evict process queues on vm fault
+ */
+extern int amdgpu_no_queue_eviction_on_vm_fault;
+
+/* Enable eviction debug messages */
+extern bool debug_evictions;
+
+extern struct mutex kfd_processes_mutex;
+
+enum cache_policy {
+	cache_policy_coherent,
+	cache_policy_noncoherent
+};
+
+#define KFD_GC_VERSION(dev) ((dev)->adev->ip_versions[GC_HWIP][0])
+#define KFD_IS_SOC15(dev)   ((KFD_GC_VERSION(dev)) >= (IP_VERSION(9, 0, 1)))
+#define KFD_SUPPORT_XNACK_PER_PROCESS(dev)\
+	((KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) ||	\
+	 (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)))
+
+struct kfd_node;
+
+struct kfd_event_interrupt_class {
+	bool (*interrupt_isr)(struct kfd_node *dev,
+			const uint32_t *ih_ring_entry, uint32_t *patched_ihre,
+			bool *patched_flag);
+	void (*interrupt_wq)(struct kfd_node *dev,
+			const uint32_t *ih_ring_entry);
+};
+
+struct kfd_device_info {
+	uint32_t gfx_target_version;
+	const struct kfd_event_interrupt_class *event_interrupt_class;
+	unsigned int max_pasid_bits;
+	unsigned int max_no_of_hqd;
+	unsigned int doorbell_size;
+	size_t ih_ring_entry_size;
+	uint8_t num_of_watch_points;
+	uint16_t mqd_size_aligned;
+	bool supports_cwsr;
+	bool needs_pci_atomics;
+	uint32_t no_atomic_fw_version;
+	unsigned int num_sdma_queues_per_engine;
+	unsigned int num_reserved_sdma_queues_per_engine;
+	DECLARE_BITMAP(reserved_sdma_queues_bitmap, KFD_MAX_SDMA_QUEUES);
+};
+
+unsigned int kfd_get_num_sdma_engines(struct kfd_node *kdev);
+unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_node *kdev);
+
+struct kfd_mem_obj {
+	uint32_t range_start;
+	uint32_t range_end;
+	uint64_t gpu_addr;
+	uint32_t *cpu_ptr;
+	void *gtt_mem;
+};
+
+struct kfd_vmid_info {
+	uint32_t first_vmid_kfd;
+	uint32_t last_vmid_kfd;
+	uint32_t vmid_num_kfd;
+};
+
+#define MAX_KFD_NODES	8
+
+struct kfd_dev;
+
+struct kfd_node {
+	unsigned int node_id;
+	struct amdgpu_device *adev;     /* Duplicated here along with keeping
+					 * a copy in kfd_dev to save a hop
+					 */
+	const struct kfd2kgd_calls *kfd2kgd; /* Duplicated here along with
+					      * keeping a copy in kfd_dev to
+					      * save a hop
+					      */
+	struct kfd_vmid_info vm_info;
+	unsigned int id;                /* topology stub index */
+	uint32_t xcc_mask; /* Instance mask of XCCs present */
+	struct amdgpu_xcp *xcp;
+
+	/* Interrupts */
+	struct kfifo ih_fifo;
+	struct workqueue_struct *ih_wq;
+	struct work_struct interrupt_work;
+	spinlock_t interrupt_lock;
+
+	/*
+	 * Interrupts of interest to KFD are copied
+	 * from the HW ring into a SW ring.
+	 */
+	bool interrupts_active;
+	uint32_t interrupt_bitmap; /* Only used for GFX 9.4.3 */
+
+	/* QCM Device instance */
+	struct device_queue_manager *dqm;
+
+	/* Global GWS resource shared between processes */
+	void *gws;
+	bool gws_debug_workaround;
+
+	/* Clients watching SMI events */
+	struct list_head smi_clients;
+	spinlock_t smi_lock;
+	uint32_t reset_seq_num;
+
+	/* SRAM ECC flag */
+	atomic_t sram_ecc_flag;
+
+	/*spm process id */
+	unsigned int spm_pasid;
+
+	/* Maximum process number mapped to HW scheduler */
+	unsigned int max_proc_per_quantum;
+
+	unsigned int compute_vmid_bitmap;
+
+	struct kfd_local_mem_info local_mem_info;
+
+	struct kfd_dev *kfd;
+};
+
+struct kfd_dev {
+	struct amdgpu_device *adev;
+
+	struct kfd_device_info device_info;
+
+	u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells
+					   * page used by kernel queue
+					   */
+
+	struct kgd2kfd_shared_resources shared_resources;
+
+	const struct kfd2kgd_calls *kfd2kgd;
+	struct mutex doorbell_mutex;
+
+	void *gtt_mem;
+	uint64_t gtt_start_gpu_addr;
+	void *gtt_start_cpu_ptr;
+	void *gtt_sa_bitmap;
+	struct mutex gtt_sa_lock;
+	unsigned int gtt_sa_chunk_size;
+	unsigned int gtt_sa_num_of_chunks;
+
+	bool init_complete;
+
+	/* Firmware versions */
+	uint16_t mec_fw_version;
+	uint16_t mec2_fw_version;
+	uint16_t sdma_fw_version;
+
+	/* CWSR */
+	bool cwsr_enabled;
+	const void *cwsr_isa;
+	unsigned int cwsr_isa_size;
+
+	/* xGMI */
+	uint64_t hive_id;
+
+	bool pci_atomic_requested;
+
+	/* Compute Profile ref. count */
+	atomic_t compute_profile;
+
+	struct ida doorbell_ida;
+	unsigned int max_doorbell_slices;
+
+	int noretry;
+
+	struct kfd_node *nodes[MAX_KFD_NODES];
+	unsigned int num_nodes;
+
+	/* Track per device allocated watch points */
+	uint32_t alloc_watch_ids;
+	spinlock_t watch_points_lock;
+
+	/* Kernel doorbells for KFD device */
+	struct amdgpu_bo *doorbells;
+
+	/* bitmap for dynamic doorbell allocation from doorbell object */
+	unsigned long *doorbell_bitmap;
+};
+
+enum kfd_mempool {
+	KFD_MEMPOOL_SYSTEM_CACHEABLE = 1,
+	KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2,
+	KFD_MEMPOOL_FRAMEBUFFER = 3,
+};
+
+/* Character device interface */
+int kfd_chardev_init(void);
+void kfd_chardev_exit(void);
+
+/**
+ * enum kfd_unmap_queues_filter - Enum for queue filters.
+ *
+ * @KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: Preempts all queues in the
+ *						running queues list.
+ *
+ * @KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: Preempts all non-static queues
+ *						in the run list.
+ *
+ * @KFD_UNMAP_QUEUES_FILTER_BY_PASID: Preempts queues that belongs to
+ *						specific process.
+ *
+ */
+enum kfd_unmap_queues_filter {
+	KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES = 1,
+	KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES = 2,
+	KFD_UNMAP_QUEUES_FILTER_BY_PASID = 3
+};
+
+/**
+ * enum kfd_queue_type - Enum for various queue types.
+ *
+ * @KFD_QUEUE_TYPE_COMPUTE: Regular user mode queue type.
+ *
+ * @KFD_QUEUE_TYPE_SDMA: SDMA user mode queue type.
+ *
+ * @KFD_QUEUE_TYPE_HIQ: HIQ queue type.
+ *
+ * @KFD_QUEUE_TYPE_DIQ: DIQ queue type.
+ *
+ * @KFD_QUEUE_TYPE_SDMA_XGMI: Special SDMA queue for XGMI interface.
+ */
+enum kfd_queue_type  {
+	KFD_QUEUE_TYPE_COMPUTE,
+	KFD_QUEUE_TYPE_SDMA,
+	KFD_QUEUE_TYPE_HIQ,
+	KFD_QUEUE_TYPE_DIQ,
+	KFD_QUEUE_TYPE_SDMA_XGMI
+};
+
+enum kfd_queue_format {
+	KFD_QUEUE_FORMAT_PM4,
+	KFD_QUEUE_FORMAT_AQL
+};
+
+enum KFD_QUEUE_PRIORITY {
+	KFD_QUEUE_PRIORITY_MINIMUM = 0,
+	KFD_QUEUE_PRIORITY_MAXIMUM = 15
+};
+
+/**
+ * struct queue_properties
+ *
+ * @type: The queue type.
+ *
+ * @queue_id: Queue identifier.
+ *
+ * @queue_address: Queue ring buffer address.
+ *
+ * @queue_size: Queue ring buffer size.
+ *
+ * @priority: Defines the queue priority relative to other queues in the
+ * process.
+ * This is just an indication and HW scheduling may override the priority as
+ * necessary while keeping the relative prioritization.
+ * the priority granularity is from 0 to f which f is the highest priority.
+ * currently all queues are initialized with the highest priority.
+ *
+ * @queue_percent: This field is partially implemented and currently a zero in
+ * this field defines that the queue is non active.
+ *
+ * @read_ptr: User space address which points to the number of dwords the
+ * cp read from the ring buffer. This field updates automatically by the H/W.
+ *
+ * @write_ptr: Defines the number of dwords written to the ring buffer.
+ *
+ * @doorbell_ptr: Notifies the H/W of new packet written to the queue ring
+ * buffer. This field should be similar to write_ptr and the user should
+ * update this field after updating the write_ptr.
+ *
+ * @doorbell_off: The doorbell offset in the doorbell pci-bar.
+ *
+ * @is_interop: Defines if this is a interop queue. Interop queue means that
+ * the queue can access both graphics and compute resources.
+ *
+ * @is_evicted: Defines if the queue is evicted. Only active queues
+ * are evicted, rendering them inactive.
+ *
+ * @is_active: Defines if the queue is active or not. @is_active and
+ * @is_evicted are protected by the DQM lock.
+ *
+ * @is_gws: Defines if the queue has been updated to be GWS-capable or not.
+ * @is_gws should be protected by the DQM lock, since changing it can yield the
+ * possibility of updating DQM state on number of GWS queues.
+ *
+ * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid
+ * of the queue.
+ *
+ * This structure represents the queue properties for each queue no matter if
+ * it's user mode or kernel mode queue.
+ *
+ */
+
+struct queue_properties {
+	enum kfd_queue_type type;
+	enum kfd_queue_format format;
+	unsigned int queue_id;
+	uint64_t queue_address;
+	uint64_t  queue_size;
+	uint32_t priority;
+	uint32_t queue_percent;
+	uint32_t *read_ptr;
+	uint32_t *write_ptr;
+	void __iomem *doorbell_ptr;
+	uint32_t doorbell_off;
+	bool is_interop;
+	bool is_evicted;
+	bool is_suspended;
+	bool is_being_destroyed;
+	bool is_active;
+	bool is_gws;
+	uint32_t pm4_target_xcc;
+	bool is_dbg_wa;
+	bool is_user_cu_masked;
+	/* Not relevant for user mode queues in cp scheduling */
+	unsigned int vmid;
+	/* Relevant only for sdma queues*/
+	uint32_t sdma_engine_id;
+	uint32_t sdma_queue_id;
+	uint32_t sdma_vm_addr;
+	/* Relevant only for VI */
+	uint64_t eop_ring_buffer_address;
+	uint32_t eop_ring_buffer_size;
+	uint64_t ctx_save_restore_area_address;
+	uint32_t ctx_save_restore_area_size;
+	uint32_t ctl_stack_size;
+	uint64_t tba_addr;
+	uint64_t tma_addr;
+	uint64_t exception_status;
+};
+
+#define QUEUE_IS_ACTIVE(q) ((q).queue_size > 0 &&	\
+			    (q).queue_address != 0 &&	\
+			    (q).queue_percent > 0 &&	\
+			    !(q).is_evicted &&		\
+			    !(q).is_suspended)
+
+enum mqd_update_flag {
+	UPDATE_FLAG_DBG_WA_ENABLE = 1,
+	UPDATE_FLAG_DBG_WA_DISABLE = 2,
+};
+
+struct mqd_update_info {
+	union {
+		struct {
+			uint32_t count; /* Must be a multiple of 32 */
+			uint32_t *ptr;
+		} cu_mask;
+	};
+	enum mqd_update_flag update_flag;
+};
+
+/**
+ * struct queue
+ *
+ * @list: Queue linked list.
+ *
+ * @mqd: The queue MQD (memory queue descriptor).
+ *
+ * @mqd_mem_obj: The MQD local gpu memory object.
+ *
+ * @gart_mqd_addr: The MQD gart mc address.
+ *
+ * @properties: The queue properties.
+ *
+ * @mec: Used only in no cp scheduling mode and identifies to micro engine id
+ *	 that the queue should be executed on.
+ *
+ * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe
+ *	  id.
+ *
+ * @queue: Used only in no cp scheduliong mode and identifies the queue's slot.
+ *
+ * @process: The kfd process that created this queue.
+ *
+ * @device: The kfd device that created this queue.
+ *
+ * @gws: Pointing to gws kgd_mem if this is a gws control queue; NULL
+ * otherwise.
+ *
+ * This structure represents user mode compute queues.
+ * It contains all the necessary data to handle such queues.
+ *
+ */
+
+struct queue {
+	struct list_head list;
+	void *mqd;
+	struct kfd_mem_obj *mqd_mem_obj;
+	uint64_t gart_mqd_addr;
+	struct queue_properties properties;
+
+	uint32_t mec;
+	uint32_t pipe;
+	uint32_t queue;
+
+	unsigned int sdma_id;
+	unsigned int doorbell_id;
+
+	struct kfd_process	*process;
+	struct kfd_node		*device;
+	void *gws;
+
+	/* procfs */
+	struct kobject kobj;
+
+	void *gang_ctx_bo;
+	uint64_t gang_ctx_gpu_addr;
+	void *gang_ctx_cpu_ptr;
+
+	struct amdgpu_bo *wptr_bo;
+};
+
+enum KFD_MQD_TYPE {
+	KFD_MQD_TYPE_HIQ = 0,		/* for hiq */
+	KFD_MQD_TYPE_CP,		/* for cp queues and diq */
+	KFD_MQD_TYPE_SDMA,		/* for sdma queues */
+	KFD_MQD_TYPE_DIQ,		/* for diq */
+	KFD_MQD_TYPE_MAX
+};
+
+enum KFD_PIPE_PRIORITY {
+	KFD_PIPE_PRIORITY_CS_LOW = 0,
+	KFD_PIPE_PRIORITY_CS_MEDIUM,
+	KFD_PIPE_PRIORITY_CS_HIGH
+};
+
+struct scheduling_resources {
+	unsigned int vmid_mask;
+	enum kfd_queue_type type;
+	uint64_t queue_mask;
+	uint64_t gws_mask;
+	uint32_t oac_mask;
+	uint32_t gds_heap_base;
+	uint32_t gds_heap_size;
+};
+
+struct process_queue_manager {
+	/* data */
+	struct kfd_process	*process;
+	struct list_head	queues;
+	unsigned long		*queue_slot_bitmap;
+};
+
+struct qcm_process_device {
+	/* The Device Queue Manager that owns this data */
+	struct device_queue_manager *dqm;
+	struct process_queue_manager *pqm;
+	/* Queues list */
+	struct list_head queues_list;
+	struct list_head priv_queue_list;
+
+	unsigned int queue_count;
+	unsigned int vmid;
+	bool is_debug;
+	unsigned int evicted; /* eviction counter, 0=active */
+
+	/* This flag tells if we should reset all wavefronts on
+	 * process termination
+	 */
+	bool reset_wavefronts;
+
+	/* This flag tells us if this process has a GWS-capable
+	 * queue that will be mapped into the runlist. It's
+	 * possible to request a GWS BO, but not have the queue
+	 * currently mapped, and this changes how the MAP_PROCESS
+	 * PM4 packet is configured.
+	 */
+	bool mapped_gws_queue;
+
+	/* All the memory management data should be here too */
+	uint64_t gds_context_area;
+	/* Contains page table flags such as AMDGPU_PTE_VALID since gfx9 */
+	uint64_t page_table_base;
+	uint32_t sh_mem_config;
+	uint32_t sh_mem_bases;
+	uint32_t sh_mem_ape1_base;
+	uint32_t sh_mem_ape1_limit;
+	uint32_t gds_size;
+	uint32_t num_gws;
+	uint32_t num_oac;
+	uint32_t sh_hidden_private_base;
+
+	/* CWSR memory */
+	struct kgd_mem *cwsr_mem;
+	void *cwsr_kaddr;
+	uint64_t cwsr_base;
+	uint64_t tba_addr;
+	uint64_t tma_addr;
+
+	/* IB memory */
+	struct kgd_mem *ib_mem;
+	uint64_t ib_base;
+	void *ib_kaddr;
+
+	/* doorbells for kfd process */
+	struct amdgpu_bo *proc_doorbells;
+
+	/* bitmap for dynamic doorbell allocation from the bo */
+	unsigned long *doorbell_bitmap;
+};
+
+/* KFD Memory Eviction */
+
+/* Approx. wait time before attempting to restore evicted BOs */
+#define PROCESS_RESTORE_TIME_MS 100
+/* Approx. back off time if restore fails due to lack of memory */
+#define PROCESS_BACK_OFF_TIME_MS 100
+/* Approx. time before evicting the process again */
+#define PROCESS_ACTIVE_TIME_MS 10
+
+/* 8 byte handle containing GPU ID in the most significant 4 bytes and
+ * idr_handle in the least significant 4 bytes
+ */
+#define MAKE_HANDLE(gpu_id, idr_handle) \
+	(((uint64_t)(gpu_id) << 32) + idr_handle)
+#define GET_GPU_ID(handle) (handle >> 32)
+#define GET_IDR_HANDLE(handle) (handle & 0xFFFFFFFF)
+
+enum kfd_pdd_bound {
+	PDD_UNBOUND = 0,
+	PDD_BOUND,
+	PDD_BOUND_SUSPENDED,
+};
+
+#define MAX_SYSFS_FILENAME_LEN 15
+
+/*
+ * SDMA counter runs at 100MHz frequency.
+ * We display SDMA activity in microsecond granularity in sysfs.
+ * As a result, the divisor is 100.
+ */
+#define SDMA_ACTIVITY_DIVISOR  100
+
+/* Data that is per-process-per device. */
+struct kfd_process_device {
+	/* The device that owns this data. */
+	struct kfd_node *dev;
+
+	/* The process that owns this kfd_process_device. */
+	struct kfd_process *process;
+
+	/* per-process-per device QCM data structure */
+	struct qcm_process_device qpd;
+
+	/*Apertures*/
+	uint64_t lds_base;
+	uint64_t lds_limit;
+	uint64_t gpuvm_base;
+	uint64_t gpuvm_limit;
+	uint64_t scratch_base;
+	uint64_t scratch_limit;
+
+	/* VM context for GPUVM allocations */
+	struct file *drm_file;
+	void *drm_priv;
+	atomic64_t tlb_seq;
+
+	/* GPUVM allocations storage */
+	struct idr alloc_idr;
+
+	/* Flag used to tell the pdd has dequeued from the dqm.
+	 * This is used to prevent dev->dqm->ops.process_termination() from
+	 * being called twice when it is already called in IOMMU callback
+	 * function.
+	 */
+	bool already_dequeued;
+	bool runtime_inuse;
+
+	/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
+	enum kfd_pdd_bound bound;
+
+	/* VRAM usage */
+	uint64_t vram_usage;
+	struct attribute attr_vram;
+	char vram_filename[MAX_SYSFS_FILENAME_LEN];
+
+	/* SDMA activity tracking */
+	uint64_t sdma_past_activity_counter;
+	struct attribute attr_sdma;
+	char sdma_filename[MAX_SYSFS_FILENAME_LEN];
+
+	/* Eviction activity tracking */
+	uint64_t last_evict_timestamp;
+	atomic64_t evict_duration_counter;
+	struct attribute attr_evict;
+
+	struct kobject *kobj_stats;
+
+	/*
+	 * @cu_occupancy: Reports occupancy of Compute Units (CU) of a process
+	 * that is associated with device encoded by "this" struct instance. The
+	 * value reflects CU usage by all of the waves launched by this process
+	 * on this device. A very important property of occupancy parameter is
+	 * that its value is a snapshot of current use.
+	 *
+	 * Following is to be noted regarding how this parameter is reported:
+	 *
+	 *  The number of waves that a CU can launch is limited by couple of
+	 *  parameters. These are encoded by struct amdgpu_cu_info instance
+	 *  that is part of every device definition. For GFX9 devices this
+	 *  translates to 40 waves (simd_per_cu * max_waves_per_simd) when waves
+	 *  do not use scratch memory and 32 waves (max_scratch_slots_per_cu)
+	 *  when they do use scratch memory. This could change for future
+	 *  devices and therefore this example should be considered as a guide.
+	 *
+	 *  All CU's of a device are available for the process. This may not be true
+	 *  under certain conditions - e.g. CU masking.
+	 *
+	 *  Finally number of CU's that are occupied by a process is affected by both
+	 *  number of CU's a device has along with number of other competing processes
+	 */
+	struct attribute attr_cu_occupancy;
+
+	/* sysfs counters for GPU retry fault and page migration tracking */
+	struct kobject *kobj_counters;
+	struct attribute attr_faults;
+	struct attribute attr_page_in;
+	struct attribute attr_page_out;
+	uint64_t faults;
+	uint64_t page_in;
+	uint64_t page_out;
+
+	/* Exception code status*/
+	uint64_t exception_status;
+	void *vm_fault_exc_data;
+	size_t vm_fault_exc_data_size;
+
+	/* Tracks debug per-vmid request settings */
+	uint32_t spi_dbg_override;
+	uint32_t spi_dbg_launch_mode;
+	uint32_t watch_points[4];
+	uint32_t alloc_watch_ids;
+
+	/*
+	 * If this process has been checkpointed before, then the user
+	 * application will use the original gpu_id on the
+	 * checkpointed node to refer to this device.
+	 */
+	uint32_t user_gpu_id;
+
+	void *proc_ctx_bo;
+	uint64_t proc_ctx_gpu_addr;
+	void *proc_ctx_cpu_ptr;
+};
+
+#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
+
+struct svm_range_list {
+	struct mutex			lock;
+	struct rb_root_cached		objects;
+	struct list_head		list;
+	struct work_struct		deferred_list_work;
+	struct list_head		deferred_range_list;
+	struct list_head                criu_svm_metadata_list;
+	spinlock_t			deferred_list_lock;
+	atomic_t			evicted_ranges;
+	atomic_t			drain_pagefaults;
+	struct delayed_work		restore_work;
+	DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
+	struct task_struct		*faulting_task;
+};
+
+/* Process data */
+struct kfd_process {
+	/*
+	 * kfd_process are stored in an mm_struct*->kfd_process*
+	 * hash table (kfd_processes in kfd_process.c)
+	 */
+	struct hlist_node kfd_processes;
+
+	/*
+	 * Opaque pointer to mm_struct. We don't hold a reference to
+	 * it so it should never be dereferenced from here. This is
+	 * only used for looking up processes by their mm.
+	 */
+	void *mm;
+
+	struct kref ref;
+	struct work_struct release_work;
+
+	struct mutex mutex;
+
+	/*
+	 * In any process, the thread that started main() is the lead
+	 * thread and outlives the rest.
+	 * It is here because amd_iommu_bind_pasid wants a task_struct.
+	 * It can also be used for safely getting a reference to the
+	 * mm_struct of the process.
+	 */
+	struct task_struct *lead_thread;
+
+	/* We want to receive a notification when the mm_struct is destroyed */
+	struct mmu_notifier mmu_notifier;
+
+	u32 pasid;
+
+	/*
+	 * Array of kfd_process_device pointers,
+	 * one for each device the process is using.
+	 */
+	struct kfd_process_device *pdds[MAX_GPU_INSTANCE];
+	uint32_t n_pdds;
+
+	struct process_queue_manager pqm;
+
+	/*Is the user space process 32 bit?*/
+	bool is_32bit_user_mode;
+
+	/* Event-related data */
+	struct mutex event_mutex;
+	/* Event ID allocator and lookup */
+	struct idr event_idr;
+	/* Event page */
+	u64 signal_handle;
+	struct kfd_signal_page *signal_page;
+	size_t signal_mapped_size;
+	size_t signal_event_count;
+	bool signal_event_limit_reached;
+
+	/* Information used for memory eviction */
+	void *kgd_process_info;
+	/* Eviction fence that is attached to all the BOs of this process. The
+	 * fence will be triggered during eviction and new one will be created
+	 * during restore
+	 */
+	struct dma_fence *ef;
+
+	/* Work items for evicting and restoring BOs */
+	struct delayed_work eviction_work;
+	struct delayed_work restore_work;
+	/* seqno of the last scheduled eviction */
+	unsigned int last_eviction_seqno;
+	/* Approx. the last timestamp (in jiffies) when the process was
+	 * restored after an eviction
+	 */
+	unsigned long last_restore_timestamp;
+
+	/* Indicates device process is debug attached with reserved vmid. */
+	bool debug_trap_enabled;
+
+	/* per-process-per device debug event fd file */
+	struct file *dbg_ev_file;
+
+	/* If the process is a kfd debugger, we need to know so we can clean
+	 * up at exit time.  If a process enables debugging on itself, it does
+	 * its own clean-up, so we don't set the flag here.  We track this by
+	 * counting the number of processes this process is debugging.
+	 */
+	atomic_t debugged_process_count;
+
+	/* If the process is a debugged, this is the debugger process */
+	struct kfd_process *debugger_process;
+
+	/* Kobj for our procfs */
+	struct kobject *kobj;
+	struct kobject *kobj_queues;
+	struct attribute attr_pasid;
+
+	/* Keep track cwsr init */
+	bool has_cwsr;
+
+	/* Exception code enable mask and status */
+	uint64_t exception_enable_mask;
+	uint64_t exception_status;
+
+	/* Used to drain stale interrupts */
+	wait_queue_head_t wait_irq_drain;
+	bool irq_drain_is_open;
+
+	/* shared virtual memory registered by this process */
+	struct svm_range_list svms;
+
+	bool xnack_enabled;
+
+	/* Work area for debugger event writer worker. */
+	struct work_struct debug_event_workarea;
+
+	/* Tracks debug per-vmid request for debug flags */
+	u32 dbg_flags;
+
+	atomic_t poison;
+	/* Queues are in paused stated because we are in the process of doing a CRIU checkpoint */
+	bool queues_paused;
+
+	/* Tracks runtime enable status */
+	struct semaphore runtime_enable_sema;
+	bool is_runtime_retry;
+	struct kfd_runtime_info runtime_info;
+};
+
+#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
+extern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
+extern struct srcu_struct kfd_processes_srcu;
+
+/**
+ * typedef amdkfd_ioctl_t - typedef for ioctl function pointer.
+ *
+ * @filep: pointer to file structure.
+ * @p: amdkfd process pointer.
+ * @data: pointer to arg that was copied from user.
+ *
+ * Return: returns ioctl completion code.
+ */
+typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p,
+				void *data);
+
+struct amdkfd_ioctl_desc {
+	unsigned int cmd;
+	int flags;
+	amdkfd_ioctl_t *func;
+	unsigned int cmd_drv;
+	const char *name;
+};
+bool kfd_dev_is_large_bar(struct kfd_node *dev);
+
+int kfd_process_create_wq(void);
+void kfd_process_destroy_wq(void);
+void kfd_cleanup_processes(void);
+struct kfd_process *kfd_create_process(struct task_struct *thread);
+struct kfd_process *kfd_get_process(const struct task_struct *task);
+struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
+struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
+
+int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
+int kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node,
+				uint32_t *gpuid, uint32_t *gpuidx);
+static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p,
+				uint32_t gpuidx, uint32_t *gpuid) {
+	return gpuidx < p->n_pdds ? p->pdds[gpuidx]->dev->id : -EINVAL;
+}
+static inline struct kfd_process_device *kfd_process_device_from_gpuidx(
+				struct kfd_process *p, uint32_t gpuidx) {
+	return gpuidx < p->n_pdds ? p->pdds[gpuidx] : NULL;
+}
+
+void kfd_unref_process(struct kfd_process *p);
+int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger);
+int kfd_process_restore_queues(struct kfd_process *p);
+void kfd_suspend_all_processes(void);
+int kfd_resume_all_processes(void);
+
+struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *process,
+							 uint32_t gpu_id);
+
+int kfd_process_get_user_gpu_id(struct kfd_process *p, uint32_t actual_gpu_id);
+
+int kfd_process_device_init_vm(struct kfd_process_device *pdd,
+			       struct file *drm_file);
+struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev,
+						struct kfd_process *p);
+struct kfd_process_device *kfd_get_process_device_data(struct kfd_node *dev,
+							struct kfd_process *p);
+struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
+							struct kfd_process *p);
+
+bool kfd_process_xnack_mode(struct kfd_process *p, bool supported);
+
+int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
+			  struct vm_area_struct *vma);
+
+/* KFD process API for creating and translating handles */
+int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
+					void *mem);
+void *kfd_process_device_translate_handle(struct kfd_process_device *p,
+					int handle);
+void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
+					int handle);
+struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid);
+
+/* PASIDs */
+int kfd_pasid_init(void);
+void kfd_pasid_exit(void);
+bool kfd_set_pasid_limit(unsigned int new_limit);
+unsigned int kfd_get_pasid_limit(void);
+u32 kfd_pasid_alloc(void);
+void kfd_pasid_free(u32 pasid);
+
+/* Doorbells */
+size_t kfd_doorbell_process_slice(struct kfd_dev *kfd);
+int kfd_doorbell_init(struct kfd_dev *kfd);
+void kfd_doorbell_fini(struct kfd_dev *kfd);
+int kfd_doorbell_mmap(struct kfd_node *dev, struct kfd_process *process,
+		      struct vm_area_struct *vma);
+void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
+					unsigned int *doorbell_off);
+void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
+u32 read_kernel_doorbell(u32 __iomem *db);
+void write_kernel_doorbell(void __iomem *db, u32 value);
+void write_kernel_doorbell64(void __iomem *db, u64 value);
+unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
+					struct kfd_process_device *pdd,
+					unsigned int doorbell_id);
+phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd);
+int kfd_alloc_process_doorbells(struct kfd_dev *kfd,
+				struct kfd_process_device *pdd);
+void kfd_free_process_doorbells(struct kfd_dev *kfd,
+				struct kfd_process_device *pdd);
+/* GTT Sub-Allocator */
+
+int kfd_gtt_sa_allocate(struct kfd_node *node, unsigned int size,
+			struct kfd_mem_obj **mem_obj);
+
+int kfd_gtt_sa_free(struct kfd_node *node, struct kfd_mem_obj *mem_obj);
+
+extern struct device *kfd_device;
+
+/* KFD's procfs */
+void kfd_procfs_init(void);
+void kfd_procfs_shutdown(void);
+int kfd_procfs_add_queue(struct queue *q);
+void kfd_procfs_del_queue(struct queue *q);
+
+/* Topology */
+int kfd_topology_init(void);
+void kfd_topology_shutdown(void);
+int kfd_topology_add_device(struct kfd_node *gpu);
+int kfd_topology_remove_device(struct kfd_node *gpu);
+struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
+						uint32_t proximity_domain);
+struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock(
+						uint32_t proximity_domain);
+struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);
+struct kfd_node *kfd_device_by_id(uint32_t gpu_id);
+struct kfd_node *kfd_device_by_pci_dev(const struct pci_dev *pdev);
+static inline bool kfd_irq_is_from_node(struct kfd_node *node, uint32_t node_id,
+					uint32_t vmid)
+{
+	return (node->interrupt_bitmap & (1 << node_id)) != 0 &&
+	       (node->compute_vmid_bitmap & (1 << vmid)) != 0;
+}
+static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev,
+					uint32_t node_id, uint32_t vmid) {
+	struct kfd_dev *dev = adev->kfd.dev;
+	uint32_t i;
+
+	if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3))
+		return dev->nodes[0];
+
+	for (i = 0; i < dev->num_nodes; i++)
+		if (kfd_irq_is_from_node(dev->nodes[i], node_id, vmid))
+			return dev->nodes[i];
+
+	return NULL;
+}
+int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_node **kdev);
+int kfd_numa_node_to_apic_id(int numa_node_id);
+
+/* Interrupts */
+#define	KFD_IRQ_FENCE_CLIENTID	0xff
+#define	KFD_IRQ_FENCE_SOURCEID	0xff
+#define	KFD_IRQ_IS_FENCE(client, source)				\
+				((client) == KFD_IRQ_FENCE_CLIENTID &&	\
+				(source) == KFD_IRQ_FENCE_SOURCEID)
+int kfd_interrupt_init(struct kfd_node *dev);
+void kfd_interrupt_exit(struct kfd_node *dev);
+bool enqueue_ih_ring_entry(struct kfd_node *kfd, const void *ih_ring_entry);
+bool interrupt_is_wanted(struct kfd_node *dev,
+				const uint32_t *ih_ring_entry,
+				uint32_t *patched_ihre, bool *flag);
+int kfd_process_drain_interrupts(struct kfd_process_device *pdd);
+void kfd_process_close_interrupt_drain(unsigned int pasid);
+
+/* amdkfd Apertures */
+int kfd_init_apertures(struct kfd_process *process);
+
+void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
+				  uint64_t tba_addr,
+				  uint64_t tma_addr);
+void kfd_process_set_trap_debug_flag(struct qcm_process_device *qpd,
+				     bool enabled);
+
+/* CWSR initialization */
+int kfd_process_init_cwsr_apu(struct kfd_process *process, struct file *filep);
+
+/* CRIU */
+/*
+ * Need to increment KFD_CRIU_PRIV_VERSION each time a change is made to any of the CRIU private
+ * structures:
+ * kfd_criu_process_priv_data
+ * kfd_criu_device_priv_data
+ * kfd_criu_bo_priv_data
+ * kfd_criu_queue_priv_data
+ * kfd_criu_event_priv_data
+ * kfd_criu_svm_range_priv_data
+ */
+
+#define KFD_CRIU_PRIV_VERSION 1
+
+struct kfd_criu_process_priv_data {
+	uint32_t version;
+	uint32_t xnack_mode;
+};
+
+struct kfd_criu_device_priv_data {
+	/* For future use */
+	uint64_t reserved;
+};
+
+struct kfd_criu_bo_priv_data {
+	uint64_t user_addr;
+	uint32_t idr_handle;
+	uint32_t mapped_gpuids[MAX_GPU_INSTANCE];
+};
+
+/*
+ * The first 4 bytes of kfd_criu_queue_priv_data, kfd_criu_event_priv_data,
+ * kfd_criu_svm_range_priv_data is the object type
+ */
+enum kfd_criu_object_type {
+	KFD_CRIU_OBJECT_TYPE_QUEUE,
+	KFD_CRIU_OBJECT_TYPE_EVENT,
+	KFD_CRIU_OBJECT_TYPE_SVM_RANGE,
+};
+
+struct kfd_criu_svm_range_priv_data {
+	uint32_t object_type;
+	uint64_t start_addr;
+	uint64_t size;
+	/* Variable length array of attributes */
+	struct kfd_ioctl_svm_attribute attrs[];
+};
+
+struct kfd_criu_queue_priv_data {
+	uint32_t object_type;
+	uint64_t q_address;
+	uint64_t q_size;
+	uint64_t read_ptr_addr;
+	uint64_t write_ptr_addr;
+	uint64_t doorbell_off;
+	uint64_t eop_ring_buffer_address;
+	uint64_t ctx_save_restore_area_address;
+	uint32_t gpu_id;
+	uint32_t type;
+	uint32_t format;
+	uint32_t q_id;
+	uint32_t priority;
+	uint32_t q_percent;
+	uint32_t doorbell_id;
+	uint32_t gws;
+	uint32_t sdma_id;
+	uint32_t eop_ring_buffer_size;
+	uint32_t ctx_save_restore_area_size;
+	uint32_t ctl_stack_size;
+	uint32_t mqd_size;
+};
+
+struct kfd_criu_event_priv_data {
+	uint32_t object_type;
+	uint64_t user_handle;
+	uint32_t event_id;
+	uint32_t auto_reset;
+	uint32_t type;
+	uint32_t signaled;
+
+	union {
+		struct kfd_hsa_memory_exception_data memory_exception_data;
+		struct kfd_hsa_hw_exception_data hw_exception_data;
+	};
+};
+
+int kfd_process_get_queue_info(struct kfd_process *p,
+			       uint32_t *num_queues,
+			       uint64_t *priv_data_sizes);
+
+int kfd_criu_checkpoint_queues(struct kfd_process *p,
+			 uint8_t __user *user_priv_data,
+			 uint64_t *priv_data_offset);
+
+int kfd_criu_restore_queue(struct kfd_process *p,
+			   uint8_t __user *user_priv_data,
+			   uint64_t *priv_data_offset,
+			   uint64_t max_priv_data_size);
+
+int kfd_criu_checkpoint_events(struct kfd_process *p,
+			 uint8_t __user *user_priv_data,
+			 uint64_t *priv_data_offset);
+
+int kfd_criu_restore_event(struct file *devkfd,
+			   struct kfd_process *p,
+			   uint8_t __user *user_priv_data,
+			   uint64_t *priv_data_offset,
+			   uint64_t max_priv_data_size);
+/* CRIU - End */
+
+/* Queue Context Management */
+int init_queue(struct queue **q, const struct queue_properties *properties);
+void uninit_queue(struct queue *q);
+void print_queue_properties(struct queue_properties *q);
+void print_queue(struct queue *q);
+
+struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
+		struct kfd_node *dev);
+struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
+		struct kfd_node *dev);
+struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
+		struct kfd_node *dev);
+struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
+		struct kfd_node *dev);
+struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
+		struct kfd_node *dev);
+struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev);
+void device_queue_manager_uninit(struct device_queue_manager *dqm);
+struct kernel_queue *kernel_queue_init(struct kfd_node *dev,
+					enum kfd_queue_type type);
+void kernel_queue_uninit(struct kernel_queue *kq, bool hanging);
+int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid);
+
+/* Process Queue Manager */
+struct process_queue_node {
+	struct queue *q;
+	struct kernel_queue *kq;
+	struct list_head process_queue_list;
+};
+
+void kfd_process_dequeue_from_device(struct kfd_process_device *pdd);
+void kfd_process_dequeue_from_all_devices(struct kfd_process *p);
+int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p);
+void pqm_uninit(struct process_queue_manager *pqm);
+int pqm_create_queue(struct process_queue_manager *pqm,
+			    struct kfd_node *dev,
+			    struct file *f,
+			    struct queue_properties *properties,
+			    unsigned int *qid,
+			    struct amdgpu_bo *wptr_bo,
+			    const struct kfd_criu_queue_priv_data *q_data,
+			    const void *restore_mqd,
+			    const void *restore_ctl_stack,
+			    uint32_t *p_doorbell_offset_in_process);
+int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
+int pqm_update_queue_properties(struct process_queue_manager *pqm, unsigned int qid,
+			struct queue_properties *p);
+int pqm_update_mqd(struct process_queue_manager *pqm, unsigned int qid,
+			struct mqd_update_info *minfo);
+int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
+			void *gws);
+struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
+						unsigned int qid);
+struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
+						unsigned int qid);
+int pqm_get_wave_state(struct process_queue_manager *pqm,
+		       unsigned int qid,
+		       void __user *ctl_stack,
+		       u32 *ctl_stack_used_size,
+		       u32 *save_area_used_size);
+int pqm_get_queue_snapshot(struct process_queue_manager *pqm,
+			   uint64_t exception_clear_mask,
+			   void __user *buf,
+			   int *num_qss_entries,
+			   uint32_t *entry_size);
+
+int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
+			      uint64_t fence_value,
+			      unsigned int timeout_ms);
+
+int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
+				  unsigned int qid,
+				  u32 *mqd_size,
+				  u32 *ctl_stack_size);
+/* Packet Manager */
+
+#define KFD_FENCE_COMPLETED (100)
+#define KFD_FENCE_INIT   (10)
+
+struct packet_manager {
+	struct device_queue_manager *dqm;
+	struct kernel_queue *priv_queue;
+	struct mutex lock;
+	bool allocated;
+	struct kfd_mem_obj *ib_buffer_obj;
+	unsigned int ib_size_bytes;
+	bool is_over_subscription;
+
+	const struct packet_manager_funcs *pmf;
+};
+
+struct packet_manager_funcs {
+	/* Support ASIC-specific packet formats for PM4 packets */
+	int (*map_process)(struct packet_manager *pm, uint32_t *buffer,
+			struct qcm_process_device *qpd);
+	int (*runlist)(struct packet_manager *pm, uint32_t *buffer,
+			uint64_t ib, size_t ib_size_in_dwords, bool chain);
+	int (*set_resources)(struct packet_manager *pm, uint32_t *buffer,
+			struct scheduling_resources *res);
+	int (*map_queues)(struct packet_manager *pm, uint32_t *buffer,
+			struct queue *q, bool is_static);
+	int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
+			enum kfd_unmap_queues_filter mode,
+			uint32_t filter_param, bool reset);
+	int (*set_grace_period)(struct packet_manager *pm, uint32_t *buffer,
+			uint32_t grace_period);
+	int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
+			uint64_t fence_address,	uint64_t fence_value);
+	int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
+
+	/* Packet sizes */
+	int map_process_size;
+	int runlist_size;
+	int set_resources_size;
+	int map_queues_size;
+	int unmap_queues_size;
+	int set_grace_period_size;
+	int query_status_size;
+	int release_mem_size;
+};
+
+extern const struct packet_manager_funcs kfd_vi_pm_funcs;
+extern const struct packet_manager_funcs kfd_v9_pm_funcs;
+extern const struct packet_manager_funcs kfd_aldebaran_pm_funcs;
+
+int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
+void pm_uninit(struct packet_manager *pm, bool hanging);
+int pm_send_set_resources(struct packet_manager *pm,
+				struct scheduling_resources *res);
+int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
+int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
+				uint64_t fence_value);
+
+int pm_send_unmap_queue(struct packet_manager *pm,
+			enum kfd_unmap_queues_filter mode,
+			uint32_t filter_param, bool reset);
+
+void pm_release_ib(struct packet_manager *pm);
+
+int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period);
+
+/* Following PM funcs can be shared among VI and AI */
+unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
+
+uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
+
+/* Events */
+extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
+extern const struct kfd_event_interrupt_class event_interrupt_class_v9;
+extern const struct kfd_event_interrupt_class event_interrupt_class_v9_4_3;
+extern const struct kfd_event_interrupt_class event_interrupt_class_v10;
+extern const struct kfd_event_interrupt_class event_interrupt_class_v11;
+
+extern const struct kfd_device_global_init_class device_global_init_class_cik;
+
+int kfd_event_init_process(struct kfd_process *p);
+void kfd_event_free_process(struct kfd_process *p);
+int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma);
+int kfd_wait_on_events(struct kfd_process *p,
+		       uint32_t num_events, void __user *data,
+		       bool all, uint32_t *user_timeout_ms,
+		       uint32_t *wait_result);
+void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
+				uint32_t valid_id_bits);
+void kfd_signal_hw_exception_event(u32 pasid);
+int kfd_set_event(struct kfd_process *p, uint32_t event_id);
+int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
+int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset);
+
+int kfd_event_create(struct file *devkfd, struct kfd_process *p,
+		     uint32_t event_type, bool auto_reset, uint32_t node_id,
+		     uint32_t *event_id, uint32_t *event_trigger_data,
+		     uint64_t *event_page_offset, uint32_t *event_slot_index);
+
+int kfd_get_num_events(struct kfd_process *p);
+int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
+
+void kfd_signal_vm_fault_event(struct kfd_node *dev, u32 pasid,
+				struct kfd_vm_fault_info *info,
+				struct kfd_hsa_memory_exception_data *data);
+
+void kfd_signal_reset_event(struct kfd_node *dev);
+
+void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid);
+
+void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
+
+static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
+{
+	return KFD_GC_VERSION(dev) > IP_VERSION(9, 4, 2) ||
+	       (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) && dev->sdma_fw_version >= 18) ||
+	       KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0);
+}
+
+int kfd_send_exception_to_runtime(struct kfd_process *p,
+				unsigned int queue_id,
+				uint64_t error_reason);
+bool kfd_is_locked(void);
+
+/* Compute profile */
+void kfd_inc_compute_active(struct kfd_node *dev);
+void kfd_dec_compute_active(struct kfd_node *dev);
+
+/* Cgroup Support */
+/* Check with device cgroup if @kfd device is accessible */
+static inline int kfd_devcgroup_check_permission(struct kfd_node *kfd)
+{
+#if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF)
+	struct drm_device *ddev = adev_to_drm(kfd->adev);
+
+	return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR,
+					  ddev->render->index,
+					  DEVCG_ACC_WRITE | DEVCG_ACC_READ);
+#else
+	return 0;
+#endif
+}
+
+static inline bool kfd_is_first_node(struct kfd_node *node)
+{
+	return (node == node->kfd->nodes[0]);
+}
+
+/* Debugfs */
+#if defined(CONFIG_DEBUG_FS)
+
+void kfd_debugfs_init(void);
+void kfd_debugfs_fini(void);
+int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data);
+int pqm_debugfs_mqds(struct seq_file *m, void *data);
+int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data);
+int dqm_debugfs_hqds(struct seq_file *m, void *data);
+int kfd_debugfs_rls_by_device(struct seq_file *m, void *data);
+int pm_debugfs_runlist(struct seq_file *m, void *data);
+
+int kfd_debugfs_hang_hws(struct kfd_node *dev);
+int pm_debugfs_hang_hws(struct packet_manager *pm);
+int dqm_debugfs_hang_hws(struct device_queue_manager *dqm);
+
+#else
+
+static inline void kfd_debugfs_init(void) {}
+static inline void kfd_debugfs_fini(void) {}
+
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
new file mode 100644
index 000000000..fbf053001
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -0,0 +1,2268 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/mutex.h>
+#include <linux/log2.h>
+#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task.h>
+#include <linux/mmu_context.h>
+#include <linux/slab.h>
+#include <linux/notifier.h>
+#include <linux/compat.h>
+#include <linux/mman.h>
+#include <linux/file.h>
+#include <linux/pm_runtime.h>
+#include "amdgpu_amdkfd.h"
+#include "amdgpu.h"
+
+struct mm_struct;
+
+#include "kfd_priv.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_svm.h"
+#include "kfd_smi_events.h"
+#include "kfd_debug.h"
+
+/*
+ * List of struct kfd_process (field kfd_process).
+ * Unique/indexed by mm_struct*
+ */
+DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
+DEFINE_MUTEX(kfd_processes_mutex);
+
+DEFINE_SRCU(kfd_processes_srcu);
+
+/* For process termination handling */
+static struct workqueue_struct *kfd_process_wq;
+
+/* Ordered, single-threaded workqueue for restoring evicted
+ * processes. Restoring multiple processes concurrently under memory
+ * pressure can lead to processes blocking each other from validating
+ * their BOs and result in a live-lock situation where processes
+ * remain evicted indefinitely.
+ */
+static struct workqueue_struct *kfd_restore_wq;
+
+static struct kfd_process *find_process(const struct task_struct *thread,
+					bool ref);
+static void kfd_process_ref_release(struct kref *ref);
+static struct kfd_process *create_process(const struct task_struct *thread);
+
+static void evict_process_worker(struct work_struct *work);
+static void restore_process_worker(struct work_struct *work);
+
+static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd);
+
+struct kfd_procfs_tree {
+	struct kobject *kobj;
+};
+
+static struct kfd_procfs_tree procfs;
+
+/*
+ * Structure for SDMA activity tracking
+ */
+struct kfd_sdma_activity_handler_workarea {
+	struct work_struct sdma_activity_work;
+	struct kfd_process_device *pdd;
+	uint64_t sdma_activity_counter;
+};
+
+struct temp_sdma_queue_list {
+	uint64_t __user *rptr;
+	uint64_t sdma_val;
+	unsigned int queue_id;
+	struct list_head list;
+};
+
+static void kfd_sdma_activity_worker(struct work_struct *work)
+{
+	struct kfd_sdma_activity_handler_workarea *workarea;
+	struct kfd_process_device *pdd;
+	uint64_t val;
+	struct mm_struct *mm;
+	struct queue *q;
+	struct qcm_process_device *qpd;
+	struct device_queue_manager *dqm;
+	int ret = 0;
+	struct temp_sdma_queue_list sdma_q_list;
+	struct temp_sdma_queue_list *sdma_q, *next;
+
+	workarea = container_of(work, struct kfd_sdma_activity_handler_workarea,
+				sdma_activity_work);
+
+	pdd = workarea->pdd;
+	if (!pdd)
+		return;
+	dqm = pdd->dev->dqm;
+	qpd = &pdd->qpd;
+	if (!dqm || !qpd)
+		return;
+	/*
+	 * Total SDMA activity is current SDMA activity + past SDMA activity
+	 * Past SDMA count is stored in pdd.
+	 * To get the current activity counters for all active SDMA queues,
+	 * we loop over all SDMA queues and get their counts from user-space.
+	 *
+	 * We cannot call get_user() with dqm_lock held as it can cause
+	 * a circular lock dependency situation. To read the SDMA stats,
+	 * we need to do the following:
+	 *
+	 * 1. Create a temporary list of SDMA queue nodes from the qpd->queues_list,
+	 *    with dqm_lock/dqm_unlock().
+	 * 2. Call get_user() for each node in temporary list without dqm_lock.
+	 *    Save the SDMA count for each node and also add the count to the total
+	 *    SDMA count counter.
+	 *    Its possible, during this step, a few SDMA queue nodes got deleted
+	 *    from the qpd->queues_list.
+	 * 3. Do a second pass over qpd->queues_list to check if any nodes got deleted.
+	 *    If any node got deleted, its SDMA count would be captured in the sdma
+	 *    past activity counter. So subtract the SDMA counter stored in step 2
+	 *    for this node from the total SDMA count.
+	 */
+	INIT_LIST_HEAD(&sdma_q_list.list);
+
+	/*
+	 * Create the temp list of all SDMA queues
+	 */
+	dqm_lock(dqm);
+
+	list_for_each_entry(q, &qpd->queues_list, list) {
+		if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) &&
+		    (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI))
+			continue;
+
+		sdma_q = kzalloc(sizeof(struct temp_sdma_queue_list), GFP_KERNEL);
+		if (!sdma_q) {
+			dqm_unlock(dqm);
+			goto cleanup;
+		}
+
+		INIT_LIST_HEAD(&sdma_q->list);
+		sdma_q->rptr = (uint64_t __user *)q->properties.read_ptr;
+		sdma_q->queue_id = q->properties.queue_id;
+		list_add_tail(&sdma_q->list, &sdma_q_list.list);
+	}
+
+	/*
+	 * If the temp list is empty, then no SDMA queues nodes were found in
+	 * qpd->queues_list. Return the past activity count as the total sdma
+	 * count
+	 */
+	if (list_empty(&sdma_q_list.list)) {
+		workarea->sdma_activity_counter = pdd->sdma_past_activity_counter;
+		dqm_unlock(dqm);
+		return;
+	}
+
+	dqm_unlock(dqm);
+
+	/*
+	 * Get the usage count for each SDMA queue in temp_list.
+	 */
+	mm = get_task_mm(pdd->process->lead_thread);
+	if (!mm)
+		goto cleanup;
+
+	kthread_use_mm(mm);
+
+	list_for_each_entry(sdma_q, &sdma_q_list.list, list) {
+		val = 0;
+		ret = read_sdma_queue_counter(sdma_q->rptr, &val);
+		if (ret) {
+			pr_debug("Failed to read SDMA queue active counter for queue id: %d",
+				 sdma_q->queue_id);
+		} else {
+			sdma_q->sdma_val = val;
+			workarea->sdma_activity_counter += val;
+		}
+	}
+
+	kthread_unuse_mm(mm);
+	mmput(mm);
+
+	/*
+	 * Do a second iteration over qpd_queues_list to check if any SDMA
+	 * nodes got deleted while fetching SDMA counter.
+	 */
+	dqm_lock(dqm);
+
+	workarea->sdma_activity_counter += pdd->sdma_past_activity_counter;
+
+	list_for_each_entry(q, &qpd->queues_list, list) {
+		if (list_empty(&sdma_q_list.list))
+			break;
+
+		if ((q->properties.type != KFD_QUEUE_TYPE_SDMA) &&
+		    (q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI))
+			continue;
+
+		list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
+			if (((uint64_t __user *)q->properties.read_ptr == sdma_q->rptr) &&
+			     (sdma_q->queue_id == q->properties.queue_id)) {
+				list_del(&sdma_q->list);
+				kfree(sdma_q);
+				break;
+			}
+		}
+	}
+
+	dqm_unlock(dqm);
+
+	/*
+	 * If temp list is not empty, it implies some queues got deleted
+	 * from qpd->queues_list during SDMA usage read. Subtract the SDMA
+	 * count for each node from the total SDMA count.
+	 */
+	list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
+		workarea->sdma_activity_counter -= sdma_q->sdma_val;
+		list_del(&sdma_q->list);
+		kfree(sdma_q);
+	}
+
+	return;
+
+cleanup:
+	list_for_each_entry_safe(sdma_q, next, &sdma_q_list.list, list) {
+		list_del(&sdma_q->list);
+		kfree(sdma_q);
+	}
+}
+
+/**
+ * kfd_get_cu_occupancy - Collect number of waves in-flight on this device
+ * by current process. Translates acquired wave count into number of compute units
+ * that are occupied.
+ *
+ * @attr: Handle of attribute that allows reporting of wave count. The attribute
+ * handle encapsulates GPU device it is associated with, thereby allowing collection
+ * of waves in flight, etc
+ * @buffer: Handle of user provided buffer updated with wave count
+ *
+ * Return: Number of bytes written to user buffer or an error value
+ */
+static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
+{
+	int cu_cnt;
+	int wave_cnt;
+	int max_waves_per_cu;
+	struct kfd_node *dev = NULL;
+	struct kfd_process *proc = NULL;
+	struct kfd_process_device *pdd = NULL;
+
+	pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy);
+	dev = pdd->dev;
+	if (dev->kfd2kgd->get_cu_occupancy == NULL)
+		return -EINVAL;
+
+	cu_cnt = 0;
+	proc = pdd->process;
+	if (pdd->qpd.queue_count == 0) {
+		pr_debug("Gpu-Id: %d has no active queues for process %d\n",
+			 dev->id, proc->pasid);
+		return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
+	}
+
+	/* Collect wave count from device if it supports */
+	wave_cnt = 0;
+	max_waves_per_cu = 0;
+	dev->kfd2kgd->get_cu_occupancy(dev->adev, proc->pasid, &wave_cnt,
+			&max_waves_per_cu, 0);
+
+	/* Translate wave count to number of compute units */
+	cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;
+	return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
+}
+
+static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
+			       char *buffer)
+{
+	if (strcmp(attr->name, "pasid") == 0) {
+		struct kfd_process *p = container_of(attr, struct kfd_process,
+						     attr_pasid);
+
+		return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid);
+	} else if (strncmp(attr->name, "vram_", 5) == 0) {
+		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
+							      attr_vram);
+		return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
+	} else if (strncmp(attr->name, "sdma_", 5) == 0) {
+		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
+							      attr_sdma);
+		struct kfd_sdma_activity_handler_workarea sdma_activity_work_handler;
+
+		INIT_WORK(&sdma_activity_work_handler.sdma_activity_work,
+					kfd_sdma_activity_worker);
+
+		sdma_activity_work_handler.pdd = pdd;
+		sdma_activity_work_handler.sdma_activity_counter = 0;
+
+		schedule_work(&sdma_activity_work_handler.sdma_activity_work);
+
+		flush_work(&sdma_activity_work_handler.sdma_activity_work);
+
+		return snprintf(buffer, PAGE_SIZE, "%llu\n",
+				(sdma_activity_work_handler.sdma_activity_counter)/
+				 SDMA_ACTIVITY_DIVISOR);
+	} else {
+		pr_err("Invalid attribute");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void kfd_procfs_kobj_release(struct kobject *kobj)
+{
+	kfree(kobj);
+}
+
+static const struct sysfs_ops kfd_procfs_ops = {
+	.show = kfd_procfs_show,
+};
+
+static const struct kobj_type procfs_type = {
+	.release = kfd_procfs_kobj_release,
+	.sysfs_ops = &kfd_procfs_ops,
+};
+
+void kfd_procfs_init(void)
+{
+	int ret = 0;
+
+	procfs.kobj = kfd_alloc_struct(procfs.kobj);
+	if (!procfs.kobj)
+		return;
+
+	ret = kobject_init_and_add(procfs.kobj, &procfs_type,
+				   &kfd_device->kobj, "proc");
+	if (ret) {
+		pr_warn("Could not create procfs proc folder");
+		/* If we fail to create the procfs, clean up */
+		kfd_procfs_shutdown();
+	}
+}
+
+void kfd_procfs_shutdown(void)
+{
+	if (procfs.kobj) {
+		kobject_del(procfs.kobj);
+		kobject_put(procfs.kobj);
+		procfs.kobj = NULL;
+	}
+}
+
+static ssize_t kfd_procfs_queue_show(struct kobject *kobj,
+				     struct attribute *attr, char *buffer)
+{
+	struct queue *q = container_of(kobj, struct queue, kobj);
+
+	if (!strcmp(attr->name, "size"))
+		return snprintf(buffer, PAGE_SIZE, "%llu",
+				q->properties.queue_size);
+	else if (!strcmp(attr->name, "type"))
+		return snprintf(buffer, PAGE_SIZE, "%d", q->properties.type);
+	else if (!strcmp(attr->name, "gpuid"))
+		return snprintf(buffer, PAGE_SIZE, "%u", q->device->id);
+	else
+		pr_err("Invalid attribute");
+
+	return 0;
+}
+
+static ssize_t kfd_procfs_stats_show(struct kobject *kobj,
+				     struct attribute *attr, char *buffer)
+{
+	if (strcmp(attr->name, "evicted_ms") == 0) {
+		struct kfd_process_device *pdd = container_of(attr,
+				struct kfd_process_device,
+				attr_evict);
+		uint64_t evict_jiffies;
+
+		evict_jiffies = atomic64_read(&pdd->evict_duration_counter);
+
+		return snprintf(buffer,
+				PAGE_SIZE,
+				"%llu\n",
+				jiffies64_to_msecs(evict_jiffies));
+
+	/* Sysfs handle that gets CU occupancy is per device */
+	} else if (strcmp(attr->name, "cu_occupancy") == 0) {
+		return kfd_get_cu_occupancy(attr, buffer);
+	} else {
+		pr_err("Invalid attribute");
+	}
+
+	return 0;
+}
+
+static ssize_t kfd_sysfs_counters_show(struct kobject *kobj,
+				       struct attribute *attr, char *buf)
+{
+	struct kfd_process_device *pdd;
+
+	if (!strcmp(attr->name, "faults")) {
+		pdd = container_of(attr, struct kfd_process_device,
+				   attr_faults);
+		return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->faults));
+	}
+	if (!strcmp(attr->name, "page_in")) {
+		pdd = container_of(attr, struct kfd_process_device,
+				   attr_page_in);
+		return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->page_in));
+	}
+	if (!strcmp(attr->name, "page_out")) {
+		pdd = container_of(attr, struct kfd_process_device,
+				   attr_page_out);
+		return sysfs_emit(buf, "%llu\n", READ_ONCE(pdd->page_out));
+	}
+	return 0;
+}
+
+static struct attribute attr_queue_size = {
+	.name = "size",
+	.mode = KFD_SYSFS_FILE_MODE
+};
+
+static struct attribute attr_queue_type = {
+	.name = "type",
+	.mode = KFD_SYSFS_FILE_MODE
+};
+
+static struct attribute attr_queue_gpuid = {
+	.name = "gpuid",
+	.mode = KFD_SYSFS_FILE_MODE
+};
+
+static struct attribute *procfs_queue_attrs[] = {
+	&attr_queue_size,
+	&attr_queue_type,
+	&attr_queue_gpuid,
+	NULL
+};
+ATTRIBUTE_GROUPS(procfs_queue);
+
+static const struct sysfs_ops procfs_queue_ops = {
+	.show = kfd_procfs_queue_show,
+};
+
+static const struct kobj_type procfs_queue_type = {
+	.sysfs_ops = &procfs_queue_ops,
+	.default_groups = procfs_queue_groups,
+};
+
+static const struct sysfs_ops procfs_stats_ops = {
+	.show = kfd_procfs_stats_show,
+};
+
+static const struct kobj_type procfs_stats_type = {
+	.sysfs_ops = &procfs_stats_ops,
+	.release = kfd_procfs_kobj_release,
+};
+
+static const struct sysfs_ops sysfs_counters_ops = {
+	.show = kfd_sysfs_counters_show,
+};
+
+static const struct kobj_type sysfs_counters_type = {
+	.sysfs_ops = &sysfs_counters_ops,
+	.release = kfd_procfs_kobj_release,
+};
+
+int kfd_procfs_add_queue(struct queue *q)
+{
+	struct kfd_process *proc;
+	int ret;
+
+	if (!q || !q->process)
+		return -EINVAL;
+	proc = q->process;
+
+	/* Create proc/<pid>/queues/<queue id> folder */
+	if (!proc->kobj_queues)
+		return -EFAULT;
+	ret = kobject_init_and_add(&q->kobj, &procfs_queue_type,
+			proc->kobj_queues, "%u", q->properties.queue_id);
+	if (ret < 0) {
+		pr_warn("Creating proc/<pid>/queues/%u failed",
+			q->properties.queue_id);
+		kobject_put(&q->kobj);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void kfd_sysfs_create_file(struct kobject *kobj, struct attribute *attr,
+				 char *name)
+{
+	int ret;
+
+	if (!kobj || !attr || !name)
+		return;
+
+	attr->name = name;
+	attr->mode = KFD_SYSFS_FILE_MODE;
+	sysfs_attr_init(attr);
+
+	ret = sysfs_create_file(kobj, attr);
+	if (ret)
+		pr_warn("Create sysfs %s/%s failed %d", kobj->name, name, ret);
+}
+
+static void kfd_procfs_add_sysfs_stats(struct kfd_process *p)
+{
+	int ret;
+	int i;
+	char stats_dir_filename[MAX_SYSFS_FILENAME_LEN];
+
+	if (!p || !p->kobj)
+		return;
+
+	/*
+	 * Create sysfs files for each GPU:
+	 * - proc/<pid>/stats_<gpuid>/
+	 * - proc/<pid>/stats_<gpuid>/evicted_ms
+	 * - proc/<pid>/stats_<gpuid>/cu_occupancy
+	 */
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+
+		snprintf(stats_dir_filename, MAX_SYSFS_FILENAME_LEN,
+				"stats_%u", pdd->dev->id);
+		pdd->kobj_stats = kfd_alloc_struct(pdd->kobj_stats);
+		if (!pdd->kobj_stats)
+			return;
+
+		ret = kobject_init_and_add(pdd->kobj_stats,
+					   &procfs_stats_type,
+					   p->kobj,
+					   stats_dir_filename);
+
+		if (ret) {
+			pr_warn("Creating KFD proc/stats_%s folder failed",
+				stats_dir_filename);
+			kobject_put(pdd->kobj_stats);
+			pdd->kobj_stats = NULL;
+			return;
+		}
+
+		kfd_sysfs_create_file(pdd->kobj_stats, &pdd->attr_evict,
+				      "evicted_ms");
+		/* Add sysfs file to report compute unit occupancy */
+		if (pdd->dev->kfd2kgd->get_cu_occupancy)
+			kfd_sysfs_create_file(pdd->kobj_stats,
+					      &pdd->attr_cu_occupancy,
+					      "cu_occupancy");
+	}
+}
+
+static void kfd_procfs_add_sysfs_counters(struct kfd_process *p)
+{
+	int ret = 0;
+	int i;
+	char counters_dir_filename[MAX_SYSFS_FILENAME_LEN];
+
+	if (!p || !p->kobj)
+		return;
+
+	/*
+	 * Create sysfs files for each GPU which supports SVM
+	 * - proc/<pid>/counters_<gpuid>/
+	 * - proc/<pid>/counters_<gpuid>/faults
+	 * - proc/<pid>/counters_<gpuid>/page_in
+	 * - proc/<pid>/counters_<gpuid>/page_out
+	 */
+	for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) {
+		struct kfd_process_device *pdd = p->pdds[i];
+		struct kobject *kobj_counters;
+
+		snprintf(counters_dir_filename, MAX_SYSFS_FILENAME_LEN,
+			"counters_%u", pdd->dev->id);
+		kobj_counters = kfd_alloc_struct(kobj_counters);
+		if (!kobj_counters)
+			return;
+
+		ret = kobject_init_and_add(kobj_counters, &sysfs_counters_type,
+					   p->kobj, counters_dir_filename);
+		if (ret) {
+			pr_warn("Creating KFD proc/%s folder failed",
+				counters_dir_filename);
+			kobject_put(kobj_counters);
+			return;
+		}
+
+		pdd->kobj_counters = kobj_counters;
+		kfd_sysfs_create_file(kobj_counters, &pdd->attr_faults,
+				      "faults");
+		kfd_sysfs_create_file(kobj_counters, &pdd->attr_page_in,
+				      "page_in");
+		kfd_sysfs_create_file(kobj_counters, &pdd->attr_page_out,
+				      "page_out");
+	}
+}
+
+static void kfd_procfs_add_sysfs_files(struct kfd_process *p)
+{
+	int i;
+
+	if (!p || !p->kobj)
+		return;
+
+	/*
+	 * Create sysfs files for each GPU:
+	 * - proc/<pid>/vram_<gpuid>
+	 * - proc/<pid>/sdma_<gpuid>
+	 */
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+
+		snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u",
+			 pdd->dev->id);
+		kfd_sysfs_create_file(p->kobj, &pdd->attr_vram,
+				      pdd->vram_filename);
+
+		snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u",
+			 pdd->dev->id);
+		kfd_sysfs_create_file(p->kobj, &pdd->attr_sdma,
+					    pdd->sdma_filename);
+	}
+}
+
+void kfd_procfs_del_queue(struct queue *q)
+{
+	if (!q)
+		return;
+
+	kobject_del(&q->kobj);
+	kobject_put(&q->kobj);
+}
+
+int kfd_process_create_wq(void)
+{
+	if (!kfd_process_wq)
+		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
+	if (!kfd_restore_wq)
+		kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);
+
+	if (!kfd_process_wq || !kfd_restore_wq) {
+		kfd_process_destroy_wq();
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+void kfd_process_destroy_wq(void)
+{
+	if (kfd_process_wq) {
+		destroy_workqueue(kfd_process_wq);
+		kfd_process_wq = NULL;
+	}
+	if (kfd_restore_wq) {
+		destroy_workqueue(kfd_restore_wq);
+		kfd_restore_wq = NULL;
+	}
+}
+
+static void kfd_process_free_gpuvm(struct kgd_mem *mem,
+			struct kfd_process_device *pdd, void **kptr)
+{
+	struct kfd_node *dev = pdd->dev;
+
+	if (kptr && *kptr) {
+		amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
+		*kptr = NULL;
+	}
+
+	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->adev, mem, pdd->drm_priv);
+	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, mem, pdd->drm_priv,
+					       NULL);
+}
+
+/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
+ *	This function should be only called right after the process
+ *	is created and when kfd_processes_mutex is still being held
+ *	to avoid concurrency. Because of that exclusiveness, we do
+ *	not need to take p->mutex.
+ */
+static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
+				   uint64_t gpu_va, uint32_t size,
+				   uint32_t flags, struct kgd_mem **mem, void **kptr)
+{
+	struct kfd_node *kdev = pdd->dev;
+	int err;
+
+	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size,
+						 pdd->drm_priv, mem, NULL,
+						 flags, false);
+	if (err)
+		goto err_alloc_mem;
+
+	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->adev, *mem,
+			pdd->drm_priv);
+	if (err)
+		goto err_map_mem;
+
+	err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->adev, *mem, true);
+	if (err) {
+		pr_debug("Sync memory failed, wait interrupted by user signal\n");
+		goto sync_memory_failed;
+	}
+
+	if (kptr) {
+		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(
+				(struct kgd_mem *)*mem, kptr, NULL);
+		if (err) {
+			pr_debug("Map GTT BO to kernel failed\n");
+			goto sync_memory_failed;
+		}
+	}
+
+	return err;
+
+sync_memory_failed:
+	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->adev, *mem, pdd->drm_priv);
+
+err_map_mem:
+	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->adev, *mem, pdd->drm_priv,
+					       NULL);
+err_alloc_mem:
+	*mem = NULL;
+	*kptr = NULL;
+	return err;
+}
+
+/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
+ *	process for IB usage The memory reserved is for KFD to submit
+ *	IB to AMDGPU from kernel.  If the memory is reserved
+ *	successfully, ib_kaddr will have the CPU/kernel
+ *	address. Check ib_kaddr before accessing the memory.
+ */
+static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
+{
+	struct qcm_process_device *qpd = &pdd->qpd;
+	uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT |
+			KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
+			KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
+			KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
+	struct kgd_mem *mem;
+	void *kaddr;
+	int ret;
+
+	if (qpd->ib_kaddr || !qpd->ib_base)
+		return 0;
+
+	/* ib_base is only set for dGPU */
+	ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
+				      &mem, &kaddr);
+	if (ret)
+		return ret;
+
+	qpd->ib_mem = mem;
+	qpd->ib_kaddr = kaddr;
+
+	return 0;
+}
+
+static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd)
+{
+	struct qcm_process_device *qpd = &pdd->qpd;
+
+	if (!qpd->ib_kaddr || !qpd->ib_base)
+		return;
+
+	kfd_process_free_gpuvm(qpd->ib_mem, pdd, &qpd->ib_kaddr);
+}
+
+struct kfd_process *kfd_create_process(struct task_struct *thread)
+{
+	struct kfd_process *process;
+	int ret;
+
+	if (!(thread->mm && mmget_not_zero(thread->mm)))
+		return ERR_PTR(-EINVAL);
+
+	/* Only the pthreads threading model is supported. */
+	if (thread->group_leader->mm != thread->mm) {
+		mmput(thread->mm);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/*
+	 * take kfd processes mutex before starting of process creation
+	 * so there won't be a case where two threads of the same process
+	 * create two kfd_process structures
+	 */
+	mutex_lock(&kfd_processes_mutex);
+
+	if (kfd_is_locked()) {
+		mutex_unlock(&kfd_processes_mutex);
+		pr_debug("KFD is locked! Cannot create process");
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* A prior open of /dev/kfd could have already created the process. */
+	process = find_process(thread, false);
+	if (process) {
+		pr_debug("Process already found\n");
+	} else {
+		process = create_process(thread);
+		if (IS_ERR(process))
+			goto out;
+
+		if (!procfs.kobj)
+			goto out;
+
+		process->kobj = kfd_alloc_struct(process->kobj);
+		if (!process->kobj) {
+			pr_warn("Creating procfs kobject failed");
+			goto out;
+		}
+		ret = kobject_init_and_add(process->kobj, &procfs_type,
+					   procfs.kobj, "%d",
+					   (int)process->lead_thread->pid);
+		if (ret) {
+			pr_warn("Creating procfs pid directory failed");
+			kobject_put(process->kobj);
+			goto out;
+		}
+
+		kfd_sysfs_create_file(process->kobj, &process->attr_pasid,
+				      "pasid");
+
+		process->kobj_queues = kobject_create_and_add("queues",
+							process->kobj);
+		if (!process->kobj_queues)
+			pr_warn("Creating KFD proc/queues folder failed");
+
+		kfd_procfs_add_sysfs_stats(process);
+		kfd_procfs_add_sysfs_files(process);
+		kfd_procfs_add_sysfs_counters(process);
+
+		init_waitqueue_head(&process->wait_irq_drain);
+	}
+out:
+	if (!IS_ERR(process))
+		kref_get(&process->ref);
+	mutex_unlock(&kfd_processes_mutex);
+	mmput(thread->mm);
+
+	return process;
+}
+
+struct kfd_process *kfd_get_process(const struct task_struct *thread)
+{
+	struct kfd_process *process;
+
+	if (!thread->mm)
+		return ERR_PTR(-EINVAL);
+
+	/* Only the pthreads threading model is supported. */
+	if (thread->group_leader->mm != thread->mm)
+		return ERR_PTR(-EINVAL);
+
+	process = find_process(thread, false);
+	if (!process)
+		return ERR_PTR(-EINVAL);
+
+	return process;
+}
+
+static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
+{
+	struct kfd_process *process;
+
+	hash_for_each_possible_rcu(kfd_processes_table, process,
+					kfd_processes, (uintptr_t)mm)
+		if (process->mm == mm)
+			return process;
+
+	return NULL;
+}
+
+static struct kfd_process *find_process(const struct task_struct *thread,
+					bool ref)
+{
+	struct kfd_process *p;
+	int idx;
+
+	idx = srcu_read_lock(&kfd_processes_srcu);
+	p = find_process_by_mm(thread->mm);
+	if (p && ref)
+		kref_get(&p->ref);
+	srcu_read_unlock(&kfd_processes_srcu, idx);
+
+	return p;
+}
+
+void kfd_unref_process(struct kfd_process *p)
+{
+	kref_put(&p->ref, kfd_process_ref_release);
+}
+
+/* This increments the process->ref counter. */
+struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid)
+{
+	struct task_struct *task = NULL;
+	struct kfd_process *p    = NULL;
+
+	if (!pid) {
+		task = current;
+		get_task_struct(task);
+	} else {
+		task = get_pid_task(pid, PIDTYPE_PID);
+	}
+
+	if (task) {
+		p = find_process(task, true);
+		put_task_struct(task);
+	}
+
+	return p;
+}
+
+static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
+{
+	struct kfd_process *p = pdd->process;
+	void *mem;
+	int id;
+	int i;
+
+	/*
+	 * Remove all handles from idr and release appropriate
+	 * local memory object
+	 */
+	idr_for_each_entry(&pdd->alloc_idr, mem, id) {
+
+		for (i = 0; i < p->n_pdds; i++) {
+			struct kfd_process_device *peer_pdd = p->pdds[i];
+
+			if (!peer_pdd->drm_priv)
+				continue;
+			amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
+				peer_pdd->dev->adev, mem, peer_pdd->drm_priv);
+		}
+
+		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, mem,
+						       pdd->drm_priv, NULL);
+		kfd_process_device_remove_obj_handle(pdd, id);
+	}
+}
+
+/*
+ * Just kunmap and unpin signal BO here. It will be freed in
+ * kfd_process_free_outstanding_kfd_bos()
+ */
+static void kfd_process_kunmap_signal_bo(struct kfd_process *p)
+{
+	struct kfd_process_device *pdd;
+	struct kfd_node *kdev;
+	void *mem;
+
+	kdev = kfd_device_by_id(GET_GPU_ID(p->signal_handle));
+	if (!kdev)
+		return;
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_get_process_device_data(kdev, p);
+	if (!pdd)
+		goto out;
+
+	mem = kfd_process_device_translate_handle(
+		pdd, GET_IDR_HANDLE(p->signal_handle));
+	if (!mem)
+		goto out;
+
+	amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
+
+out:
+	mutex_unlock(&p->mutex);
+}
+
+static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
+{
+	int i;
+
+	for (i = 0; i < p->n_pdds; i++)
+		kfd_process_device_free_bos(p->pdds[i]);
+}
+
+static void kfd_process_destroy_pdds(struct kfd_process *p)
+{
+	int i;
+
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+
+		pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
+				pdd->dev->id, p->pasid);
+
+		kfd_process_device_destroy_cwsr_dgpu(pdd);
+		kfd_process_device_destroy_ib_mem(pdd);
+
+		if (pdd->drm_file) {
+			amdgpu_amdkfd_gpuvm_release_process_vm(
+					pdd->dev->adev, pdd->drm_priv);
+			fput(pdd->drm_file);
+		}
+
+		if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base)
+			free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
+				get_order(KFD_CWSR_TBA_TMA_SIZE));
+
+		idr_destroy(&pdd->alloc_idr);
+
+		kfd_free_process_doorbells(pdd->dev->kfd, pdd);
+
+		if (pdd->dev->kfd->shared_resources.enable_mes)
+			amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
+						   pdd->proc_ctx_bo);
+		/*
+		 * before destroying pdd, make sure to report availability
+		 * for auto suspend
+		 */
+		if (pdd->runtime_inuse) {
+			pm_runtime_mark_last_busy(adev_to_drm(pdd->dev->adev)->dev);
+			pm_runtime_put_autosuspend(adev_to_drm(pdd->dev->adev)->dev);
+			pdd->runtime_inuse = false;
+		}
+
+		kfree(pdd);
+		p->pdds[i] = NULL;
+	}
+	p->n_pdds = 0;
+}
+
+static void kfd_process_remove_sysfs(struct kfd_process *p)
+{
+	struct kfd_process_device *pdd;
+	int i;
+
+	if (!p->kobj)
+		return;
+
+	sysfs_remove_file(p->kobj, &p->attr_pasid);
+	kobject_del(p->kobj_queues);
+	kobject_put(p->kobj_queues);
+	p->kobj_queues = NULL;
+
+	for (i = 0; i < p->n_pdds; i++) {
+		pdd = p->pdds[i];
+
+		sysfs_remove_file(p->kobj, &pdd->attr_vram);
+		sysfs_remove_file(p->kobj, &pdd->attr_sdma);
+
+		sysfs_remove_file(pdd->kobj_stats, &pdd->attr_evict);
+		if (pdd->dev->kfd2kgd->get_cu_occupancy)
+			sysfs_remove_file(pdd->kobj_stats,
+					  &pdd->attr_cu_occupancy);
+		kobject_del(pdd->kobj_stats);
+		kobject_put(pdd->kobj_stats);
+		pdd->kobj_stats = NULL;
+	}
+
+	for_each_set_bit(i, p->svms.bitmap_supported, p->n_pdds) {
+		pdd = p->pdds[i];
+
+		sysfs_remove_file(pdd->kobj_counters, &pdd->attr_faults);
+		sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_in);
+		sysfs_remove_file(pdd->kobj_counters, &pdd->attr_page_out);
+		kobject_del(pdd->kobj_counters);
+		kobject_put(pdd->kobj_counters);
+		pdd->kobj_counters = NULL;
+	}
+
+	kobject_del(p->kobj);
+	kobject_put(p->kobj);
+	p->kobj = NULL;
+}
+
+/* No process locking is needed in this function, because the process
+ * is not findable any more. We must assume that no other thread is
+ * using it any more, otherwise we couldn't safely free the process
+ * structure in the end.
+ */
+static void kfd_process_wq_release(struct work_struct *work)
+{
+	struct kfd_process *p = container_of(work, struct kfd_process,
+					     release_work);
+
+	kfd_process_dequeue_from_all_devices(p);
+	pqm_uninit(&p->pqm);
+
+	/* Signal the eviction fence after user mode queues are
+	 * destroyed. This allows any BOs to be freed without
+	 * triggering pointless evictions or waiting for fences.
+	 */
+	dma_fence_signal(p->ef);
+
+	kfd_process_remove_sysfs(p);
+
+	kfd_process_kunmap_signal_bo(p);
+	kfd_process_free_outstanding_kfd_bos(p);
+	svm_range_list_fini(p);
+
+	kfd_process_destroy_pdds(p);
+	dma_fence_put(p->ef);
+
+	kfd_event_free_process(p);
+
+	kfd_pasid_free(p->pasid);
+	mutex_destroy(&p->mutex);
+
+	put_task_struct(p->lead_thread);
+
+	kfree(p);
+}
+
+static void kfd_process_ref_release(struct kref *ref)
+{
+	struct kfd_process *p = container_of(ref, struct kfd_process, ref);
+
+	INIT_WORK(&p->release_work, kfd_process_wq_release);
+	queue_work(kfd_process_wq, &p->release_work);
+}
+
+static struct mmu_notifier *kfd_process_alloc_notifier(struct mm_struct *mm)
+{
+	int idx = srcu_read_lock(&kfd_processes_srcu);
+	struct kfd_process *p = find_process_by_mm(mm);
+
+	srcu_read_unlock(&kfd_processes_srcu, idx);
+
+	return p ? &p->mmu_notifier : ERR_PTR(-ESRCH);
+}
+
+static void kfd_process_free_notifier(struct mmu_notifier *mn)
+{
+	kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
+}
+
+static void kfd_process_notifier_release_internal(struct kfd_process *p)
+{
+	int i;
+
+	cancel_delayed_work_sync(&p->eviction_work);
+	cancel_delayed_work_sync(&p->restore_work);
+
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+
+		/* re-enable GFX OFF since runtime enable with ttmp setup disabled it. */
+		if (!kfd_dbg_is_rlc_restore_supported(pdd->dev) && p->runtime_info.ttmp_setup)
+			amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+	}
+
+	/* Indicate to other users that MM is no longer valid */
+	p->mm = NULL;
+	kfd_dbg_trap_disable(p);
+
+	if (atomic_read(&p->debugged_process_count) > 0) {
+		struct kfd_process *target;
+		unsigned int temp;
+		int idx = srcu_read_lock(&kfd_processes_srcu);
+
+		hash_for_each_rcu(kfd_processes_table, temp, target, kfd_processes) {
+			if (target->debugger_process && target->debugger_process == p) {
+				mutex_lock_nested(&target->mutex, 1);
+				kfd_dbg_trap_disable(target);
+				mutex_unlock(&target->mutex);
+				if (atomic_read(&p->debugged_process_count) == 0)
+					break;
+			}
+		}
+
+		srcu_read_unlock(&kfd_processes_srcu, idx);
+	}
+
+	mmu_notifier_put(&p->mmu_notifier);
+}
+
+static void kfd_process_notifier_release(struct mmu_notifier *mn,
+					struct mm_struct *mm)
+{
+	struct kfd_process *p;
+
+	/*
+	 * The kfd_process structure can not be free because the
+	 * mmu_notifier srcu is read locked
+	 */
+	p = container_of(mn, struct kfd_process, mmu_notifier);
+	if (WARN_ON(p->mm != mm))
+		return;
+
+	mutex_lock(&kfd_processes_mutex);
+	/*
+	 * Do early return if table is empty.
+	 *
+	 * This could potentially happen if this function is called concurrently
+	 * by mmu_notifier and by kfd_cleanup_pocesses.
+	 *
+	 */
+	if (hash_empty(kfd_processes_table)) {
+		mutex_unlock(&kfd_processes_mutex);
+		return;
+	}
+	hash_del_rcu(&p->kfd_processes);
+	mutex_unlock(&kfd_processes_mutex);
+	synchronize_srcu(&kfd_processes_srcu);
+
+	kfd_process_notifier_release_internal(p);
+}
+
+static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
+	.release = kfd_process_notifier_release,
+	.alloc_notifier = kfd_process_alloc_notifier,
+	.free_notifier = kfd_process_free_notifier,
+};
+
+/*
+ * This code handles the case when driver is being unloaded before all
+ * mm_struct are released.  We need to safely free the kfd_process and
+ * avoid race conditions with mmu_notifier that might try to free them.
+ *
+ */
+void kfd_cleanup_processes(void)
+{
+	struct kfd_process *p;
+	struct hlist_node *p_temp;
+	unsigned int temp;
+	HLIST_HEAD(cleanup_list);
+
+	/*
+	 * Move all remaining kfd_process from the process table to a
+	 * temp list for processing.   Once done, callback from mmu_notifier
+	 * release will not see the kfd_process in the table and do early return,
+	 * avoiding double free issues.
+	 */
+	mutex_lock(&kfd_processes_mutex);
+	hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) {
+		hash_del_rcu(&p->kfd_processes);
+		synchronize_srcu(&kfd_processes_srcu);
+		hlist_add_head(&p->kfd_processes, &cleanup_list);
+	}
+	mutex_unlock(&kfd_processes_mutex);
+
+	hlist_for_each_entry_safe(p, p_temp, &cleanup_list, kfd_processes)
+		kfd_process_notifier_release_internal(p);
+
+	/*
+	 * Ensures that all outstanding free_notifier get called, triggering
+	 * the release of the kfd_process struct.
+	 */
+	mmu_notifier_synchronize();
+}
+
+int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
+{
+	unsigned long  offset;
+	int i;
+
+	if (p->has_cwsr)
+		return 0;
+
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_node *dev = p->pdds[i]->dev;
+		struct qcm_process_device *qpd = &p->pdds[i]->qpd;
+
+		if (!dev->kfd->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
+			continue;
+
+		offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id);
+		qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
+			KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
+			MAP_SHARED, offset);
+
+		if (IS_ERR_VALUE(qpd->tba_addr)) {
+			int err = qpd->tba_addr;
+
+			pr_err("Failure to set tba address. error %d.\n", err);
+			qpd->tba_addr = 0;
+			qpd->cwsr_kaddr = NULL;
+			return err;
+		}
+
+		memcpy(qpd->cwsr_kaddr, dev->kfd->cwsr_isa, dev->kfd->cwsr_isa_size);
+
+		kfd_process_set_trap_debug_flag(qpd, p->debug_trap_enabled);
+
+		qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
+		pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
+			qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
+	}
+
+	p->has_cwsr = true;
+
+	return 0;
+}
+
+static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
+{
+	struct kfd_node *dev = pdd->dev;
+	struct qcm_process_device *qpd = &pdd->qpd;
+	uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
+			| KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
+			| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
+	struct kgd_mem *mem;
+	void *kaddr;
+	int ret;
+
+	if (!dev->kfd->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
+		return 0;
+
+	/* cwsr_base is only set for dGPU */
+	ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
+				      KFD_CWSR_TBA_TMA_SIZE, flags, &mem, &kaddr);
+	if (ret)
+		return ret;
+
+	qpd->cwsr_mem = mem;
+	qpd->cwsr_kaddr = kaddr;
+	qpd->tba_addr = qpd->cwsr_base;
+
+	memcpy(qpd->cwsr_kaddr, dev->kfd->cwsr_isa, dev->kfd->cwsr_isa_size);
+
+	kfd_process_set_trap_debug_flag(&pdd->qpd,
+					pdd->process->debug_trap_enabled);
+
+	qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
+	pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
+		 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
+
+	return 0;
+}
+
+static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd)
+{
+	struct kfd_node *dev = pdd->dev;
+	struct qcm_process_device *qpd = &pdd->qpd;
+
+	if (!dev->kfd->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base)
+		return;
+
+	kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, &qpd->cwsr_kaddr);
+}
+
+void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
+				  uint64_t tba_addr,
+				  uint64_t tma_addr)
+{
+	if (qpd->cwsr_kaddr) {
+		/* KFD trap handler is bound, record as second-level TBA/TMA
+		 * in first-level TMA. First-level trap will jump to second.
+		 */
+		uint64_t *tma =
+			(uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
+		tma[0] = tba_addr;
+		tma[1] = tma_addr;
+	} else {
+		/* No trap handler bound, bind as first-level TBA/TMA. */
+		qpd->tba_addr = tba_addr;
+		qpd->tma_addr = tma_addr;
+	}
+}
+
+bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
+{
+	int i;
+
+	/* On most GFXv9 GPUs, the retry mode in the SQ must match the
+	 * boot time retry setting. Mixing processes with different
+	 * XNACK/retry settings can hang the GPU.
+	 *
+	 * Different GPUs can have different noretry settings depending
+	 * on HW bugs or limitations. We need to find at least one
+	 * XNACK mode for this process that's compatible with all GPUs.
+	 * Fortunately GPUs with retry enabled (noretry=0) can run code
+	 * built for XNACK-off. On GFXv9 it may perform slower.
+	 *
+	 * Therefore applications built for XNACK-off can always be
+	 * supported and will be our fallback if any GPU does not
+	 * support retry.
+	 */
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_node *dev = p->pdds[i]->dev;
+
+		/* Only consider GFXv9 and higher GPUs. Older GPUs don't
+		 * support the SVM APIs and don't need to be considered
+		 * for the XNACK mode selection.
+		 */
+		if (!KFD_IS_SOC15(dev))
+			continue;
+		/* Aldebaran can always support XNACK because it can support
+		 * per-process XNACK mode selection. But let the dev->noretry
+		 * setting still influence the default XNACK mode.
+		 */
+		if (supported && KFD_SUPPORT_XNACK_PER_PROCESS(dev))
+			continue;
+
+		/* GFXv10 and later GPUs do not support shader preemption
+		 * during page faults. This can lead to poor QoS for queue
+		 * management and memory-manager-related preemptions or
+		 * even deadlocks.
+		 */
+		if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
+			return false;
+
+		if (dev->kfd->noretry)
+			return false;
+	}
+
+	return true;
+}
+
+void kfd_process_set_trap_debug_flag(struct qcm_process_device *qpd,
+				     bool enabled)
+{
+	if (qpd->cwsr_kaddr) {
+		uint64_t *tma =
+			(uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
+		tma[2] = enabled;
+	}
+}
+
+/*
+ * On return the kfd_process is fully operational and will be freed when the
+ * mm is released
+ */
+static struct kfd_process *create_process(const struct task_struct *thread)
+{
+	struct kfd_process *process;
+	struct mmu_notifier *mn;
+	int err = -ENOMEM;
+
+	process = kzalloc(sizeof(*process), GFP_KERNEL);
+	if (!process)
+		goto err_alloc_process;
+
+	kref_init(&process->ref);
+	mutex_init(&process->mutex);
+	process->mm = thread->mm;
+	process->lead_thread = thread->group_leader;
+	process->n_pdds = 0;
+	process->queues_paused = false;
+	INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
+	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
+	process->last_restore_timestamp = get_jiffies_64();
+	err = kfd_event_init_process(process);
+	if (err)
+		goto err_event_init;
+	process->is_32bit_user_mode = in_compat_syscall();
+	process->debug_trap_enabled = false;
+	process->debugger_process = NULL;
+	process->exception_enable_mask = 0;
+	atomic_set(&process->debugged_process_count, 0);
+	sema_init(&process->runtime_enable_sema, 0);
+
+	process->pasid = kfd_pasid_alloc();
+	if (process->pasid == 0) {
+		err = -ENOSPC;
+		goto err_alloc_pasid;
+	}
+
+	err = pqm_init(&process->pqm, process);
+	if (err != 0)
+		goto err_process_pqm_init;
+
+	/* init process apertures*/
+	err = kfd_init_apertures(process);
+	if (err != 0)
+		goto err_init_apertures;
+
+	/* Check XNACK support after PDDs are created in kfd_init_apertures */
+	process->xnack_enabled = kfd_process_xnack_mode(process, false);
+
+	err = svm_range_list_init(process);
+	if (err)
+		goto err_init_svm_range_list;
+
+	/* alloc_notifier needs to find the process in the hash table */
+	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
+			(uintptr_t)process->mm);
+
+	/* Avoid free_notifier to start kfd_process_wq_release if
+	 * mmu_notifier_get failed because of pending signal.
+	 */
+	kref_get(&process->ref);
+
+	/* MMU notifier registration must be the last call that can fail
+	 * because after this point we cannot unwind the process creation.
+	 * After this point, mmu_notifier_put will trigger the cleanup by
+	 * dropping the last process reference in the free_notifier.
+	 */
+	mn = mmu_notifier_get(&kfd_process_mmu_notifier_ops, process->mm);
+	if (IS_ERR(mn)) {
+		err = PTR_ERR(mn);
+		goto err_register_notifier;
+	}
+	BUG_ON(mn != &process->mmu_notifier);
+
+	kfd_unref_process(process);
+	get_task_struct(process->lead_thread);
+
+	INIT_WORK(&process->debug_event_workarea, debug_event_write_work_handler);
+
+	return process;
+
+err_register_notifier:
+	hash_del_rcu(&process->kfd_processes);
+	svm_range_list_fini(process);
+err_init_svm_range_list:
+	kfd_process_free_outstanding_kfd_bos(process);
+	kfd_process_destroy_pdds(process);
+err_init_apertures:
+	pqm_uninit(&process->pqm);
+err_process_pqm_init:
+	kfd_pasid_free(process->pasid);
+err_alloc_pasid:
+	kfd_event_free_process(process);
+err_event_init:
+	mutex_destroy(&process->mutex);
+	kfree(process);
+err_alloc_process:
+	return ERR_PTR(err);
+}
+
+struct kfd_process_device *kfd_get_process_device_data(struct kfd_node *dev,
+							struct kfd_process *p)
+{
+	int i;
+
+	for (i = 0; i < p->n_pdds; i++)
+		if (p->pdds[i]->dev == dev)
+			return p->pdds[i];
+
+	return NULL;
+}
+
+struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
+							struct kfd_process *p)
+{
+	struct kfd_process_device *pdd = NULL;
+	int retval = 0;
+
+	if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
+		return NULL;
+	pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
+	if (!pdd)
+		return NULL;
+
+	pdd->dev = dev;
+	INIT_LIST_HEAD(&pdd->qpd.queues_list);
+	INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
+	pdd->qpd.dqm = dev->dqm;
+	pdd->qpd.pqm = &p->pqm;
+	pdd->qpd.evicted = 0;
+	pdd->qpd.mapped_gws_queue = false;
+	pdd->process = p;
+	pdd->bound = PDD_UNBOUND;
+	pdd->already_dequeued = false;
+	pdd->runtime_inuse = false;
+	pdd->vram_usage = 0;
+	pdd->sdma_past_activity_counter = 0;
+	pdd->user_gpu_id = dev->id;
+	atomic64_set(&pdd->evict_duration_counter, 0);
+
+	if (dev->kfd->shared_resources.enable_mes) {
+		retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
+						AMDGPU_MES_PROC_CTX_SIZE,
+						&pdd->proc_ctx_bo,
+						&pdd->proc_ctx_gpu_addr,
+						&pdd->proc_ctx_cpu_ptr,
+						false);
+		if (retval) {
+			pr_err("failed to allocate process context bo\n");
+			goto err_free_pdd;
+		}
+		memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+	}
+
+	p->pdds[p->n_pdds++] = pdd;
+	if (kfd_dbg_is_per_vmid_supported(pdd->dev))
+		pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap(
+							pdd->dev->adev,
+							false,
+							0);
+
+	/* Init idr used for memory handle translation */
+	idr_init(&pdd->alloc_idr);
+
+	return pdd;
+
+err_free_pdd:
+	kfree(pdd);
+	return NULL;
+}
+
+/**
+ * kfd_process_device_init_vm - Initialize a VM for a process-device
+ *
+ * @pdd: The process-device
+ * @drm_file: Optional pointer to a DRM file descriptor
+ *
+ * If @drm_file is specified, it will be used to acquire the VM from
+ * that file descriptor. If successful, the @pdd takes ownership of
+ * the file descriptor.
+ *
+ * If @drm_file is NULL, a new VM is created.
+ *
+ * Returns 0 on success, -errno on failure.
+ */
+int kfd_process_device_init_vm(struct kfd_process_device *pdd,
+			       struct file *drm_file)
+{
+	struct amdgpu_fpriv *drv_priv;
+	struct amdgpu_vm *avm;
+	struct kfd_process *p;
+	struct kfd_node *dev;
+	int ret;
+
+	if (!drm_file)
+		return -EINVAL;
+
+	if (pdd->drm_priv)
+		return -EBUSY;
+
+	ret = amdgpu_file_to_fpriv(drm_file, &drv_priv);
+	if (ret)
+		return ret;
+	avm = &drv_priv->vm;
+
+	p = pdd->process;
+	dev = pdd->dev;
+
+	ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, avm,
+						     &p->kgd_process_info,
+						     &p->ef);
+	if (ret) {
+		pr_err("Failed to create process VM object\n");
+		return ret;
+	}
+	pdd->drm_priv = drm_file->private_data;
+	atomic64_set(&pdd->tlb_seq, 0);
+
+	ret = kfd_process_device_reserve_ib_mem(pdd);
+	if (ret)
+		goto err_reserve_ib_mem;
+	ret = kfd_process_device_init_cwsr_dgpu(pdd);
+	if (ret)
+		goto err_init_cwsr;
+
+	ret = amdgpu_amdkfd_gpuvm_set_vm_pasid(dev->adev, avm, p->pasid);
+	if (ret)
+		goto err_set_pasid;
+
+	pdd->drm_file = drm_file;
+
+	return 0;
+
+err_set_pasid:
+	kfd_process_device_destroy_cwsr_dgpu(pdd);
+err_init_cwsr:
+	kfd_process_device_destroy_ib_mem(pdd);
+err_reserve_ib_mem:
+	pdd->drm_priv = NULL;
+	amdgpu_amdkfd_gpuvm_destroy_cb(dev->adev, avm);
+
+	return ret;
+}
+
+/*
+ * Direct the IOMMU to bind the process (specifically the pasid->mm)
+ * to the device.
+ * Unbinding occurs when the process dies or the device is removed.
+ *
+ * Assumes that the process lock is held.
+ */
+struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev,
+							struct kfd_process *p)
+{
+	struct kfd_process_device *pdd;
+	int err;
+
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd) {
+		pr_err("Process device data doesn't exist\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	if (!pdd->drm_priv)
+		return ERR_PTR(-ENODEV);
+
+	/*
+	 * signal runtime-pm system to auto resume and prevent
+	 * further runtime suspend once device pdd is created until
+	 * pdd is destroyed.
+	 */
+	if (!pdd->runtime_inuse) {
+		err = pm_runtime_get_sync(adev_to_drm(dev->adev)->dev);
+		if (err < 0) {
+			pm_runtime_put_autosuspend(adev_to_drm(dev->adev)->dev);
+			return ERR_PTR(err);
+		}
+	}
+
+	/*
+	 * make sure that runtime_usage counter is incremented just once
+	 * per pdd
+	 */
+	pdd->runtime_inuse = true;
+
+	return pdd;
+}
+
+/* Create specific handle mapped to mem from process local memory idr
+ * Assumes that the process lock is held.
+ */
+int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
+					void *mem)
+{
+	return idr_alloc(&pdd->alloc_idr, mem, 0, 0, GFP_KERNEL);
+}
+
+/* Translate specific handle from process local memory idr
+ * Assumes that the process lock is held.
+ */
+void *kfd_process_device_translate_handle(struct kfd_process_device *pdd,
+					int handle)
+{
+	if (handle < 0)
+		return NULL;
+
+	return idr_find(&pdd->alloc_idr, handle);
+}
+
+/* Remove specific handle from process local memory idr
+ * Assumes that the process lock is held.
+ */
+void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
+					int handle)
+{
+	if (handle >= 0)
+		idr_remove(&pdd->alloc_idr, handle);
+}
+
+/* This increments the process->ref counter. */
+struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid)
+{
+	struct kfd_process *p, *ret_p = NULL;
+	unsigned int temp;
+
+	int idx = srcu_read_lock(&kfd_processes_srcu);
+
+	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
+		if (p->pasid == pasid) {
+			kref_get(&p->ref);
+			ret_p = p;
+			break;
+		}
+	}
+
+	srcu_read_unlock(&kfd_processes_srcu, idx);
+
+	return ret_p;
+}
+
+/* This increments the process->ref counter. */
+struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
+{
+	struct kfd_process *p;
+
+	int idx = srcu_read_lock(&kfd_processes_srcu);
+
+	p = find_process_by_mm(mm);
+	if (p)
+		kref_get(&p->ref);
+
+	srcu_read_unlock(&kfd_processes_srcu, idx);
+
+	return p;
+}
+
+/* kfd_process_evict_queues - Evict all user queues of a process
+ *
+ * Eviction is reference-counted per process-device. This means multiple
+ * evictions from different sources can be nested safely.
+ */
+int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
+{
+	int r = 0;
+	int i;
+	unsigned int n_evicted = 0;
+
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+
+		kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid,
+					     trigger);
+
+		r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
+							    &pdd->qpd);
+		/* evict return -EIO if HWS is hang or asic is resetting, in this case
+		 * we would like to set all the queues to be in evicted state to prevent
+		 * them been add back since they actually not be saved right now.
+		 */
+		if (r && r != -EIO) {
+			pr_err("Failed to evict process queues\n");
+			goto fail;
+		}
+		n_evicted++;
+	}
+
+	return r;
+
+fail:
+	/* To keep state consistent, roll back partial eviction by
+	 * restoring queues
+	 */
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+
+		if (n_evicted == 0)
+			break;
+
+		kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
+
+		if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
+							      &pdd->qpd))
+			pr_err("Failed to restore queues\n");
+
+		n_evicted--;
+	}
+
+	return r;
+}
+
+/* kfd_process_restore_queues - Restore all user queues of a process */
+int kfd_process_restore_queues(struct kfd_process *p)
+{
+	int r, ret = 0;
+	int i;
+
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+
+		kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
+
+		r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
+							      &pdd->qpd);
+		if (r) {
+			pr_err("Failed to restore process queues\n");
+			if (!ret)
+				ret = r;
+		}
+	}
+
+	return ret;
+}
+
+int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id)
+{
+	int i;
+
+	for (i = 0; i < p->n_pdds; i++)
+		if (p->pdds[i] && gpu_id == p->pdds[i]->user_gpu_id)
+			return i;
+	return -EINVAL;
+}
+
+int
+kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node,
+			    uint32_t *gpuid, uint32_t *gpuidx)
+{
+	int i;
+
+	for (i = 0; i < p->n_pdds; i++)
+		if (p->pdds[i] && p->pdds[i]->dev == node) {
+			*gpuid = p->pdds[i]->user_gpu_id;
+			*gpuidx = i;
+			return 0;
+		}
+	return -EINVAL;
+}
+
+static void evict_process_worker(struct work_struct *work)
+{
+	int ret;
+	struct kfd_process *p;
+	struct delayed_work *dwork;
+
+	dwork = to_delayed_work(work);
+
+	/* Process termination destroys this worker thread. So during the
+	 * lifetime of this thread, kfd_process p will be valid
+	 */
+	p = container_of(dwork, struct kfd_process, eviction_work);
+	WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
+		  "Eviction fence mismatch\n");
+
+	/* Narrow window of overlap between restore and evict work
+	 * item is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
+	 * unreserves KFD BOs, it is possible to evicted again. But
+	 * restore has few more steps of finish. So lets wait for any
+	 * previous restore work to complete
+	 */
+	flush_delayed_work(&p->restore_work);
+
+	pr_debug("Started evicting pasid 0x%x\n", p->pasid);
+	ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_TTM);
+	if (!ret) {
+		dma_fence_signal(p->ef);
+		dma_fence_put(p->ef);
+		p->ef = NULL;
+		queue_delayed_work(kfd_restore_wq, &p->restore_work,
+				msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
+
+		pr_debug("Finished evicting pasid 0x%x\n", p->pasid);
+	} else
+		pr_err("Failed to evict queues of pasid 0x%x\n", p->pasid);
+}
+
+static void restore_process_worker(struct work_struct *work)
+{
+	struct delayed_work *dwork;
+	struct kfd_process *p;
+	int ret = 0;
+
+	dwork = to_delayed_work(work);
+
+	/* Process termination destroys this worker thread. So during the
+	 * lifetime of this thread, kfd_process p will be valid
+	 */
+	p = container_of(dwork, struct kfd_process, restore_work);
+	pr_debug("Started restoring pasid 0x%x\n", p->pasid);
+
+	/* Setting last_restore_timestamp before successful restoration.
+	 * Otherwise this would have to be set by KGD (restore_process_bos)
+	 * before KFD BOs are unreserved. If not, the process can be evicted
+	 * again before the timestamp is set.
+	 * If restore fails, the timestamp will be set again in the next
+	 * attempt. This would mean that the minimum GPU quanta would be
+	 * PROCESS_ACTIVE_TIME_MS - (time to execute the following two
+	 * functions)
+	 */
+
+	p->last_restore_timestamp = get_jiffies_64();
+	/* VMs may not have been acquired yet during debugging. */
+	if (p->kgd_process_info)
+		ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info,
+							     &p->ef);
+	if (ret) {
+		pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n",
+			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
+		ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
+				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
+		WARN(!ret, "reschedule restore work failed\n");
+		return;
+	}
+
+	ret = kfd_process_restore_queues(p);
+	if (!ret)
+		pr_debug("Finished restoring pasid 0x%x\n", p->pasid);
+	else
+		pr_err("Failed to restore queues of pasid 0x%x\n", p->pasid);
+}
+
+void kfd_suspend_all_processes(void)
+{
+	struct kfd_process *p;
+	unsigned int temp;
+	int idx = srcu_read_lock(&kfd_processes_srcu);
+
+	WARN(debug_evictions, "Evicting all processes");
+	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
+		cancel_delayed_work_sync(&p->eviction_work);
+		flush_delayed_work(&p->restore_work);
+
+		if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND))
+			pr_err("Failed to suspend process 0x%x\n", p->pasid);
+		dma_fence_signal(p->ef);
+		dma_fence_put(p->ef);
+		p->ef = NULL;
+	}
+	srcu_read_unlock(&kfd_processes_srcu, idx);
+}
+
+int kfd_resume_all_processes(void)
+{
+	struct kfd_process *p;
+	unsigned int temp;
+	int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
+
+	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
+		if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
+			pr_err("Restore process %d failed during resume\n",
+			       p->pasid);
+			ret = -EFAULT;
+		}
+	}
+	srcu_read_unlock(&kfd_processes_srcu, idx);
+	return ret;
+}
+
+int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
+			  struct vm_area_struct *vma)
+{
+	struct kfd_process_device *pdd;
+	struct qcm_process_device *qpd;
+
+	if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
+		pr_err("Incorrect CWSR mapping size.\n");
+		return -EINVAL;
+	}
+
+	pdd = kfd_get_process_device_data(dev, process);
+	if (!pdd)
+		return -EINVAL;
+	qpd = &pdd->qpd;
+
+	qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+					get_order(KFD_CWSR_TBA_TMA_SIZE));
+	if (!qpd->cwsr_kaddr) {
+		pr_err("Error allocating per process CWSR buffer.\n");
+		return -ENOMEM;
+	}
+
+	vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND
+		| VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP);
+	/* Mapping pages to user process */
+	return remap_pfn_range(vma, vma->vm_start,
+			       PFN_DOWN(__pa(qpd->cwsr_kaddr)),
+			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
+}
+
+void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
+{
+	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
+	uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
+	struct kfd_node *dev = pdd->dev;
+	uint32_t xcc_mask = dev->xcc_mask;
+	int xcc = 0;
+
+	/*
+	 * It can be that we race and lose here, but that is extremely unlikely
+	 * and the worst thing which could happen is that we flush the changes
+	 * into the TLB once more which is harmless.
+	 */
+	if (atomic64_xchg(&pdd->tlb_seq, tlb_seq) == tlb_seq)
+		return;
+
+	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+		/* Nothing to flush until a VMID is assigned, which
+		 * only happens when the first queue is created.
+		 */
+		if (pdd->qpd.vmid)
+			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev,
+							pdd->qpd.vmid);
+	} else {
+		for_each_inst(xcc, xcc_mask)
+			amdgpu_amdkfd_flush_gpu_tlb_pasid(
+				dev->adev, pdd->process->pasid, type, xcc);
+	}
+}
+
+/* assumes caller holds process lock. */
+int kfd_process_drain_interrupts(struct kfd_process_device *pdd)
+{
+	uint32_t irq_drain_fence[8];
+	uint8_t node_id = 0;
+	int r = 0;
+
+	if (!KFD_IS_SOC15(pdd->dev))
+		return 0;
+
+	pdd->process->irq_drain_is_open = true;
+
+	memset(irq_drain_fence, 0, sizeof(irq_drain_fence));
+	irq_drain_fence[0] = (KFD_IRQ_FENCE_SOURCEID << 8) |
+							KFD_IRQ_FENCE_CLIENTID;
+	irq_drain_fence[3] = pdd->process->pasid;
+
+	/*
+	 * For GFX 9.4.3, send the NodeId also in IH cookie DW[3]
+	 */
+	if (KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 3)) {
+		node_id = ffs(pdd->dev->interrupt_bitmap) - 1;
+		irq_drain_fence[3] |= node_id << 16;
+	}
+
+	/* ensure stale irqs scheduled KFD interrupts and send drain fence. */
+	if (amdgpu_amdkfd_send_close_event_drain_irq(pdd->dev->adev,
+						     irq_drain_fence)) {
+		pdd->process->irq_drain_is_open = false;
+		return 0;
+	}
+
+	r = wait_event_interruptible(pdd->process->wait_irq_drain,
+				     !READ_ONCE(pdd->process->irq_drain_is_open));
+	if (r)
+		pdd->process->irq_drain_is_open = false;
+
+	return r;
+}
+
+void kfd_process_close_interrupt_drain(unsigned int pasid)
+{
+	struct kfd_process *p;
+
+	p = kfd_lookup_process_by_pasid(pasid);
+
+	if (!p)
+		return;
+
+	WRITE_ONCE(p->irq_drain_is_open, false);
+	wake_up_all(&p->wait_irq_drain);
+	kfd_unref_process(p);
+}
+
+struct send_exception_work_handler_workarea {
+	struct work_struct work;
+	struct kfd_process *p;
+	unsigned int queue_id;
+	uint64_t error_reason;
+};
+
+static void send_exception_work_handler(struct work_struct *work)
+{
+	struct send_exception_work_handler_workarea *workarea;
+	struct kfd_process *p;
+	struct queue *q;
+	struct mm_struct *mm;
+	struct kfd_context_save_area_header __user *csa_header;
+	uint64_t __user *err_payload_ptr;
+	uint64_t cur_err;
+	uint32_t ev_id;
+
+	workarea = container_of(work,
+				struct send_exception_work_handler_workarea,
+				work);
+	p = workarea->p;
+
+	mm = get_task_mm(p->lead_thread);
+
+	if (!mm)
+		return;
+
+	kthread_use_mm(mm);
+
+	q = pqm_get_user_queue(&p->pqm, workarea->queue_id);
+
+	if (!q)
+		goto out;
+
+	csa_header = (void __user *)q->properties.ctx_save_restore_area_address;
+
+	get_user(err_payload_ptr, (uint64_t __user **)&csa_header->err_payload_addr);
+	get_user(cur_err, err_payload_ptr);
+	cur_err |= workarea->error_reason;
+	put_user(cur_err, err_payload_ptr);
+	get_user(ev_id, &csa_header->err_event_id);
+
+	kfd_set_event(p, ev_id);
+
+out:
+	kthread_unuse_mm(mm);
+	mmput(mm);
+}
+
+int kfd_send_exception_to_runtime(struct kfd_process *p,
+			unsigned int queue_id,
+			uint64_t error_reason)
+{
+	struct send_exception_work_handler_workarea worker;
+
+	INIT_WORK_ONSTACK(&worker.work, send_exception_work_handler);
+
+	worker.p = p;
+	worker.queue_id = queue_id;
+	worker.error_reason = error_reason;
+
+	schedule_work(&worker.work);
+	flush_work(&worker.work);
+	destroy_work_on_stack(&worker.work);
+
+	return 0;
+}
+
+struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *p, uint32_t gpu_id)
+{
+	int i;
+
+	if (gpu_id) {
+		for (i = 0; i < p->n_pdds; i++) {
+			struct kfd_process_device *pdd = p->pdds[i];
+
+			if (pdd->user_gpu_id == gpu_id)
+				return pdd;
+		}
+	}
+	return NULL;
+}
+
+int kfd_process_get_user_gpu_id(struct kfd_process *p, uint32_t actual_gpu_id)
+{
+	int i;
+
+	if (!actual_gpu_id)
+		return 0;
+
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+
+		if (pdd->dev->id == actual_gpu_id)
+			return pdd->user_gpu_id;
+	}
+	return -EINVAL;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
+{
+	struct kfd_process *p;
+	unsigned int temp;
+	int r = 0;
+
+	int idx = srcu_read_lock(&kfd_processes_srcu);
+
+	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
+		seq_printf(m, "Process %d PASID 0x%x:\n",
+			   p->lead_thread->tgid, p->pasid);
+
+		mutex_lock(&p->mutex);
+		r = pqm_debugfs_mqds(m, &p->pqm);
+		mutex_unlock(&p->mutex);
+
+		if (r)
+			break;
+	}
+
+	srcu_read_unlock(&kfd_processes_srcu, idx);
+
+	return r;
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
new file mode 100644
index 000000000..77f493262
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -0,0 +1,1076 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/list.h>
+#include "kfd_device_queue_manager.h"
+#include "kfd_priv.h"
+#include "kfd_kernel_queue.h"
+#include "amdgpu_amdkfd.h"
+
+static inline struct process_queue_node *get_queue_by_qid(
+			struct process_queue_manager *pqm, unsigned int qid)
+{
+	struct process_queue_node *pqn;
+
+	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
+		if ((pqn->q && pqn->q->properties.queue_id == qid) ||
+		    (pqn->kq && pqn->kq->queue->properties.queue_id == qid))
+			return pqn;
+	}
+
+	return NULL;
+}
+
+static int assign_queue_slot_by_qid(struct process_queue_manager *pqm,
+				    unsigned int qid)
+{
+	if (qid >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
+		return -EINVAL;
+
+	if (__test_and_set_bit(qid, pqm->queue_slot_bitmap)) {
+		pr_err("Cannot create new queue because requested qid(%u) is in use\n", qid);
+		return -ENOSPC;
+	}
+
+	return 0;
+}
+
+static int find_available_queue_slot(struct process_queue_manager *pqm,
+					unsigned int *qid)
+{
+	unsigned long found;
+
+	found = find_first_zero_bit(pqm->queue_slot_bitmap,
+			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+
+	pr_debug("The new slot id %lu\n", found);
+
+	if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
+		pr_info("Cannot open more queues for process with pasid 0x%x\n",
+				pqm->process->pasid);
+		return -ENOMEM;
+	}
+
+	set_bit(found, pqm->queue_slot_bitmap);
+	*qid = found;
+
+	return 0;
+}
+
+void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
+{
+	struct kfd_node *dev = pdd->dev;
+
+	if (pdd->already_dequeued)
+		return;
+
+	dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
+	pdd->already_dequeued = true;
+}
+
+int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
+			void *gws)
+{
+	struct kfd_node *dev = NULL;
+	struct process_queue_node *pqn;
+	struct kfd_process_device *pdd;
+	struct kgd_mem *mem = NULL;
+	int ret;
+
+	pqn = get_queue_by_qid(pqm, qid);
+	if (!pqn) {
+		pr_err("Queue id does not match any known queue\n");
+		return -EINVAL;
+	}
+
+	if (pqn->q)
+		dev = pqn->q->device;
+	if (WARN_ON(!dev))
+		return -ENODEV;
+
+	pdd = kfd_get_process_device_data(dev, pqm->process);
+	if (!pdd) {
+		pr_err("Process device data doesn't exist\n");
+		return -EINVAL;
+	}
+
+	/* Only allow one queue per process can have GWS assigned */
+	if (gws && pdd->qpd.num_gws)
+		return -EBUSY;
+
+	if (!gws && pdd->qpd.num_gws == 0)
+		return -EINVAL;
+
+	if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && !dev->kfd->shared_resources.enable_mes) {
+		if (gws)
+			ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
+				gws, &mem);
+		else
+			ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
+				pqn->q->gws);
+		if (unlikely(ret))
+			return ret;
+		pqn->q->gws = mem;
+	} else {
+		/*
+		 * Intentionally set GWS to a non-NULL value
+		 * for devices that do not use GWS for global wave
+		 * synchronization but require the formality
+		 * of setting GWS for cooperative groups.
+		 */
+		pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL;
+	}
+
+	pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;
+
+	return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
+							pqn->q, NULL);
+}
+
+void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
+{
+	int i;
+
+	for (i = 0; i < p->n_pdds; i++)
+		kfd_process_dequeue_from_device(p->pdds[i]);
+}
+
+int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
+{
+	INIT_LIST_HEAD(&pqm->queues);
+	pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+					       GFP_KERNEL);
+	if (!pqm->queue_slot_bitmap)
+		return -ENOMEM;
+	pqm->process = p;
+
+	return 0;
+}
+
+static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
+				     struct process_queue_node *pqn)
+{
+	struct kfd_node *dev;
+	struct kfd_process_device *pdd;
+
+	dev = pqn->q->device;
+
+	pdd = kfd_get_process_device_data(dev, pqm->process);
+	if (!pdd) {
+		pr_err("Process device data doesn't exist\n");
+		return;
+	}
+
+	if (pqn->q->gws) {
+		if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
+		    !dev->kfd->shared_resources.enable_mes)
+			amdgpu_amdkfd_remove_gws_from_process(
+				pqm->process->kgd_process_info, pqn->q->gws);
+		pdd->qpd.num_gws = 0;
+	}
+
+	if (dev->kfd->shared_resources.enable_mes) {
+		amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->gang_ctx_bo);
+		if (pqn->q->wptr_bo)
+			amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
+	}
+}
+
+void pqm_uninit(struct process_queue_manager *pqm)
+{
+	struct process_queue_node *pqn, *next;
+
+	list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
+		if (pqn->q)
+			pqm_clean_queue_resource(pqm, pqn);
+
+		kfd_procfs_del_queue(pqn->q);
+		uninit_queue(pqn->q);
+		list_del(&pqn->process_queue_list);
+		kfree(pqn);
+	}
+
+	bitmap_free(pqm->queue_slot_bitmap);
+	pqm->queue_slot_bitmap = NULL;
+}
+
+static int init_user_queue(struct process_queue_manager *pqm,
+				struct kfd_node *dev, struct queue **q,
+				struct queue_properties *q_properties,
+				struct file *f, struct amdgpu_bo *wptr_bo,
+				unsigned int qid)
+{
+	int retval;
+
+	/* Doorbell initialized in user space*/
+	q_properties->doorbell_ptr = NULL;
+	q_properties->exception_status = KFD_EC_MASK(EC_QUEUE_NEW);
+
+	/* let DQM handle it*/
+	q_properties->vmid = 0;
+	q_properties->queue_id = qid;
+
+	retval = init_queue(q, q_properties);
+	if (retval != 0)
+		return retval;
+
+	(*q)->device = dev;
+	(*q)->process = pqm->process;
+
+	if (dev->kfd->shared_resources.enable_mes) {
+		retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
+						AMDGPU_MES_GANG_CTX_SIZE,
+						&(*q)->gang_ctx_bo,
+						&(*q)->gang_ctx_gpu_addr,
+						&(*q)->gang_ctx_cpu_ptr,
+						false);
+		if (retval) {
+			pr_err("failed to allocate gang context bo\n");
+			goto cleanup;
+		}
+		memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);
+		(*q)->wptr_bo = wptr_bo;
+	}
+
+	pr_debug("PQM After init queue");
+	return 0;
+
+cleanup:
+	uninit_queue(*q);
+	*q = NULL;
+	return retval;
+}
+
+int pqm_create_queue(struct process_queue_manager *pqm,
+			    struct kfd_node *dev,
+			    struct file *f,
+			    struct queue_properties *properties,
+			    unsigned int *qid,
+			    struct amdgpu_bo *wptr_bo,
+			    const struct kfd_criu_queue_priv_data *q_data,
+			    const void *restore_mqd,
+			    const void *restore_ctl_stack,
+			    uint32_t *p_doorbell_offset_in_process)
+{
+	int retval;
+	struct kfd_process_device *pdd;
+	struct queue *q;
+	struct process_queue_node *pqn;
+	struct kernel_queue *kq;
+	enum kfd_queue_type type = properties->type;
+	unsigned int max_queues = 127; /* HWS limit */
+
+	/*
+	 * On GFX 9.4.3, increase the number of queues that
+	 * can be created to 255. No HWS limit on GFX 9.4.3.
+	 */
+	if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3))
+		max_queues = 255;
+
+	q = NULL;
+	kq = NULL;
+
+	pdd = kfd_get_process_device_data(dev, pqm->process);
+	if (!pdd) {
+		pr_err("Process device data doesn't exist\n");
+		return -1;
+	}
+
+	/*
+	 * for debug process, verify that it is within the static queues limit
+	 * currently limit is set to half of the total avail HQD slots
+	 * If we are just about to create DIQ, the is_debug flag is not set yet
+	 * Hence we also check the type as well
+	 */
+	if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ))
+		max_queues = dev->kfd->device_info.max_no_of_hqd/2;
+
+	if (pdd->qpd.queue_count >= max_queues)
+		return -ENOSPC;
+
+	if (q_data) {
+		retval = assign_queue_slot_by_qid(pqm, q_data->q_id);
+		*qid = q_data->q_id;
+	} else
+		retval = find_available_queue_slot(pqm, qid);
+
+	if (retval != 0)
+		return retval;
+
+	if (list_empty(&pdd->qpd.queues_list) &&
+	    list_empty(&pdd->qpd.priv_queue_list))
+		dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
+
+	pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
+	if (!pqn) {
+		retval = -ENOMEM;
+		goto err_allocate_pqn;
+	}
+
+	switch (type) {
+	case KFD_QUEUE_TYPE_SDMA:
+	case KFD_QUEUE_TYPE_SDMA_XGMI:
+		/* SDMA queues are always allocated statically no matter
+		 * which scheduler mode is used. We also do not need to
+		 * check whether a SDMA queue can be allocated here, because
+		 * allocate_sdma_queue() in create_queue() has the
+		 * corresponding check logic.
+		 */
+		retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
+		if (retval != 0)
+			goto err_create_queue;
+		pqn->q = q;
+		pqn->kq = NULL;
+		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
+						    restore_mqd, restore_ctl_stack);
+		print_queue(q);
+		break;
+
+	case KFD_QUEUE_TYPE_COMPUTE:
+		/* check if there is over subscription */
+		if ((dev->dqm->sched_policy ==
+		     KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
+		((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
+		(dev->dqm->active_queue_count >= get_cp_queues_num(dev->dqm)))) {
+			pr_debug("Over-subscription is not allowed when amdkfd.sched_policy == 1\n");
+			retval = -EPERM;
+			goto err_create_queue;
+		}
+
+		retval = init_user_queue(pqm, dev, &q, properties, f, wptr_bo, *qid);
+		if (retval != 0)
+			goto err_create_queue;
+		pqn->q = q;
+		pqn->kq = NULL;
+		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
+						    restore_mqd, restore_ctl_stack);
+		print_queue(q);
+		break;
+	case KFD_QUEUE_TYPE_DIQ:
+		kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ);
+		if (!kq) {
+			retval = -ENOMEM;
+			goto err_create_queue;
+		}
+		kq->queue->properties.queue_id = *qid;
+		pqn->kq = kq;
+		pqn->q = NULL;
+		retval = kfd_process_drain_interrupts(pdd);
+		if (retval)
+			break;
+
+		retval = dev->dqm->ops.create_kernel_queue(dev->dqm,
+							kq, &pdd->qpd);
+		break;
+	default:
+		WARN(1, "Invalid queue type %d", type);
+		retval = -EINVAL;
+	}
+
+	if (retval != 0) {
+		pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n",
+			pqm->process->pasid, type, retval);
+		goto err_create_queue;
+	}
+
+	if (q && p_doorbell_offset_in_process) {
+		/* Return the doorbell offset within the doorbell page
+		 * to the caller so it can be passed up to user mode
+		 * (in bytes).
+		 * relative doorbell index = Absolute doorbell index -
+		 * absolute index of first doorbell in the page.
+		 */
+		uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev,
+								       pdd->qpd.proc_doorbells,
+								       0,
+								       pdd->dev->kfd->device_info.doorbell_size);
+
+		*p_doorbell_offset_in_process = (q->properties.doorbell_off
+						- first_db_index) * sizeof(uint32_t);
+	}
+
+	pr_debug("PQM After DQM create queue\n");
+
+	list_add(&pqn->process_queue_list, &pqm->queues);
+
+	if (q) {
+		pr_debug("PQM done creating queue\n");
+		kfd_procfs_add_queue(q);
+		print_queue_properties(&q->properties);
+	}
+
+	return retval;
+
+err_create_queue:
+	uninit_queue(q);
+	if (kq)
+		kernel_queue_uninit(kq, false);
+	kfree(pqn);
+err_allocate_pqn:
+	/* check if queues list is empty unregister process from device */
+	clear_bit(*qid, pqm->queue_slot_bitmap);
+	if (list_empty(&pdd->qpd.queues_list) &&
+	    list_empty(&pdd->qpd.priv_queue_list))
+		dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd);
+	return retval;
+}
+
+int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
+{
+	struct process_queue_node *pqn;
+	struct kfd_process_device *pdd;
+	struct device_queue_manager *dqm;
+	struct kfd_node *dev;
+	int retval;
+
+	dqm = NULL;
+
+	retval = 0;
+
+	pqn = get_queue_by_qid(pqm, qid);
+	if (!pqn) {
+		pr_err("Queue id does not match any known queue\n");
+		return -EINVAL;
+	}
+
+	dev = NULL;
+	if (pqn->kq)
+		dev = pqn->kq->dev;
+	if (pqn->q)
+		dev = pqn->q->device;
+	if (WARN_ON(!dev))
+		return -ENODEV;
+
+	pdd = kfd_get_process_device_data(dev, pqm->process);
+	if (!pdd) {
+		pr_err("Process device data doesn't exist\n");
+		return -1;
+	}
+
+	if (pqn->kq) {
+		/* destroy kernel queue (DIQ) */
+		dqm = pqn->kq->dev->dqm;
+		dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
+		kernel_queue_uninit(pqn->kq, false);
+	}
+
+	if (pqn->q) {
+		kfd_procfs_del_queue(pqn->q);
+		dqm = pqn->q->device->dqm;
+		retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
+		if (retval) {
+			pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n",
+				pqm->process->pasid,
+				pqn->q->properties.queue_id, retval);
+			if (retval != -ETIME)
+				goto err_destroy_queue;
+		}
+
+		pqm_clean_queue_resource(pqm, pqn);
+		uninit_queue(pqn->q);
+	}
+
+	list_del(&pqn->process_queue_list);
+	kfree(pqn);
+	clear_bit(qid, pqm->queue_slot_bitmap);
+
+	if (list_empty(&pdd->qpd.queues_list) &&
+	    list_empty(&pdd->qpd.priv_queue_list))
+		dqm->ops.unregister_process(dqm, &pdd->qpd);
+
+err_destroy_queue:
+	return retval;
+}
+
+int pqm_update_queue_properties(struct process_queue_manager *pqm,
+				unsigned int qid, struct queue_properties *p)
+{
+	int retval;
+	struct process_queue_node *pqn;
+
+	pqn = get_queue_by_qid(pqm, qid);
+	if (!pqn) {
+		pr_debug("No queue %d exists for update operation\n", qid);
+		return -EFAULT;
+	}
+
+	pqn->q->properties.queue_address = p->queue_address;
+	pqn->q->properties.queue_size = p->queue_size;
+	pqn->q->properties.queue_percent = p->queue_percent;
+	pqn->q->properties.priority = p->priority;
+	pqn->q->properties.pm4_target_xcc = p->pm4_target_xcc;
+
+	retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
+							pqn->q, NULL);
+	if (retval != 0)
+		return retval;
+
+	return 0;
+}
+
+int pqm_update_mqd(struct process_queue_manager *pqm,
+				unsigned int qid, struct mqd_update_info *minfo)
+{
+	int retval;
+	struct process_queue_node *pqn;
+
+	pqn = get_queue_by_qid(pqm, qid);
+	if (!pqn) {
+		pr_debug("No queue %d exists for update operation\n", qid);
+		return -EFAULT;
+	}
+
+	/* CUs are masked for debugger requirements so deny user mask  */
+	if (pqn->q->properties.is_dbg_wa && minfo && minfo->cu_mask.ptr)
+		return -EBUSY;
+
+	/* ASICs that have WGPs must enforce pairwise enabled mask checks. */
+	if (minfo && minfo->cu_mask.ptr &&
+			KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) {
+		int i;
+
+		for (i = 0; i < minfo->cu_mask.count; i += 2) {
+			uint32_t cu_pair = (minfo->cu_mask.ptr[i / 32] >> (i % 32)) & 0x3;
+
+			if (cu_pair && cu_pair != 0x3) {
+				pr_debug("CUs must be adjacent pairwise enabled.\n");
+				return -EINVAL;
+			}
+		}
+	}
+
+	retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
+							pqn->q, minfo);
+	if (retval != 0)
+		return retval;
+
+	if (minfo && minfo->cu_mask.ptr)
+		pqn->q->properties.is_user_cu_masked = true;
+
+	return 0;
+}
+
+struct kernel_queue *pqm_get_kernel_queue(
+					struct process_queue_manager *pqm,
+					unsigned int qid)
+{
+	struct process_queue_node *pqn;
+
+	pqn = get_queue_by_qid(pqm, qid);
+	if (pqn && pqn->kq)
+		return pqn->kq;
+
+	return NULL;
+}
+
+struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
+					unsigned int qid)
+{
+	struct process_queue_node *pqn;
+
+	pqn = get_queue_by_qid(pqm, qid);
+	return pqn ? pqn->q : NULL;
+}
+
+int pqm_get_wave_state(struct process_queue_manager *pqm,
+		       unsigned int qid,
+		       void __user *ctl_stack,
+		       u32 *ctl_stack_used_size,
+		       u32 *save_area_used_size)
+{
+	struct process_queue_node *pqn;
+
+	pqn = get_queue_by_qid(pqm, qid);
+	if (!pqn) {
+		pr_debug("amdkfd: No queue %d exists for operation\n",
+			 qid);
+		return -EFAULT;
+	}
+
+	return pqn->q->device->dqm->ops.get_wave_state(pqn->q->device->dqm,
+						       pqn->q,
+						       ctl_stack,
+						       ctl_stack_used_size,
+						       save_area_used_size);
+}
+
+int pqm_get_queue_snapshot(struct process_queue_manager *pqm,
+			   uint64_t exception_clear_mask,
+			   void __user *buf,
+			   int *num_qss_entries,
+			   uint32_t *entry_size)
+{
+	struct process_queue_node *pqn;
+	struct kfd_queue_snapshot_entry src;
+	uint32_t tmp_entry_size = *entry_size, tmp_qss_entries = *num_qss_entries;
+	int r = 0;
+
+	*num_qss_entries = 0;
+	if (!(*entry_size))
+		return -EINVAL;
+
+	*entry_size = min_t(size_t, *entry_size, sizeof(struct kfd_queue_snapshot_entry));
+	mutex_lock(&pqm->process->event_mutex);
+
+	memset(&src, 0, sizeof(src));
+
+	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
+		if (!pqn->q)
+			continue;
+
+		if (*num_qss_entries < tmp_qss_entries) {
+			set_queue_snapshot_entry(pqn->q, exception_clear_mask, &src);
+
+			if (copy_to_user(buf, &src, *entry_size)) {
+				r = -EFAULT;
+				break;
+			}
+			buf += tmp_entry_size;
+		}
+		*num_qss_entries += 1;
+	}
+
+	mutex_unlock(&pqm->process->event_mutex);
+	return r;
+}
+
+static int get_queue_data_sizes(struct kfd_process_device *pdd,
+				struct queue *q,
+				uint32_t *mqd_size,
+				uint32_t *ctl_stack_size)
+{
+	int ret;
+
+	ret = pqm_get_queue_checkpoint_info(&pdd->process->pqm,
+					    q->properties.queue_id,
+					    mqd_size,
+					    ctl_stack_size);
+	if (ret)
+		pr_err("Failed to get queue dump info (%d)\n", ret);
+
+	return ret;
+}
+
+int kfd_process_get_queue_info(struct kfd_process *p,
+			       uint32_t *num_queues,
+			       uint64_t *priv_data_sizes)
+{
+	uint32_t extra_data_sizes = 0;
+	struct queue *q;
+	int i;
+	int ret;
+
+	*num_queues = 0;
+
+	/* Run over all PDDs of the process */
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+
+		list_for_each_entry(q, &pdd->qpd.queues_list, list) {
+			if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
+				q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+				q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+				uint32_t mqd_size, ctl_stack_size;
+
+				*num_queues = *num_queues + 1;
+
+				ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
+				if (ret)
+					return ret;
+
+				extra_data_sizes += mqd_size + ctl_stack_size;
+			} else {
+				pr_err("Unsupported queue type (%d)\n", q->properties.type);
+				return -EOPNOTSUPP;
+			}
+		}
+	}
+	*priv_data_sizes = extra_data_sizes +
+				(*num_queues * sizeof(struct kfd_criu_queue_priv_data));
+
+	return 0;
+}
+
+static int pqm_checkpoint_mqd(struct process_queue_manager *pqm,
+			      unsigned int qid,
+			      void *mqd,
+			      void *ctl_stack)
+{
+	struct process_queue_node *pqn;
+
+	pqn = get_queue_by_qid(pqm, qid);
+	if (!pqn) {
+		pr_debug("amdkfd: No queue %d exists for operation\n", qid);
+		return -EFAULT;
+	}
+
+	if (!pqn->q->device->dqm->ops.checkpoint_mqd) {
+		pr_err("amdkfd: queue dumping not supported on this device\n");
+		return -EOPNOTSUPP;
+	}
+
+	return pqn->q->device->dqm->ops.checkpoint_mqd(pqn->q->device->dqm,
+						       pqn->q, mqd, ctl_stack);
+}
+
+static int criu_checkpoint_queue(struct kfd_process_device *pdd,
+			   struct queue *q,
+			   struct kfd_criu_queue_priv_data *q_data)
+{
+	uint8_t *mqd, *ctl_stack;
+	int ret;
+
+	mqd = (void *)(q_data + 1);
+	ctl_stack = mqd + q_data->mqd_size;
+
+	q_data->gpu_id = pdd->user_gpu_id;
+	q_data->type = q->properties.type;
+	q_data->format = q->properties.format;
+	q_data->q_id =  q->properties.queue_id;
+	q_data->q_address = q->properties.queue_address;
+	q_data->q_size = q->properties.queue_size;
+	q_data->priority = q->properties.priority;
+	q_data->q_percent = q->properties.queue_percent;
+	q_data->read_ptr_addr = (uint64_t)q->properties.read_ptr;
+	q_data->write_ptr_addr = (uint64_t)q->properties.write_ptr;
+	q_data->doorbell_id = q->doorbell_id;
+
+	q_data->sdma_id = q->sdma_id;
+
+	q_data->eop_ring_buffer_address =
+		q->properties.eop_ring_buffer_address;
+
+	q_data->eop_ring_buffer_size = q->properties.eop_ring_buffer_size;
+
+	q_data->ctx_save_restore_area_address =
+		q->properties.ctx_save_restore_area_address;
+
+	q_data->ctx_save_restore_area_size =
+		q->properties.ctx_save_restore_area_size;
+
+	q_data->gws = !!q->gws;
+
+	ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack);
+	if (ret) {
+		pr_err("Failed checkpoint queue_mqd (%d)\n", ret);
+		return ret;
+	}
+
+	pr_debug("Dumping Queue: gpu_id:%x queue_id:%u\n", q_data->gpu_id, q_data->q_id);
+	return ret;
+}
+
+static int criu_checkpoint_queues_device(struct kfd_process_device *pdd,
+				   uint8_t __user *user_priv,
+				   unsigned int *q_index,
+				   uint64_t *queues_priv_data_offset)
+{
+	unsigned int q_private_data_size = 0;
+	uint8_t *q_private_data = NULL; /* Local buffer to store individual queue private data */
+	struct queue *q;
+	int ret = 0;
+
+	list_for_each_entry(q, &pdd->qpd.queues_list, list) {
+		struct kfd_criu_queue_priv_data *q_data;
+		uint64_t q_data_size;
+		uint32_t mqd_size;
+		uint32_t ctl_stack_size;
+
+		if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE &&
+			q->properties.type != KFD_QUEUE_TYPE_SDMA &&
+			q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI) {
+
+			pr_err("Unsupported queue type (%d)\n", q->properties.type);
+			ret = -EOPNOTSUPP;
+			break;
+		}
+
+		ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
+		if (ret)
+			break;
+
+		q_data_size = sizeof(*q_data) + mqd_size + ctl_stack_size;
+
+		/* Increase local buffer space if needed */
+		if (q_private_data_size < q_data_size) {
+			kfree(q_private_data);
+
+			q_private_data = kzalloc(q_data_size, GFP_KERNEL);
+			if (!q_private_data) {
+				ret = -ENOMEM;
+				break;
+			}
+			q_private_data_size = q_data_size;
+		}
+
+		q_data = (struct kfd_criu_queue_priv_data *)q_private_data;
+
+		/* data stored in this order: priv_data, mqd, ctl_stack */
+		q_data->mqd_size = mqd_size;
+		q_data->ctl_stack_size = ctl_stack_size;
+
+		ret = criu_checkpoint_queue(pdd, q, q_data);
+		if (ret)
+			break;
+
+		q_data->object_type = KFD_CRIU_OBJECT_TYPE_QUEUE;
+
+		ret = copy_to_user(user_priv + *queues_priv_data_offset,
+				q_data, q_data_size);
+		if (ret) {
+			ret = -EFAULT;
+			break;
+		}
+		*queues_priv_data_offset += q_data_size;
+		*q_index = *q_index + 1;
+	}
+
+	kfree(q_private_data);
+
+	return ret;
+}
+
+int kfd_criu_checkpoint_queues(struct kfd_process *p,
+			 uint8_t __user *user_priv_data,
+			 uint64_t *priv_data_offset)
+{
+	int ret = 0, pdd_index, q_index = 0;
+
+	for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
+		struct kfd_process_device *pdd = p->pdds[pdd_index];
+
+		/*
+		 * criu_checkpoint_queues_device will copy data to user and update q_index and
+		 * queues_priv_data_offset
+		 */
+		ret = criu_checkpoint_queues_device(pdd, user_priv_data, &q_index,
+					      priv_data_offset);
+
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+static void set_queue_properties_from_criu(struct queue_properties *qp,
+					  struct kfd_criu_queue_priv_data *q_data)
+{
+	qp->is_interop = false;
+	qp->queue_percent = q_data->q_percent;
+	qp->priority = q_data->priority;
+	qp->queue_address = q_data->q_address;
+	qp->queue_size = q_data->q_size;
+	qp->read_ptr = (uint32_t *) q_data->read_ptr_addr;
+	qp->write_ptr = (uint32_t *) q_data->write_ptr_addr;
+	qp->eop_ring_buffer_address = q_data->eop_ring_buffer_address;
+	qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size;
+	qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address;
+	qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size;
+	qp->ctl_stack_size = q_data->ctl_stack_size;
+	qp->type = q_data->type;
+	qp->format = q_data->format;
+}
+
+int kfd_criu_restore_queue(struct kfd_process *p,
+			   uint8_t __user *user_priv_ptr,
+			   uint64_t *priv_data_offset,
+			   uint64_t max_priv_data_size)
+{
+	uint8_t *mqd, *ctl_stack, *q_extra_data = NULL;
+	struct kfd_criu_queue_priv_data *q_data;
+	struct kfd_process_device *pdd;
+	uint64_t q_extra_data_size;
+	struct queue_properties qp;
+	unsigned int queue_id;
+	int ret = 0;
+
+	if (*priv_data_offset + sizeof(*q_data) > max_priv_data_size)
+		return -EINVAL;
+
+	q_data = kmalloc(sizeof(*q_data), GFP_KERNEL);
+	if (!q_data)
+		return -ENOMEM;
+
+	ret = copy_from_user(q_data, user_priv_ptr + *priv_data_offset, sizeof(*q_data));
+	if (ret) {
+		ret = -EFAULT;
+		goto exit;
+	}
+
+	*priv_data_offset += sizeof(*q_data);
+	q_extra_data_size = (uint64_t)q_data->ctl_stack_size + q_data->mqd_size;
+
+	if (*priv_data_offset + q_extra_data_size > max_priv_data_size) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	q_extra_data = kmalloc(q_extra_data_size, GFP_KERNEL);
+	if (!q_extra_data) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	ret = copy_from_user(q_extra_data, user_priv_ptr + *priv_data_offset, q_extra_data_size);
+	if (ret) {
+		ret = -EFAULT;
+		goto exit;
+	}
+
+	*priv_data_offset += q_extra_data_size;
+
+	pdd = kfd_process_device_data_by_id(p, q_data->gpu_id);
+	if (!pdd) {
+		pr_err("Failed to get pdd\n");
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	/* data stored in this order: mqd, ctl_stack */
+	mqd = q_extra_data;
+	ctl_stack = mqd + q_data->mqd_size;
+
+	memset(&qp, 0, sizeof(qp));
+	set_queue_properties_from_criu(&qp, q_data);
+
+	print_queue_properties(&qp);
+
+	ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, NULL, q_data, mqd, ctl_stack,
+				NULL);
+	if (ret) {
+		pr_err("Failed to create new queue err:%d\n", ret);
+		goto exit;
+	}
+
+	if (q_data->gws)
+		ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws);
+
+exit:
+	if (ret)
+		pr_err("Failed to restore queue (%d)\n", ret);
+	else
+		pr_debug("Queue id %d was restored successfully\n", queue_id);
+
+	kfree(q_data);
+
+	return ret;
+}
+
+int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
+				  unsigned int qid,
+				  uint32_t *mqd_size,
+				  uint32_t *ctl_stack_size)
+{
+	struct process_queue_node *pqn;
+
+	pqn = get_queue_by_qid(pqm, qid);
+	if (!pqn) {
+		pr_debug("amdkfd: No queue %d exists for operation\n", qid);
+		return -EFAULT;
+	}
+
+	if (!pqn->q->device->dqm->ops.get_queue_checkpoint_info) {
+		pr_err("amdkfd: queue dumping not supported on this device\n");
+		return -EOPNOTSUPP;
+	}
+
+	pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm,
+						       pqn->q, mqd_size,
+						       ctl_stack_size);
+	return 0;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+int pqm_debugfs_mqds(struct seq_file *m, void *data)
+{
+	struct process_queue_manager *pqm = data;
+	struct process_queue_node *pqn;
+	struct queue *q;
+	enum KFD_MQD_TYPE mqd_type;
+	struct mqd_manager *mqd_mgr;
+	int r = 0, xcc, num_xccs = 1;
+	void *mqd;
+	uint64_t size = 0;
+
+	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
+		if (pqn->q) {
+			q = pqn->q;
+			switch (q->properties.type) {
+			case KFD_QUEUE_TYPE_SDMA:
+			case KFD_QUEUE_TYPE_SDMA_XGMI:
+				seq_printf(m, "  SDMA queue on device %x\n",
+					   q->device->id);
+				mqd_type = KFD_MQD_TYPE_SDMA;
+				break;
+			case KFD_QUEUE_TYPE_COMPUTE:
+				seq_printf(m, "  Compute queue on device %x\n",
+					   q->device->id);
+				mqd_type = KFD_MQD_TYPE_CP;
+				num_xccs = NUM_XCC(q->device->xcc_mask);
+				break;
+			default:
+				seq_printf(m,
+				"  Bad user queue type %d on device %x\n",
+					   q->properties.type, q->device->id);
+				continue;
+			}
+			mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
+			size = mqd_mgr->mqd_stride(mqd_mgr,
+							&q->properties);
+		} else if (pqn->kq) {
+			q = pqn->kq->queue;
+			mqd_mgr = pqn->kq->mqd_mgr;
+			switch (q->properties.type) {
+			case KFD_QUEUE_TYPE_DIQ:
+				seq_printf(m, "  DIQ on device %x\n",
+					   pqn->kq->dev->id);
+				break;
+			default:
+				seq_printf(m,
+				"  Bad kernel queue type %d on device %x\n",
+					   q->properties.type,
+					   pqn->kq->dev->id);
+				continue;
+			}
+		} else {
+			seq_printf(m,
+		"  Weird: Queue node with neither kernel nor user queue\n");
+			continue;
+		}
+
+		for (xcc = 0; xcc < num_xccs; xcc++) {
+			mqd = q->mqd + size * xcc;
+			r = mqd_mgr->debugfs_show_mqd(m, mqd);
+			if (r != 0)
+				break;
+		}
+	}
+
+	return r;
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
new file mode 100644
index 000000000..0f6992b18
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/slab.h>
+#include "kfd_priv.h"
+
+void print_queue_properties(struct queue_properties *q)
+{
+	if (!q)
+		return;
+
+	pr_debug("Printing queue properties:\n");
+	pr_debug("Queue Type: %u\n", q->type);
+	pr_debug("Queue Size: %llu\n", q->queue_size);
+	pr_debug("Queue percent: %u\n", q->queue_percent);
+	pr_debug("Queue Address: 0x%llX\n", q->queue_address);
+	pr_debug("Queue Id: %u\n", q->queue_id);
+	pr_debug("Queue Process Vmid: %u\n", q->vmid);
+	pr_debug("Queue Read Pointer: 0x%px\n", q->read_ptr);
+	pr_debug("Queue Write Pointer: 0x%px\n", q->write_ptr);
+	pr_debug("Queue Doorbell Pointer: 0x%p\n", q->doorbell_ptr);
+	pr_debug("Queue Doorbell Offset: %u\n", q->doorbell_off);
+}
+
+void print_queue(struct queue *q)
+{
+	if (!q)
+		return;
+	pr_debug("Printing queue:\n");
+	pr_debug("Queue Type: %u\n", q->properties.type);
+	pr_debug("Queue Size: %llu\n", q->properties.queue_size);
+	pr_debug("Queue percent: %u\n", q->properties.queue_percent);
+	pr_debug("Queue Address: 0x%llX\n", q->properties.queue_address);
+	pr_debug("Queue Id: %u\n", q->properties.queue_id);
+	pr_debug("Queue Process Vmid: %u\n", q->properties.vmid);
+	pr_debug("Queue Read Pointer: 0x%px\n", q->properties.read_ptr);
+	pr_debug("Queue Write Pointer: 0x%px\n", q->properties.write_ptr);
+	pr_debug("Queue Doorbell Pointer: 0x%p\n", q->properties.doorbell_ptr);
+	pr_debug("Queue Doorbell Offset: %u\n", q->properties.doorbell_off);
+	pr_debug("Queue MQD Address: 0x%p\n", q->mqd);
+	pr_debug("Queue MQD Gart: 0x%llX\n", q->gart_mqd_addr);
+	pr_debug("Queue Process Address: 0x%p\n", q->process);
+	pr_debug("Queue Device Address: 0x%p\n", q->device);
+}
+
+int init_queue(struct queue **q, const struct queue_properties *properties)
+{
+	struct queue *tmp_q;
+
+	tmp_q = kzalloc(sizeof(*tmp_q), GFP_KERNEL);
+	if (!tmp_q)
+		return -ENOMEM;
+
+	memcpy(&tmp_q->properties, properties, sizeof(*properties));
+
+	*q = tmp_q;
+	return 0;
+}
+
+void uninit_queue(struct queue *q)
+{
+	kfree(q);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
new file mode 100644
index 000000000..d9953c2b2
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2020-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/poll.h>
+#include <linux/wait.h>
+#include <linux/anon_inodes.h>
+#include <uapi/linux/kfd_ioctl.h>
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "kfd_priv.h"
+#include "kfd_smi_events.h"
+
+struct kfd_smi_client {
+	struct list_head list;
+	struct kfifo fifo;
+	wait_queue_head_t wait_queue;
+	/* events enabled */
+	uint64_t events;
+	struct kfd_node *dev;
+	spinlock_t lock;
+	struct rcu_head rcu;
+	pid_t pid;
+	bool suser;
+};
+
+#define MAX_KFIFO_SIZE	1024
+
+static __poll_t kfd_smi_ev_poll(struct file *, struct poll_table_struct *);
+static ssize_t kfd_smi_ev_read(struct file *, char __user *, size_t, loff_t *);
+static ssize_t kfd_smi_ev_write(struct file *, const char __user *, size_t,
+				loff_t *);
+static int kfd_smi_ev_release(struct inode *, struct file *);
+
+static const char kfd_smi_name[] = "kfd_smi_ev";
+
+static const struct file_operations kfd_smi_ev_fops = {
+	.owner = THIS_MODULE,
+	.poll = kfd_smi_ev_poll,
+	.read = kfd_smi_ev_read,
+	.write = kfd_smi_ev_write,
+	.release = kfd_smi_ev_release
+};
+
+static __poll_t kfd_smi_ev_poll(struct file *filep,
+				struct poll_table_struct *wait)
+{
+	struct kfd_smi_client *client = filep->private_data;
+	__poll_t mask = 0;
+
+	poll_wait(filep, &client->wait_queue, wait);
+
+	spin_lock(&client->lock);
+	if (!kfifo_is_empty(&client->fifo))
+		mask = EPOLLIN | EPOLLRDNORM;
+	spin_unlock(&client->lock);
+
+	return mask;
+}
+
+static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user,
+			       size_t size, loff_t *offset)
+{
+	int ret;
+	size_t to_copy;
+	struct kfd_smi_client *client = filep->private_data;
+	unsigned char *buf;
+
+	size = min_t(size_t, size, MAX_KFIFO_SIZE);
+	buf = kmalloc(size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	/* kfifo_to_user can sleep so we can't use spinlock protection around
+	 * it. Instead, we kfifo out as spinlocked then copy them to the user.
+	 */
+	spin_lock(&client->lock);
+	to_copy = kfifo_len(&client->fifo);
+	if (!to_copy) {
+		spin_unlock(&client->lock);
+		ret = -EAGAIN;
+		goto ret_err;
+	}
+	to_copy = min(size, to_copy);
+	ret = kfifo_out(&client->fifo, buf, to_copy);
+	spin_unlock(&client->lock);
+	if (ret <= 0) {
+		ret = -EAGAIN;
+		goto ret_err;
+	}
+
+	ret = copy_to_user(user, buf, to_copy);
+	if (ret) {
+		ret = -EFAULT;
+		goto ret_err;
+	}
+
+	kfree(buf);
+	return to_copy;
+
+ret_err:
+	kfree(buf);
+	return ret;
+}
+
+static ssize_t kfd_smi_ev_write(struct file *filep, const char __user *user,
+				size_t size, loff_t *offset)
+{
+	struct kfd_smi_client *client = filep->private_data;
+	uint64_t events;
+
+	if (!access_ok(user, size) || size < sizeof(events))
+		return -EFAULT;
+	if (copy_from_user(&events, user, sizeof(events)))
+		return -EFAULT;
+
+	WRITE_ONCE(client->events, events);
+
+	return sizeof(events);
+}
+
+static void kfd_smi_ev_client_free(struct rcu_head *p)
+{
+	struct kfd_smi_client *ev = container_of(p, struct kfd_smi_client, rcu);
+
+	kfifo_free(&ev->fifo);
+	kfree(ev);
+}
+
+static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
+{
+	struct kfd_smi_client *client = filep->private_data;
+	struct kfd_node *dev = client->dev;
+
+	spin_lock(&dev->smi_lock);
+	list_del_rcu(&client->list);
+	spin_unlock(&dev->smi_lock);
+
+	call_rcu(&client->rcu, kfd_smi_ev_client_free);
+	return 0;
+}
+
+static bool kfd_smi_ev_enabled(pid_t pid, struct kfd_smi_client *client,
+			       unsigned int event)
+{
+	uint64_t all = KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS);
+	uint64_t events = READ_ONCE(client->events);
+
+	if (pid && client->pid != pid && !(client->suser && (events & all)))
+		return false;
+
+	return events & KFD_SMI_EVENT_MASK_FROM_INDEX(event);
+}
+
+static void add_event_to_kfifo(pid_t pid, struct kfd_node *dev,
+			       unsigned int smi_event, char *event_msg, int len)
+{
+	struct kfd_smi_client *client;
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(client, &dev->smi_clients, list) {
+		if (!kfd_smi_ev_enabled(pid, client, smi_event))
+			continue;
+		spin_lock(&client->lock);
+		if (kfifo_avail(&client->fifo) >= len) {
+			kfifo_in(&client->fifo, event_msg, len);
+			wake_up_all(&client->wait_queue);
+		} else {
+			pr_debug("smi_event(EventID: %u): no space left\n",
+					smi_event);
+		}
+		spin_unlock(&client->lock);
+	}
+
+	rcu_read_unlock();
+}
+
+__printf(4, 5)
+static void kfd_smi_event_add(pid_t pid, struct kfd_node *dev,
+			      unsigned int event, char *fmt, ...)
+{
+	char fifo_in[KFD_SMI_EVENT_MSG_SIZE];
+	int len;
+	va_list args;
+
+	if (list_empty(&dev->smi_clients))
+		return;
+
+	len = snprintf(fifo_in, sizeof(fifo_in), "%x ", event);
+
+	va_start(args, fmt);
+	len += vsnprintf(fifo_in + len, sizeof(fifo_in) - len, fmt, args);
+	va_end(args);
+
+	add_event_to_kfifo(pid, dev, event, fifo_in, len);
+}
+
+void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset)
+{
+	unsigned int event;
+
+	if (post_reset) {
+		event = KFD_SMI_EVENT_GPU_POST_RESET;
+	} else {
+		event = KFD_SMI_EVENT_GPU_PRE_RESET;
+		++(dev->reset_seq_num);
+	}
+	kfd_smi_event_add(0, dev, event, "%x\n", dev->reset_seq_num);
+}
+
+void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
+					     uint64_t throttle_bitmask)
+{
+	kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n",
+			  throttle_bitmask,
+			  amdgpu_dpm_get_thermal_throttling_counter(dev->adev));
+}
+
+void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid)
+{
+	struct amdgpu_task_info task_info;
+
+	memset(&task_info, 0, sizeof(struct amdgpu_task_info));
+	amdgpu_vm_get_task_info(dev->adev, pasid, &task_info);
+	/* Report VM faults from user applications, not retry from kernel */
+	if (!task_info.pid)
+		return;
+
+	kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n",
+			  task_info.pid, task_info.task_name);
+}
+
+void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid,
+				    unsigned long address, bool write_fault,
+				    ktime_t ts)
+{
+	kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_START,
+			  "%lld -%d @%lx(%x) %c\n", ktime_to_ns(ts), pid,
+			  address, node->id, write_fault ? 'W' : 'R');
+}
+
+void kfd_smi_event_page_fault_end(struct kfd_node *node, pid_t pid,
+				  unsigned long address, bool migration)
+{
+	kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_END,
+			  "%lld -%d @%lx(%x) %c\n", ktime_get_boottime_ns(),
+			  pid, address, node->id, migration ? 'M' : 'U');
+}
+
+void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
+				   unsigned long start, unsigned long end,
+				   uint32_t from, uint32_t to,
+				   uint32_t prefetch_loc, uint32_t preferred_loc,
+				   uint32_t trigger)
+{
+	kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_START,
+			  "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n",
+			  ktime_get_boottime_ns(), pid, start, end - start,
+			  from, to, prefetch_loc, preferred_loc, trigger);
+}
+
+void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
+				 unsigned long start, unsigned long end,
+				 uint32_t from, uint32_t to, uint32_t trigger)
+{
+	kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_END,
+			  "%lld -%d @%lx(%lx) %x->%x %d\n",
+			  ktime_get_boottime_ns(), pid, start, end - start,
+			  from, to, trigger);
+}
+
+void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid,
+				  uint32_t trigger)
+{
+	kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_EVICTION,
+			  "%lld -%d %x %d\n", ktime_get_boottime_ns(), pid,
+			  node->id, trigger);
+}
+
+void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid)
+{
+	kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_RESTORE,
+			  "%lld -%d %x\n", ktime_get_boottime_ns(), pid,
+			  node->id);
+}
+
+void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
+{
+	struct kfd_process *p;
+	int i;
+
+	p = kfd_lookup_process_by_mm(mm);
+	if (!p)
+		return;
+
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+
+		kfd_smi_event_add(p->lead_thread->pid, pdd->dev,
+				  KFD_SMI_EVENT_QUEUE_RESTORE,
+				  "%lld -%d %x %c\n", ktime_get_boottime_ns(),
+				  p->lead_thread->pid, pdd->dev->id, 'R');
+	}
+	kfd_unref_process(p);
+}
+
+void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid,
+				  unsigned long address, unsigned long last,
+				  uint32_t trigger)
+{
+	kfd_smi_event_add(pid, node, KFD_SMI_EVENT_UNMAP_FROM_GPU,
+			  "%lld -%d @%lx(%lx) %x %d\n", ktime_get_boottime_ns(),
+			  pid, address, last - address + 1, node->id, trigger);
+}
+
+int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd)
+{
+	struct kfd_smi_client *client;
+	int ret;
+
+	client = kzalloc(sizeof(struct kfd_smi_client), GFP_KERNEL);
+	if (!client)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&client->list);
+
+	ret = kfifo_alloc(&client->fifo, MAX_KFIFO_SIZE, GFP_KERNEL);
+	if (ret) {
+		kfree(client);
+		return ret;
+	}
+
+	init_waitqueue_head(&client->wait_queue);
+	spin_lock_init(&client->lock);
+	client->events = 0;
+	client->dev = dev;
+	client->pid = current->tgid;
+	client->suser = capable(CAP_SYS_ADMIN);
+
+	spin_lock(&dev->smi_lock);
+	list_add_rcu(&client->list, &dev->smi_clients);
+	spin_unlock(&dev->smi_lock);
+
+	ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client,
+			       O_RDWR);
+	if (ret < 0) {
+		spin_lock(&dev->smi_lock);
+		list_del_rcu(&client->list);
+		spin_unlock(&dev->smi_lock);
+
+		synchronize_rcu();
+
+		kfifo_free(&client->fifo);
+		kfree(client);
+		return ret;
+	}
+	*fd = ret;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
new file mode 100644
index 000000000..fa95c2dfd
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2020-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef KFD_SMI_EVENTS_H_INCLUDED
+#define KFD_SMI_EVENTS_H_INCLUDED
+
+int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd);
+void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid);
+void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
+					     uint64_t throttle_bitmask);
+void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset);
+void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid,
+				    unsigned long address, bool write_fault,
+				    ktime_t ts);
+void kfd_smi_event_page_fault_end(struct kfd_node *node, pid_t pid,
+				  unsigned long address, bool migration);
+void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
+			     unsigned long start, unsigned long end,
+			     uint32_t from, uint32_t to,
+			     uint32_t prefetch_loc, uint32_t preferred_loc,
+			     uint32_t trigger);
+void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
+			     unsigned long start, unsigned long end,
+			     uint32_t from, uint32_t to, uint32_t trigger);
+void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid,
+				  uint32_t trigger);
+void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid);
+void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm);
+void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid,
+				  unsigned long address, unsigned long last,
+				  uint32_t trigger);
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
new file mode 100644
index 000000000..8e368e465
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -0,0 +1,4131 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2020-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/types.h>
+#include <linux/sched/task.h>
+#include <linux/dynamic_debug.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/drm_exec.h>
+
+#include "amdgpu_sync.h"
+#include "amdgpu_object.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_hmm.h"
+#include "amdgpu.h"
+#include "amdgpu_xgmi.h"
+#include "kfd_priv.h"
+#include "kfd_svm.h"
+#include "kfd_migrate.h"
+#include "kfd_smi_events.h"
+
+#ifdef dev_fmt
+#undef dev_fmt
+#endif
+#define dev_fmt(fmt) "kfd_svm: %s: " fmt, __func__
+
+#define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
+
+/* Long enough to ensure no retry fault comes after svm range is restored and
+ * page table is updated.
+ */
+#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING	(2UL * NSEC_PER_MSEC)
+#if IS_ENABLED(CONFIG_DYNAMIC_DEBUG)
+#define dynamic_svm_range_dump(svms) \
+	_dynamic_func_call_no_desc("svm_range_dump", svm_range_debug_dump, svms)
+#else
+#define dynamic_svm_range_dump(svms) \
+	do { if (0) svm_range_debug_dump(svms); } while (0)
+#endif
+
+/* Giant svm range split into smaller ranges based on this, it is decided using
+ * minimum of all dGPU/APU 1/32 VRAM size, between 2MB to 1GB and alignment to
+ * power of 2MB.
+ */
+static uint64_t max_svm_range_pages;
+
+struct criu_svm_metadata {
+	struct list_head list;
+	struct kfd_criu_svm_range_priv_data data;
+};
+
+static void svm_range_evict_svm_bo_worker(struct work_struct *work);
+static bool
+svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
+				    const struct mmu_notifier_range *range,
+				    unsigned long cur_seq);
+static int
+svm_range_check_vm(struct kfd_process *p, uint64_t start, uint64_t last,
+		   uint64_t *bo_s, uint64_t *bo_l);
+static const struct mmu_interval_notifier_ops svm_range_mn_ops = {
+	.invalidate = svm_range_cpu_invalidate_pagetables,
+};
+
+/**
+ * svm_range_unlink - unlink svm_range from lists and interval tree
+ * @prange: svm range structure to be removed
+ *
+ * Remove the svm_range from the svms and svm_bo lists and the svms
+ * interval tree.
+ *
+ * Context: The caller must hold svms->lock
+ */
+static void svm_range_unlink(struct svm_range *prange)
+{
+	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
+		 prange, prange->start, prange->last);
+
+	if (prange->svm_bo) {
+		spin_lock(&prange->svm_bo->list_lock);
+		list_del(&prange->svm_bo_list);
+		spin_unlock(&prange->svm_bo->list_lock);
+	}
+
+	list_del(&prange->list);
+	if (prange->it_node.start != 0 && prange->it_node.last != 0)
+		interval_tree_remove(&prange->it_node, &prange->svms->objects);
+}
+
+static void
+svm_range_add_notifier_locked(struct mm_struct *mm, struct svm_range *prange)
+{
+	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
+		 prange, prange->start, prange->last);
+
+	mmu_interval_notifier_insert_locked(&prange->notifier, mm,
+				     prange->start << PAGE_SHIFT,
+				     prange->npages << PAGE_SHIFT,
+				     &svm_range_mn_ops);
+}
+
+/**
+ * svm_range_add_to_svms - add svm range to svms
+ * @prange: svm range structure to be added
+ *
+ * Add the svm range to svms interval tree and link list
+ *
+ * Context: The caller must hold svms->lock
+ */
+static void svm_range_add_to_svms(struct svm_range *prange)
+{
+	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
+		 prange, prange->start, prange->last);
+
+	list_move_tail(&prange->list, &prange->svms->list);
+	prange->it_node.start = prange->start;
+	prange->it_node.last = prange->last;
+	interval_tree_insert(&prange->it_node, &prange->svms->objects);
+}
+
+static void svm_range_remove_notifier(struct svm_range *prange)
+{
+	pr_debug("remove notifier svms 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+		 prange->svms, prange,
+		 prange->notifier.interval_tree.start >> PAGE_SHIFT,
+		 prange->notifier.interval_tree.last >> PAGE_SHIFT);
+
+	if (prange->notifier.interval_tree.start != 0 &&
+	    prange->notifier.interval_tree.last != 0)
+		mmu_interval_notifier_remove(&prange->notifier);
+}
+
+static bool
+svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t dma_addr)
+{
+	return dma_addr && !dma_mapping_error(dev, dma_addr) &&
+	       !(dma_addr & SVM_RANGE_VRAM_DOMAIN);
+}
+
+static int
+svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
+		      unsigned long offset, unsigned long npages,
+		      unsigned long *hmm_pfns, uint32_t gpuidx)
+{
+	enum dma_data_direction dir = DMA_BIDIRECTIONAL;
+	dma_addr_t *addr = prange->dma_addr[gpuidx];
+	struct device *dev = adev->dev;
+	struct page *page;
+	int i, r;
+
+	if (!addr) {
+		addr = kvcalloc(prange->npages, sizeof(*addr), GFP_KERNEL);
+		if (!addr)
+			return -ENOMEM;
+		prange->dma_addr[gpuidx] = addr;
+	}
+
+	addr += offset;
+	for (i = 0; i < npages; i++) {
+		if (svm_is_valid_dma_mapping_addr(dev, addr[i]))
+			dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
+
+		page = hmm_pfn_to_page(hmm_pfns[i]);
+		if (is_zone_device_page(page)) {
+			struct amdgpu_device *bo_adev = prange->svm_bo->node->adev;
+
+			addr[i] = (hmm_pfns[i] << PAGE_SHIFT) +
+				   bo_adev->vm_manager.vram_base_offset -
+				   bo_adev->kfd.pgmap.range.start;
+			addr[i] |= SVM_RANGE_VRAM_DOMAIN;
+			pr_debug_ratelimited("vram address: 0x%llx\n", addr[i]);
+			continue;
+		}
+		addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
+		r = dma_mapping_error(dev, addr[i]);
+		if (r) {
+			dev_err(dev, "failed %d dma_map_page\n", r);
+			return r;
+		}
+		pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n",
+				     addr[i] >> PAGE_SHIFT, page_to_pfn(page));
+	}
+	return 0;
+}
+
+static int
+svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
+		  unsigned long offset, unsigned long npages,
+		  unsigned long *hmm_pfns)
+{
+	struct kfd_process *p;
+	uint32_t gpuidx;
+	int r;
+
+	p = container_of(prange->svms, struct kfd_process, svms);
+
+	for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+		struct kfd_process_device *pdd;
+
+		pr_debug("mapping to gpu idx 0x%x\n", gpuidx);
+		pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+		if (!pdd) {
+			pr_debug("failed to find device idx %d\n", gpuidx);
+			return -EINVAL;
+		}
+
+		r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, npages,
+					  hmm_pfns, gpuidx);
+		if (r)
+			break;
+	}
+
+	return r;
+}
+
+void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
+			 unsigned long offset, unsigned long npages)
+{
+	enum dma_data_direction dir = DMA_BIDIRECTIONAL;
+	int i;
+
+	if (!dma_addr)
+		return;
+
+	for (i = offset; i < offset + npages; i++) {
+		if (!svm_is_valid_dma_mapping_addr(dev, dma_addr[i]))
+			continue;
+		pr_debug_ratelimited("unmap 0x%llx\n", dma_addr[i] >> PAGE_SHIFT);
+		dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir);
+		dma_addr[i] = 0;
+	}
+}
+
+void svm_range_free_dma_mappings(struct svm_range *prange, bool unmap_dma)
+{
+	struct kfd_process_device *pdd;
+	dma_addr_t *dma_addr;
+	struct device *dev;
+	struct kfd_process *p;
+	uint32_t gpuidx;
+
+	p = container_of(prange->svms, struct kfd_process, svms);
+
+	for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) {
+		dma_addr = prange->dma_addr[gpuidx];
+		if (!dma_addr)
+			continue;
+
+		pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+		if (!pdd) {
+			pr_debug("failed to find device idx %d\n", gpuidx);
+			continue;
+		}
+		dev = &pdd->dev->adev->pdev->dev;
+		if (unmap_dma)
+			svm_range_dma_unmap(dev, dma_addr, 0, prange->npages);
+		kvfree(dma_addr);
+		prange->dma_addr[gpuidx] = NULL;
+	}
+}
+
+static void svm_range_free(struct svm_range *prange, bool do_unmap)
+{
+	uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT;
+	struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
+
+	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
+		 prange->start, prange->last);
+
+	svm_range_vram_node_free(prange);
+	svm_range_free_dma_mappings(prange, do_unmap);
+
+	if (do_unmap && !p->xnack_enabled) {
+		pr_debug("unreserve prange 0x%p size: 0x%llx\n", prange, size);
+		amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
+					KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
+	}
+	mutex_destroy(&prange->lock);
+	mutex_destroy(&prange->migrate_mutex);
+	kfree(prange);
+}
+
+static void
+svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc,
+				 uint8_t *granularity, uint32_t *flags)
+{
+	*location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+	*prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+	*granularity = 9;
+	*flags =
+		KFD_IOCTL_SVM_FLAG_HOST_ACCESS | KFD_IOCTL_SVM_FLAG_COHERENT;
+}
+
+static struct
+svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
+			 uint64_t last, bool update_mem_usage)
+{
+	uint64_t size = last - start + 1;
+	struct svm_range *prange;
+	struct kfd_process *p;
+
+	prange = kzalloc(sizeof(*prange), GFP_KERNEL);
+	if (!prange)
+		return NULL;
+
+	p = container_of(svms, struct kfd_process, svms);
+	if (!p->xnack_enabled && update_mem_usage &&
+	    amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT,
+				    KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0)) {
+		pr_info("SVM mapping failed, exceeds resident system memory limit\n");
+		kfree(prange);
+		return NULL;
+	}
+	prange->npages = size;
+	prange->svms = svms;
+	prange->start = start;
+	prange->last = last;
+	INIT_LIST_HEAD(&prange->list);
+	INIT_LIST_HEAD(&prange->update_list);
+	INIT_LIST_HEAD(&prange->svm_bo_list);
+	INIT_LIST_HEAD(&prange->deferred_list);
+	INIT_LIST_HEAD(&prange->child_list);
+	atomic_set(&prange->invalid, 0);
+	prange->validate_timestamp = 0;
+	mutex_init(&prange->migrate_mutex);
+	mutex_init(&prange->lock);
+
+	if (p->xnack_enabled)
+		bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
+			    MAX_GPU_INSTANCE);
+
+	svm_range_set_default_attributes(&prange->preferred_loc,
+					 &prange->prefetch_loc,
+					 &prange->granularity, &prange->flags);
+
+	pr_debug("svms 0x%p [0x%llx 0x%llx]\n", svms, start, last);
+
+	return prange;
+}
+
+static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo)
+{
+	if (!svm_bo || !kref_get_unless_zero(&svm_bo->kref))
+		return false;
+
+	return true;
+}
+
+static void svm_range_bo_release(struct kref *kref)
+{
+	struct svm_range_bo *svm_bo;
+
+	svm_bo = container_of(kref, struct svm_range_bo, kref);
+	pr_debug("svm_bo 0x%p\n", svm_bo);
+
+	spin_lock(&svm_bo->list_lock);
+	while (!list_empty(&svm_bo->range_list)) {
+		struct svm_range *prange =
+				list_first_entry(&svm_bo->range_list,
+						struct svm_range, svm_bo_list);
+		/* list_del_init tells a concurrent svm_range_vram_node_new when
+		 * it's safe to reuse the svm_bo pointer and svm_bo_list head.
+		 */
+		list_del_init(&prange->svm_bo_list);
+		spin_unlock(&svm_bo->list_lock);
+
+		pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
+			 prange->start, prange->last);
+		mutex_lock(&prange->lock);
+		prange->svm_bo = NULL;
+		mutex_unlock(&prange->lock);
+
+		spin_lock(&svm_bo->list_lock);
+	}
+	spin_unlock(&svm_bo->list_lock);
+	if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) {
+		/* We're not in the eviction worker.
+		 * Signal the fence and synchronize with any
+		 * pending eviction work.
+		 */
+		dma_fence_signal(&svm_bo->eviction_fence->base);
+		cancel_work_sync(&svm_bo->eviction_work);
+	}
+	dma_fence_put(&svm_bo->eviction_fence->base);
+	amdgpu_bo_unref(&svm_bo->bo);
+	kfree(svm_bo);
+}
+
+static void svm_range_bo_wq_release(struct work_struct *work)
+{
+	struct svm_range_bo *svm_bo;
+
+	svm_bo = container_of(work, struct svm_range_bo, release_work);
+	svm_range_bo_release(&svm_bo->kref);
+}
+
+static void svm_range_bo_release_async(struct kref *kref)
+{
+	struct svm_range_bo *svm_bo;
+
+	svm_bo = container_of(kref, struct svm_range_bo, kref);
+	pr_debug("svm_bo 0x%p\n", svm_bo);
+	INIT_WORK(&svm_bo->release_work, svm_range_bo_wq_release);
+	schedule_work(&svm_bo->release_work);
+}
+
+void svm_range_bo_unref_async(struct svm_range_bo *svm_bo)
+{
+	kref_put(&svm_bo->kref, svm_range_bo_release_async);
+}
+
+static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
+{
+	if (svm_bo)
+		kref_put(&svm_bo->kref, svm_range_bo_release);
+}
+
+static bool
+svm_range_validate_svm_bo(struct kfd_node *node, struct svm_range *prange)
+{
+	mutex_lock(&prange->lock);
+	if (!prange->svm_bo) {
+		mutex_unlock(&prange->lock);
+		return false;
+	}
+	if (prange->ttm_res) {
+		/* We still have a reference, all is well */
+		mutex_unlock(&prange->lock);
+		return true;
+	}
+	if (svm_bo_ref_unless_zero(prange->svm_bo)) {
+		/*
+		 * Migrate from GPU to GPU, remove range from source svm_bo->node
+		 * range list, and return false to allocate svm_bo from destination
+		 * node.
+		 */
+		if (prange->svm_bo->node != node) {
+			mutex_unlock(&prange->lock);
+
+			spin_lock(&prange->svm_bo->list_lock);
+			list_del_init(&prange->svm_bo_list);
+			spin_unlock(&prange->svm_bo->list_lock);
+
+			svm_range_bo_unref(prange->svm_bo);
+			return false;
+		}
+		if (READ_ONCE(prange->svm_bo->evicting)) {
+			struct dma_fence *f;
+			struct svm_range_bo *svm_bo;
+			/* The BO is getting evicted,
+			 * we need to get a new one
+			 */
+			mutex_unlock(&prange->lock);
+			svm_bo = prange->svm_bo;
+			f = dma_fence_get(&svm_bo->eviction_fence->base);
+			svm_range_bo_unref(prange->svm_bo);
+			/* wait for the fence to avoid long spin-loop
+			 * at list_empty_careful
+			 */
+			dma_fence_wait(f, false);
+			dma_fence_put(f);
+		} else {
+			/* The BO was still around and we got
+			 * a new reference to it
+			 */
+			mutex_unlock(&prange->lock);
+			pr_debug("reuse old bo svms 0x%p [0x%lx 0x%lx]\n",
+				 prange->svms, prange->start, prange->last);
+
+			prange->ttm_res = prange->svm_bo->bo->tbo.resource;
+			return true;
+		}
+
+	} else {
+		mutex_unlock(&prange->lock);
+	}
+
+	/* We need a new svm_bo. Spin-loop to wait for concurrent
+	 * svm_range_bo_release to finish removing this range from
+	 * its range list and set prange->svm_bo to null. After this,
+	 * it is safe to reuse the svm_bo pointer and svm_bo_list head.
+	 */
+	while (!list_empty_careful(&prange->svm_bo_list) || prange->svm_bo)
+		cond_resched();
+
+	return false;
+}
+
+static struct svm_range_bo *svm_range_bo_new(void)
+{
+	struct svm_range_bo *svm_bo;
+
+	svm_bo = kzalloc(sizeof(*svm_bo), GFP_KERNEL);
+	if (!svm_bo)
+		return NULL;
+
+	kref_init(&svm_bo->kref);
+	INIT_LIST_HEAD(&svm_bo->range_list);
+	spin_lock_init(&svm_bo->list_lock);
+
+	return svm_bo;
+}
+
+int
+svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
+			bool clear)
+{
+	struct amdgpu_bo_param bp;
+	struct svm_range_bo *svm_bo;
+	struct amdgpu_bo_user *ubo;
+	struct amdgpu_bo *bo;
+	struct kfd_process *p;
+	struct mm_struct *mm;
+	int r;
+
+	p = container_of(prange->svms, struct kfd_process, svms);
+	pr_debug("pasid: %x svms 0x%p [0x%lx 0x%lx]\n", p->pasid, prange->svms,
+		 prange->start, prange->last);
+
+	if (svm_range_validate_svm_bo(node, prange))
+		return 0;
+
+	svm_bo = svm_range_bo_new();
+	if (!svm_bo) {
+		pr_debug("failed to alloc svm bo\n");
+		return -ENOMEM;
+	}
+	mm = get_task_mm(p->lead_thread);
+	if (!mm) {
+		pr_debug("failed to get mm\n");
+		kfree(svm_bo);
+		return -ESRCH;
+	}
+	svm_bo->node = node;
+	svm_bo->eviction_fence =
+		amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
+					   mm,
+					   svm_bo);
+	mmput(mm);
+	INIT_WORK(&svm_bo->eviction_work, svm_range_evict_svm_bo_worker);
+	svm_bo->evicting = 0;
+	memset(&bp, 0, sizeof(bp));
+	bp.size = prange->npages * PAGE_SIZE;
+	bp.byte_align = PAGE_SIZE;
+	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+	bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+	bp.flags |= clear ? AMDGPU_GEM_CREATE_VRAM_CLEARED : 0;
+	bp.flags |= AMDGPU_GEM_CREATE_DISCARDABLE;
+	bp.type = ttm_bo_type_device;
+	bp.resv = NULL;
+	if (node->xcp)
+		bp.xcp_id_plus1 = node->xcp->id + 1;
+
+	r = amdgpu_bo_create_user(node->adev, &bp, &ubo);
+	if (r) {
+		pr_debug("failed %d to create bo\n", r);
+		goto create_bo_failed;
+	}
+	bo = &ubo->bo;
+
+	pr_debug("alloc bo at offset 0x%lx size 0x%lx on partition %d\n",
+		 bo->tbo.resource->start << PAGE_SHIFT, bp.size,
+		 bp.xcp_id_plus1 - 1);
+
+	r = amdgpu_bo_reserve(bo, true);
+	if (r) {
+		pr_debug("failed %d to reserve bo\n", r);
+		goto reserve_bo_failed;
+	}
+
+	if (clear) {
+		r = amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
+		if (r) {
+			pr_debug("failed %d to sync bo\n", r);
+			amdgpu_bo_unreserve(bo);
+			goto reserve_bo_failed;
+		}
+	}
+
+	r = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
+	if (r) {
+		pr_debug("failed %d to reserve bo\n", r);
+		amdgpu_bo_unreserve(bo);
+		goto reserve_bo_failed;
+	}
+	amdgpu_bo_fence(bo, &svm_bo->eviction_fence->base, true);
+
+	amdgpu_bo_unreserve(bo);
+
+	svm_bo->bo = bo;
+	prange->svm_bo = svm_bo;
+	prange->ttm_res = bo->tbo.resource;
+	prange->offset = 0;
+
+	spin_lock(&svm_bo->list_lock);
+	list_add(&prange->svm_bo_list, &svm_bo->range_list);
+	spin_unlock(&svm_bo->list_lock);
+
+	return 0;
+
+reserve_bo_failed:
+	amdgpu_bo_unref(&bo);
+create_bo_failed:
+	dma_fence_put(&svm_bo->eviction_fence->base);
+	kfree(svm_bo);
+	prange->ttm_res = NULL;
+
+	return r;
+}
+
+void svm_range_vram_node_free(struct svm_range *prange)
+{
+	/* serialize prange->svm_bo unref */
+	mutex_lock(&prange->lock);
+	/* prange->svm_bo has not been unref */
+	if (prange->ttm_res) {
+		prange->ttm_res = NULL;
+		mutex_unlock(&prange->lock);
+		svm_range_bo_unref(prange->svm_bo);
+	} else
+		mutex_unlock(&prange->lock);
+}
+
+struct kfd_node *
+svm_range_get_node_by_id(struct svm_range *prange, uint32_t gpu_id)
+{
+	struct kfd_process *p;
+	struct kfd_process_device *pdd;
+
+	p = container_of(prange->svms, struct kfd_process, svms);
+	pdd = kfd_process_device_data_by_id(p, gpu_id);
+	if (!pdd) {
+		pr_debug("failed to get kfd process device by id 0x%x\n", gpu_id);
+		return NULL;
+	}
+
+	return pdd->dev;
+}
+
+struct kfd_process_device *
+svm_range_get_pdd_by_node(struct svm_range *prange, struct kfd_node *node)
+{
+	struct kfd_process *p;
+
+	p = container_of(prange->svms, struct kfd_process, svms);
+
+	return kfd_get_process_device_data(node, p);
+}
+
+static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo)
+{
+	struct ttm_operation_ctx ctx = { false, false };
+
+	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
+
+	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+}
+
+static int
+svm_range_check_attr(struct kfd_process *p,
+		     uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+{
+	uint32_t i;
+
+	for (i = 0; i < nattr; i++) {
+		uint32_t val = attrs[i].value;
+		int gpuidx = MAX_GPU_INSTANCE;
+
+		switch (attrs[i].type) {
+		case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
+			if (val != KFD_IOCTL_SVM_LOCATION_SYSMEM &&
+			    val != KFD_IOCTL_SVM_LOCATION_UNDEFINED)
+				gpuidx = kfd_process_gpuidx_from_gpuid(p, val);
+			break;
+		case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+			if (val != KFD_IOCTL_SVM_LOCATION_SYSMEM)
+				gpuidx = kfd_process_gpuidx_from_gpuid(p, val);
+			break;
+		case KFD_IOCTL_SVM_ATTR_ACCESS:
+		case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
+		case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+			gpuidx = kfd_process_gpuidx_from_gpuid(p, val);
+			break;
+		case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+			break;
+		case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+			break;
+		case KFD_IOCTL_SVM_ATTR_GRANULARITY:
+			break;
+		default:
+			pr_debug("unknown attr type 0x%x\n", attrs[i].type);
+			return -EINVAL;
+		}
+
+		if (gpuidx < 0) {
+			pr_debug("no GPU 0x%x found\n", val);
+			return -EINVAL;
+		} else if (gpuidx < MAX_GPU_INSTANCE &&
+			   !test_bit(gpuidx, p->svms.bitmap_supported)) {
+			pr_debug("GPU 0x%x not supported\n", val);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static void
+svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
+		      uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
+		      bool *update_mapping)
+{
+	uint32_t i;
+	int gpuidx;
+
+	for (i = 0; i < nattr; i++) {
+		switch (attrs[i].type) {
+		case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
+			prange->preferred_loc = attrs[i].value;
+			break;
+		case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+			prange->prefetch_loc = attrs[i].value;
+			break;
+		case KFD_IOCTL_SVM_ATTR_ACCESS:
+		case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
+		case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+			if (!p->xnack_enabled)
+				*update_mapping = true;
+
+			gpuidx = kfd_process_gpuidx_from_gpuid(p,
+							       attrs[i].value);
+			if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
+				bitmap_clear(prange->bitmap_access, gpuidx, 1);
+				bitmap_clear(prange->bitmap_aip, gpuidx, 1);
+			} else if (attrs[i].type == KFD_IOCTL_SVM_ATTR_ACCESS) {
+				bitmap_set(prange->bitmap_access, gpuidx, 1);
+				bitmap_clear(prange->bitmap_aip, gpuidx, 1);
+			} else {
+				bitmap_clear(prange->bitmap_access, gpuidx, 1);
+				bitmap_set(prange->bitmap_aip, gpuidx, 1);
+			}
+			break;
+		case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+			*update_mapping = true;
+			prange->flags |= attrs[i].value;
+			break;
+		case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+			*update_mapping = true;
+			prange->flags &= ~attrs[i].value;
+			break;
+		case KFD_IOCTL_SVM_ATTR_GRANULARITY:
+			prange->granularity = min_t(uint32_t, attrs[i].value, 0x3F);
+			break;
+		default:
+			WARN_ONCE(1, "svm_range_check_attrs wasn't called?");
+		}
+	}
+}
+
+static bool
+svm_range_is_same_attrs(struct kfd_process *p, struct svm_range *prange,
+			uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+{
+	uint32_t i;
+	int gpuidx;
+
+	for (i = 0; i < nattr; i++) {
+		switch (attrs[i].type) {
+		case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
+			if (prange->preferred_loc != attrs[i].value)
+				return false;
+			break;
+		case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+			/* Prefetch should always trigger a migration even
+			 * if the value of the attribute didn't change.
+			 */
+			return false;
+		case KFD_IOCTL_SVM_ATTR_ACCESS:
+		case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
+		case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+			gpuidx = kfd_process_gpuidx_from_gpuid(p,
+							       attrs[i].value);
+			if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
+				if (test_bit(gpuidx, prange->bitmap_access) ||
+				    test_bit(gpuidx, prange->bitmap_aip))
+					return false;
+			} else if (attrs[i].type == KFD_IOCTL_SVM_ATTR_ACCESS) {
+				if (!test_bit(gpuidx, prange->bitmap_access))
+					return false;
+			} else {
+				if (!test_bit(gpuidx, prange->bitmap_aip))
+					return false;
+			}
+			break;
+		case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+			if ((prange->flags & attrs[i].value) != attrs[i].value)
+				return false;
+			break;
+		case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+			if ((prange->flags & attrs[i].value) != 0)
+				return false;
+			break;
+		case KFD_IOCTL_SVM_ATTR_GRANULARITY:
+			if (prange->granularity != attrs[i].value)
+				return false;
+			break;
+		default:
+			WARN_ONCE(1, "svm_range_check_attrs wasn't called?");
+		}
+	}
+
+	return true;
+}
+
+/**
+ * svm_range_debug_dump - print all range information from svms
+ * @svms: svm range list header
+ *
+ * debug output svm range start, end, prefetch location from svms
+ * interval tree and link list
+ *
+ * Context: The caller must hold svms->lock
+ */
+static void svm_range_debug_dump(struct svm_range_list *svms)
+{
+	struct interval_tree_node *node;
+	struct svm_range *prange;
+
+	pr_debug("dump svms 0x%p list\n", svms);
+	pr_debug("range\tstart\tpage\tend\t\tlocation\n");
+
+	list_for_each_entry(prange, &svms->list, list) {
+		pr_debug("0x%p 0x%lx\t0x%llx\t0x%llx\t0x%x\n",
+			 prange, prange->start, prange->npages,
+			 prange->start + prange->npages - 1,
+			 prange->actual_loc);
+	}
+
+	pr_debug("dump svms 0x%p interval tree\n", svms);
+	pr_debug("range\tstart\tpage\tend\t\tlocation\n");
+	node = interval_tree_iter_first(&svms->objects, 0, ~0ULL);
+	while (node) {
+		prange = container_of(node, struct svm_range, it_node);
+		pr_debug("0x%p 0x%lx\t0x%llx\t0x%llx\t0x%x\n",
+			 prange, prange->start, prange->npages,
+			 prange->start + prange->npages - 1,
+			 prange->actual_loc);
+		node = interval_tree_iter_next(node, 0, ~0ULL);
+	}
+}
+
+static void *
+svm_range_copy_array(void *psrc, size_t size, uint64_t num_elements,
+		     uint64_t offset)
+{
+	unsigned char *dst;
+
+	dst = kvmalloc_array(num_elements, size, GFP_KERNEL);
+	if (!dst)
+		return NULL;
+	memcpy(dst, (unsigned char *)psrc + offset, num_elements * size);
+
+	return (void *)dst;
+}
+
+static int
+svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src)
+{
+	int i;
+
+	for (i = 0; i < MAX_GPU_INSTANCE; i++) {
+		if (!src->dma_addr[i])
+			continue;
+		dst->dma_addr[i] = svm_range_copy_array(src->dma_addr[i],
+					sizeof(*src->dma_addr[i]), src->npages, 0);
+		if (!dst->dma_addr[i])
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int
+svm_range_split_array(void *ppnew, void *ppold, size_t size,
+		      uint64_t old_start, uint64_t old_n,
+		      uint64_t new_start, uint64_t new_n)
+{
+	unsigned char *new, *old, *pold;
+	uint64_t d;
+
+	if (!ppold)
+		return 0;
+	pold = *(unsigned char **)ppold;
+	if (!pold)
+		return 0;
+
+	d = (new_start - old_start) * size;
+	new = svm_range_copy_array(pold, size, new_n, d);
+	if (!new)
+		return -ENOMEM;
+	d = (new_start == old_start) ? new_n * size : 0;
+	old = svm_range_copy_array(pold, size, old_n, d);
+	if (!old) {
+		kvfree(new);
+		return -ENOMEM;
+	}
+	kvfree(pold);
+	*(void **)ppold = old;
+	*(void **)ppnew = new;
+
+	return 0;
+}
+
+static int
+svm_range_split_pages(struct svm_range *new, struct svm_range *old,
+		      uint64_t start, uint64_t last)
+{
+	uint64_t npages = last - start + 1;
+	int i, r;
+
+	for (i = 0; i < MAX_GPU_INSTANCE; i++) {
+		r = svm_range_split_array(&new->dma_addr[i], &old->dma_addr[i],
+					  sizeof(*old->dma_addr[i]), old->start,
+					  npages, new->start, new->npages);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+static int
+svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
+		      uint64_t start, uint64_t last)
+{
+	uint64_t npages = last - start + 1;
+
+	pr_debug("svms 0x%p new prange 0x%p start 0x%lx [0x%llx 0x%llx]\n",
+		 new->svms, new, new->start, start, last);
+
+	if (new->start == old->start) {
+		new->offset = old->offset;
+		old->offset += new->npages;
+	} else {
+		new->offset = old->offset + npages;
+	}
+
+	new->svm_bo = svm_range_bo_ref(old->svm_bo);
+	new->ttm_res = old->ttm_res;
+
+	spin_lock(&new->svm_bo->list_lock);
+	list_add(&new->svm_bo_list, &new->svm_bo->range_list);
+	spin_unlock(&new->svm_bo->list_lock);
+
+	return 0;
+}
+
+/**
+ * svm_range_split_adjust - split range and adjust
+ *
+ * @new: new range
+ * @old: the old range
+ * @start: the old range adjust to start address in pages
+ * @last: the old range adjust to last address in pages
+ *
+ * Copy system memory dma_addr or vram ttm_res in old range to new
+ * range from new_start up to size new->npages, the remaining old range is from
+ * start to last
+ *
+ * Return:
+ * 0 - OK, -ENOMEM - out of memory
+ */
+static int
+svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
+		      uint64_t start, uint64_t last)
+{
+	int r;
+
+	pr_debug("svms 0x%p new 0x%lx old [0x%lx 0x%lx] => [0x%llx 0x%llx]\n",
+		 new->svms, new->start, old->start, old->last, start, last);
+
+	if (new->start < old->start ||
+	    new->last > old->last) {
+		WARN_ONCE(1, "invalid new range start or last\n");
+		return -EINVAL;
+	}
+
+	r = svm_range_split_pages(new, old, start, last);
+	if (r)
+		return r;
+
+	if (old->actual_loc && old->ttm_res) {
+		r = svm_range_split_nodes(new, old, start, last);
+		if (r)
+			return r;
+	}
+
+	old->npages = last - start + 1;
+	old->start = start;
+	old->last = last;
+	new->flags = old->flags;
+	new->preferred_loc = old->preferred_loc;
+	new->prefetch_loc = old->prefetch_loc;
+	new->actual_loc = old->actual_loc;
+	new->granularity = old->granularity;
+	new->mapped_to_gpu = old->mapped_to_gpu;
+	bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
+	bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
+
+	return 0;
+}
+
+/**
+ * svm_range_split - split a range in 2 ranges
+ *
+ * @prange: the svm range to split
+ * @start: the remaining range start address in pages
+ * @last: the remaining range last address in pages
+ * @new: the result new range generated
+ *
+ * Two cases only:
+ * case 1: if start == prange->start
+ *         prange ==> prange[start, last]
+ *         new range [last + 1, prange->last]
+ *
+ * case 2: if last == prange->last
+ *         prange ==> prange[start, last]
+ *         new range [prange->start, start - 1]
+ *
+ * Return:
+ * 0 - OK, -ENOMEM - out of memory, -EINVAL - invalid start, last
+ */
+static int
+svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
+		struct svm_range **new)
+{
+	uint64_t old_start = prange->start;
+	uint64_t old_last = prange->last;
+	struct svm_range_list *svms;
+	int r = 0;
+
+	pr_debug("svms 0x%p [0x%llx 0x%llx] to [0x%llx 0x%llx]\n", prange->svms,
+		 old_start, old_last, start, last);
+
+	if (old_start != start && old_last != last)
+		return -EINVAL;
+	if (start < old_start || last > old_last)
+		return -EINVAL;
+
+	svms = prange->svms;
+	if (old_start == start)
+		*new = svm_range_new(svms, last + 1, old_last, false);
+	else
+		*new = svm_range_new(svms, old_start, start - 1, false);
+	if (!*new)
+		return -ENOMEM;
+
+	r = svm_range_split_adjust(*new, prange, start, last);
+	if (r) {
+		pr_debug("failed %d split [0x%llx 0x%llx] to [0x%llx 0x%llx]\n",
+			 r, old_start, old_last, start, last);
+		svm_range_free(*new, false);
+		*new = NULL;
+	}
+
+	return r;
+}
+
+static int
+svm_range_split_tail(struct svm_range *prange,
+		     uint64_t new_last, struct list_head *insert_list)
+{
+	struct svm_range *tail;
+	int r = svm_range_split(prange, prange->start, new_last, &tail);
+
+	if (!r)
+		list_add(&tail->list, insert_list);
+	return r;
+}
+
+static int
+svm_range_split_head(struct svm_range *prange,
+		     uint64_t new_start, struct list_head *insert_list)
+{
+	struct svm_range *head;
+	int r = svm_range_split(prange, new_start, prange->last, &head);
+
+	if (!r)
+		list_add(&head->list, insert_list);
+	return r;
+}
+
+static void
+svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,
+		    struct svm_range *pchild, enum svm_work_list_ops op)
+{
+	pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n",
+		 pchild, pchild->start, pchild->last, prange, op);
+
+	pchild->work_item.mm = mm;
+	pchild->work_item.op = op;
+	list_add_tail(&pchild->child_list, &prange->child_list);
+}
+
+/**
+ * svm_range_split_by_granularity - collect ranges within granularity boundary
+ *
+ * @p: the process with svms list
+ * @mm: mm structure
+ * @addr: the vm fault address in pages, to split the prange
+ * @parent: parent range if prange is from child list
+ * @prange: prange to split
+ *
+ * Trims @prange to be a single aligned block of prange->granularity if
+ * possible. The head and tail are added to the child_list in @parent.
+ *
+ * Context: caller must hold mmap_read_lock and prange->lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+int
+svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
+			       unsigned long addr, struct svm_range *parent,
+			       struct svm_range *prange)
+{
+	struct svm_range *head, *tail;
+	unsigned long start, last, size;
+	int r;
+
+	/* Align splited range start and size to granularity size, then a single
+	 * PTE will be used for whole range, this reduces the number of PTE
+	 * updated and the L1 TLB space used for translation.
+	 */
+	size = 1UL << prange->granularity;
+	start = ALIGN_DOWN(addr, size);
+	last = ALIGN(addr + 1, size) - 1;
+
+	pr_debug("svms 0x%p split [0x%lx 0x%lx] to [0x%lx 0x%lx] size 0x%lx\n",
+		 prange->svms, prange->start, prange->last, start, last, size);
+
+	if (start > prange->start) {
+		r = svm_range_split(prange, start, prange->last, &head);
+		if (r)
+			return r;
+		svm_range_add_child(parent, mm, head, SVM_OP_ADD_RANGE);
+	}
+
+	if (last < prange->last) {
+		r = svm_range_split(prange, prange->start, last, &tail);
+		if (r)
+			return r;
+		svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
+	}
+
+	/* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
+	if (p->xnack_enabled && prange->work_item.op == SVM_OP_ADD_RANGE) {
+		prange->work_item.op = SVM_OP_ADD_RANGE_AND_MAP;
+		pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n",
+			 prange, prange->start, prange->last,
+			 SVM_OP_ADD_RANGE_AND_MAP);
+	}
+	return 0;
+}
+static bool
+svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b)
+{
+	return (node_a->adev == node_b->adev ||
+		amdgpu_xgmi_same_hive(node_a->adev, node_b->adev));
+}
+
+static uint64_t
+svm_range_get_pte_flags(struct kfd_node *node,
+			struct svm_range *prange, int domain)
+{
+	struct kfd_node *bo_node;
+	uint32_t flags = prange->flags;
+	uint32_t mapping_flags = 0;
+	uint64_t pte_flags;
+	bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
+	bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
+	bool uncached = false; /*flags & KFD_IOCTL_SVM_FLAG_UNCACHED;*/
+	unsigned int mtype_local;
+
+	if (domain == SVM_RANGE_VRAM_DOMAIN)
+		bo_node = prange->svm_bo->node;
+
+	switch (node->adev->ip_versions[GC_HWIP][0]) {
+	case IP_VERSION(9, 4, 1):
+		if (domain == SVM_RANGE_VRAM_DOMAIN) {
+			if (bo_node == node) {
+				mapping_flags |= coherent ?
+					AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
+			} else {
+				mapping_flags |= coherent ?
+					AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+				if (svm_nodes_in_same_hive(node, bo_node))
+					snoop = true;
+			}
+		} else {
+			mapping_flags |= coherent ?
+				AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+		}
+		break;
+	case IP_VERSION(9, 4, 2):
+		if (domain == SVM_RANGE_VRAM_DOMAIN) {
+			if (bo_node == node) {
+				mapping_flags |= coherent ?
+					AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
+				if (node->adev->gmc.xgmi.connected_to_cpu)
+					snoop = true;
+			} else {
+				mapping_flags |= coherent ?
+					AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+				if (svm_nodes_in_same_hive(node, bo_node))
+					snoop = true;
+			}
+		} else {
+			mapping_flags |= coherent ?
+				AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+		}
+		break;
+	case IP_VERSION(9, 4, 3):
+		mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC :
+			     (amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW);
+		snoop = true;
+		if (uncached) {
+			mapping_flags |= AMDGPU_VM_MTYPE_UC;
+		} else if (domain == SVM_RANGE_VRAM_DOMAIN) {
+			/* local HBM region close to partition */
+			if (bo_node->adev == node->adev &&
+			    (!bo_node->xcp || !node->xcp || bo_node->xcp->mem_id == node->xcp->mem_id))
+				mapping_flags |= mtype_local;
+			/* local HBM region far from partition or remote XGMI GPU */
+			else if (svm_nodes_in_same_hive(bo_node, node))
+				mapping_flags |= AMDGPU_VM_MTYPE_NC;
+			/* PCIe P2P */
+			else
+				mapping_flags |= AMDGPU_VM_MTYPE_UC;
+		/* system memory accessed by the APU */
+		} else if (node->adev->flags & AMD_IS_APU) {
+			/* On NUMA systems, locality is determined per-page
+			 * in amdgpu_gmc_override_vm_pte_flags
+			 */
+			if (num_possible_nodes() <= 1)
+				mapping_flags |= mtype_local;
+			else
+				mapping_flags |= AMDGPU_VM_MTYPE_NC;
+		/* system memory accessed by the dGPU */
+		} else {
+			mapping_flags |= AMDGPU_VM_MTYPE_UC;
+		}
+		break;
+	default:
+		mapping_flags |= coherent ?
+			AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
+	}
+
+	mapping_flags |= AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE;
+
+	if (flags & KFD_IOCTL_SVM_FLAG_GPU_RO)
+		mapping_flags &= ~AMDGPU_VM_PAGE_WRITEABLE;
+	if (flags & KFD_IOCTL_SVM_FLAG_GPU_EXEC)
+		mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
+
+	pte_flags = AMDGPU_PTE_VALID;
+	pte_flags |= (domain == SVM_RANGE_VRAM_DOMAIN) ? 0 : AMDGPU_PTE_SYSTEM;
+	pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
+
+	pte_flags |= amdgpu_gem_va_map_flags(node->adev, mapping_flags);
+	return pte_flags;
+}
+
+static int
+svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			 uint64_t start, uint64_t last,
+			 struct dma_fence **fence)
+{
+	uint64_t init_pte_value = 0;
+
+	pr_debug("[0x%llx 0x%llx]\n", start, last);
+
+	return amdgpu_vm_update_range(adev, vm, false, true, true, NULL, start,
+				      last, init_pte_value, 0, 0, NULL, NULL,
+				      fence);
+}
+
+static int
+svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
+			  unsigned long last, uint32_t trigger)
+{
+	DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
+	struct kfd_process_device *pdd;
+	struct dma_fence *fence = NULL;
+	struct kfd_process *p;
+	uint32_t gpuidx;
+	int r = 0;
+
+	if (!prange->mapped_to_gpu) {
+		pr_debug("prange 0x%p [0x%lx 0x%lx] not mapped to GPU\n",
+			 prange, prange->start, prange->last);
+		return 0;
+	}
+
+	if (prange->start == start && prange->last == last) {
+		pr_debug("unmap svms 0x%p prange 0x%p\n", prange->svms, prange);
+		prange->mapped_to_gpu = false;
+	}
+
+	bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
+		  MAX_GPU_INSTANCE);
+	p = container_of(prange->svms, struct kfd_process, svms);
+
+	for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+		pr_debug("unmap from gpu idx 0x%x\n", gpuidx);
+		pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+		if (!pdd) {
+			pr_debug("failed to find device idx %d\n", gpuidx);
+			return -EINVAL;
+		}
+
+		kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread->pid,
+					     start, last, trigger);
+
+		r = svm_range_unmap_from_gpu(pdd->dev->adev,
+					     drm_priv_to_vm(pdd->drm_priv),
+					     start, last, &fence);
+		if (r)
+			break;
+
+		if (fence) {
+			r = dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+			fence = NULL;
+			if (r)
+				break;
+		}
+		kfd_flush_tlb(pdd, TLB_FLUSH_HEAVYWEIGHT);
+	}
+
+	return r;
+}
+
+static int
+svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
+		     unsigned long offset, unsigned long npages, bool readonly,
+		     dma_addr_t *dma_addr, struct amdgpu_device *bo_adev,
+		     struct dma_fence **fence, bool flush_tlb)
+{
+	struct amdgpu_device *adev = pdd->dev->adev;
+	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
+	uint64_t pte_flags;
+	unsigned long last_start;
+	int last_domain;
+	int r = 0;
+	int64_t i, j;
+
+	last_start = prange->start + offset;
+
+	pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms,
+		 last_start, last_start + npages - 1, readonly);
+
+	for (i = offset; i < offset + npages; i++) {
+		last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
+		dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN;
+
+		/* Collect all pages in the same address range and memory domain
+		 * that can be mapped with a single call to update mapping.
+		 */
+		if (i < offset + npages - 1 &&
+		    last_domain == (dma_addr[i + 1] & SVM_RANGE_VRAM_DOMAIN))
+			continue;
+
+		pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
+			 last_start, prange->start + i, last_domain ? "GPU" : "CPU");
+
+		pte_flags = svm_range_get_pte_flags(pdd->dev, prange, last_domain);
+		if (readonly)
+			pte_flags &= ~AMDGPU_PTE_WRITEABLE;
+
+		pr_debug("svms 0x%p map [0x%lx 0x%llx] vram %d PTE 0x%llx\n",
+			 prange->svms, last_start, prange->start + i,
+			 (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
+			 pte_flags);
+
+		/* For dGPU mode, we use same vm_manager to allocate VRAM for
+		 * different memory partition based on fpfn/lpfn, we should use
+		 * same vm_manager.vram_base_offset regardless memory partition.
+		 */
+		r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL,
+					   last_start, prange->start + i,
+					   pte_flags,
+					   (last_start - prange->start) << PAGE_SHIFT,
+					   bo_adev ? bo_adev->vm_manager.vram_base_offset : 0,
+					   NULL, dma_addr, &vm->last_update);
+
+		for (j = last_start - prange->start; j <= i; j++)
+			dma_addr[j] |= last_domain;
+
+		if (r) {
+			pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
+			goto out;
+		}
+		last_start = prange->start + i + 1;
+	}
+
+	r = amdgpu_vm_update_pdes(adev, vm, false);
+	if (r) {
+		pr_debug("failed %d to update directories 0x%lx\n", r,
+			 prange->start);
+		goto out;
+	}
+
+	if (fence)
+		*fence = dma_fence_get(vm->last_update);
+
+out:
+	return r;
+}
+
+static int
+svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
+		      unsigned long npages, bool readonly,
+		      unsigned long *bitmap, bool wait, bool flush_tlb)
+{
+	struct kfd_process_device *pdd;
+	struct amdgpu_device *bo_adev = NULL;
+	struct kfd_process *p;
+	struct dma_fence *fence = NULL;
+	uint32_t gpuidx;
+	int r = 0;
+
+	if (prange->svm_bo && prange->ttm_res)
+		bo_adev = prange->svm_bo->node->adev;
+
+	p = container_of(prange->svms, struct kfd_process, svms);
+	for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+		pr_debug("mapping to gpu idx 0x%x\n", gpuidx);
+		pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+		if (!pdd) {
+			pr_debug("failed to find device idx %d\n", gpuidx);
+			return -EINVAL;
+		}
+
+		pdd = kfd_bind_process_to_device(pdd->dev, p);
+		if (IS_ERR(pdd))
+			return -EINVAL;
+
+		if (bo_adev && pdd->dev->adev != bo_adev &&
+		    !amdgpu_xgmi_same_hive(pdd->dev->adev, bo_adev)) {
+			pr_debug("cannot map to device idx %d\n", gpuidx);
+			continue;
+		}
+
+		r = svm_range_map_to_gpu(pdd, prange, offset, npages, readonly,
+					 prange->dma_addr[gpuidx],
+					 bo_adev, wait ? &fence : NULL,
+					 flush_tlb);
+		if (r)
+			break;
+
+		if (fence) {
+			r = dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+			fence = NULL;
+			if (r) {
+				pr_debug("failed %d to dma fence wait\n", r);
+				break;
+			}
+		}
+
+		kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
+	}
+
+	return r;
+}
+
+struct svm_validate_context {
+	struct kfd_process *process;
+	struct svm_range *prange;
+	bool intr;
+	DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
+	struct drm_exec exec;
+};
+
+static int svm_range_reserve_bos(struct svm_validate_context *ctx, bool intr)
+{
+	struct kfd_process_device *pdd;
+	struct amdgpu_vm *vm;
+	uint32_t gpuidx;
+	int r;
+
+	drm_exec_init(&ctx->exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT: 0);
+	drm_exec_until_all_locked(&ctx->exec) {
+		for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
+			pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
+			if (!pdd) {
+				pr_debug("failed to find device idx %d\n", gpuidx);
+				r = -EINVAL;
+				goto unreserve_out;
+			}
+			vm = drm_priv_to_vm(pdd->drm_priv);
+
+			r = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
+			drm_exec_retry_on_contention(&ctx->exec);
+			if (unlikely(r)) {
+				pr_debug("failed %d to reserve bo\n", r);
+				goto unreserve_out;
+			}
+		}
+	}
+
+	for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
+		pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
+		if (!pdd) {
+			pr_debug("failed to find device idx %d\n", gpuidx);
+			r = -EINVAL;
+			goto unreserve_out;
+		}
+
+		r = amdgpu_vm_validate_pt_bos(pdd->dev->adev,
+					      drm_priv_to_vm(pdd->drm_priv),
+					      svm_range_bo_validate, NULL);
+		if (r) {
+			pr_debug("failed %d validate pt bos\n", r);
+			goto unreserve_out;
+		}
+	}
+
+	return 0;
+
+unreserve_out:
+	drm_exec_fini(&ctx->exec);
+	return r;
+}
+
+static void svm_range_unreserve_bos(struct svm_validate_context *ctx)
+{
+	drm_exec_fini(&ctx->exec);
+}
+
+static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
+{
+	struct kfd_process_device *pdd;
+
+	pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+	if (!pdd)
+		return NULL;
+
+	return SVM_ADEV_PGMAP_OWNER(pdd->dev->adev);
+}
+
+/*
+ * Validation+GPU mapping with concurrent invalidation (MMU notifiers)
+ *
+ * To prevent concurrent destruction or change of range attributes, the
+ * svm_read_lock must be held. The caller must not hold the svm_write_lock
+ * because that would block concurrent evictions and lead to deadlocks. To
+ * serialize concurrent migrations or validations of the same range, the
+ * prange->migrate_mutex must be held.
+ *
+ * For VRAM ranges, the SVM BO must be allocated and valid (protected by its
+ * eviction fence.
+ *
+ * The following sequence ensures race-free validation and GPU mapping:
+ *
+ * 1. Reserve page table (and SVM BO if range is in VRAM)
+ * 2. hmm_range_fault to get page addresses (if system memory)
+ * 3. DMA-map pages (if system memory)
+ * 4-a. Take notifier lock
+ * 4-b. Check that pages still valid (mmu_interval_read_retry)
+ * 4-c. Check that the range was not split or otherwise invalidated
+ * 4-d. Update GPU page table
+ * 4.e. Release notifier lock
+ * 5. Release page table (and SVM BO) reservation
+ */
+static int svm_range_validate_and_map(struct mm_struct *mm,
+				      struct svm_range *prange, int32_t gpuidx,
+				      bool intr, bool wait, bool flush_tlb)
+{
+	struct svm_validate_context *ctx;
+	unsigned long start, end, addr;
+	struct kfd_process *p;
+	void *owner;
+	int32_t idx;
+	int r = 0;
+
+	ctx = kzalloc(sizeof(struct svm_validate_context), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+	ctx->process = container_of(prange->svms, struct kfd_process, svms);
+	ctx->prange = prange;
+	ctx->intr = intr;
+
+	if (gpuidx < MAX_GPU_INSTANCE) {
+		bitmap_zero(ctx->bitmap, MAX_GPU_INSTANCE);
+		bitmap_set(ctx->bitmap, gpuidx, 1);
+	} else if (ctx->process->xnack_enabled) {
+		bitmap_copy(ctx->bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
+
+		/* If prefetch range to GPU, or GPU retry fault migrate range to
+		 * GPU, which has ACCESS attribute to the range, create mapping
+		 * on that GPU.
+		 */
+		if (prange->actual_loc) {
+			gpuidx = kfd_process_gpuidx_from_gpuid(ctx->process,
+							prange->actual_loc);
+			if (gpuidx < 0) {
+				WARN_ONCE(1, "failed get device by id 0x%x\n",
+					 prange->actual_loc);
+				r = -EINVAL;
+				goto free_ctx;
+			}
+			if (test_bit(gpuidx, prange->bitmap_access))
+				bitmap_set(ctx->bitmap, gpuidx, 1);
+		}
+
+		/*
+		 * If prange is already mapped or with always mapped flag,
+		 * update mapping on GPUs with ACCESS attribute
+		 */
+		if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
+			if (prange->mapped_to_gpu ||
+			    prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)
+				bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
+		}
+	} else {
+		bitmap_or(ctx->bitmap, prange->bitmap_access,
+			  prange->bitmap_aip, MAX_GPU_INSTANCE);
+	}
+
+	if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
+		r = 0;
+		goto free_ctx;
+	}
+
+	if (prange->actual_loc && !prange->ttm_res) {
+		/* This should never happen. actual_loc gets set by
+		 * svm_migrate_ram_to_vram after allocating a BO.
+		 */
+		WARN_ONCE(1, "VRAM BO missing during validation\n");
+		r = -EINVAL;
+		goto free_ctx;
+	}
+
+	svm_range_reserve_bos(ctx, intr);
+
+	p = container_of(prange->svms, struct kfd_process, svms);
+	owner = kfd_svm_page_owner(p, find_first_bit(ctx->bitmap,
+						MAX_GPU_INSTANCE));
+	for_each_set_bit(idx, ctx->bitmap, MAX_GPU_INSTANCE) {
+		if (kfd_svm_page_owner(p, idx) != owner) {
+			owner = NULL;
+			break;
+		}
+	}
+
+	start = prange->start << PAGE_SHIFT;
+	end = (prange->last + 1) << PAGE_SHIFT;
+	for (addr = start; !r && addr < end; ) {
+		struct hmm_range *hmm_range;
+		struct vm_area_struct *vma;
+		unsigned long next = 0;
+		unsigned long offset;
+		unsigned long npages;
+		bool readonly;
+
+		vma = vma_lookup(mm, addr);
+		if (vma) {
+			readonly = !(vma->vm_flags & VM_WRITE);
+
+			next = min(vma->vm_end, end);
+			npages = (next - addr) >> PAGE_SHIFT;
+			WRITE_ONCE(p->svms.faulting_task, current);
+			r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
+						       readonly, owner, NULL,
+						       &hmm_range);
+			WRITE_ONCE(p->svms.faulting_task, NULL);
+			if (r) {
+				pr_debug("failed %d to get svm range pages\n", r);
+				if (r == -EBUSY)
+					r = -EAGAIN;
+			}
+		} else {
+			r = -EFAULT;
+		}
+
+		if (!r) {
+			offset = (addr - start) >> PAGE_SHIFT;
+			r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
+					      hmm_range->hmm_pfns);
+			if (r)
+				pr_debug("failed %d to dma map range\n", r);
+		}
+
+		svm_range_lock(prange);
+		if (!r && amdgpu_hmm_range_get_pages_done(hmm_range)) {
+			pr_debug("hmm update the range, need validate again\n");
+			r = -EAGAIN;
+		}
+
+		if (!r && !list_empty(&prange->child_list)) {
+			pr_debug("range split by unmap in parallel, validate again\n");
+			r = -EAGAIN;
+		}
+
+		if (!r)
+			r = svm_range_map_to_gpus(prange, offset, npages, readonly,
+						  ctx->bitmap, wait, flush_tlb);
+
+		if (!r && next == end)
+			prange->mapped_to_gpu = true;
+
+		svm_range_unlock(prange);
+
+		addr = next;
+	}
+
+	svm_range_unreserve_bos(ctx);
+	if (!r)
+		prange->validate_timestamp = ktime_get_boottime();
+
+free_ctx:
+	kfree(ctx);
+
+	return r;
+}
+
+/**
+ * svm_range_list_lock_and_flush_work - flush pending deferred work
+ *
+ * @svms: the svm range list
+ * @mm: the mm structure
+ *
+ * Context: Returns with mmap write lock held, pending deferred work flushed
+ *
+ */
+void
+svm_range_list_lock_and_flush_work(struct svm_range_list *svms,
+				   struct mm_struct *mm)
+{
+retry_flush_work:
+	flush_work(&svms->deferred_list_work);
+	mmap_write_lock(mm);
+
+	if (list_empty(&svms->deferred_range_list))
+		return;
+	mmap_write_unlock(mm);
+	pr_debug("retry flush\n");
+	goto retry_flush_work;
+}
+
+static void svm_range_restore_work(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct amdkfd_process_info *process_info;
+	struct svm_range_list *svms;
+	struct svm_range *prange;
+	struct kfd_process *p;
+	struct mm_struct *mm;
+	int evicted_ranges;
+	int invalid;
+	int r;
+
+	svms = container_of(dwork, struct svm_range_list, restore_work);
+	evicted_ranges = atomic_read(&svms->evicted_ranges);
+	if (!evicted_ranges)
+		return;
+
+	pr_debug("restore svm ranges\n");
+
+	p = container_of(svms, struct kfd_process, svms);
+	process_info = p->kgd_process_info;
+
+	/* Keep mm reference when svm_range_validate_and_map ranges */
+	mm = get_task_mm(p->lead_thread);
+	if (!mm) {
+		pr_debug("svms 0x%p process mm gone\n", svms);
+		return;
+	}
+
+	mutex_lock(&process_info->lock);
+	svm_range_list_lock_and_flush_work(svms, mm);
+	mutex_lock(&svms->lock);
+
+	evicted_ranges = atomic_read(&svms->evicted_ranges);
+
+	list_for_each_entry(prange, &svms->list, list) {
+		invalid = atomic_read(&prange->invalid);
+		if (!invalid)
+			continue;
+
+		pr_debug("restoring svms 0x%p prange 0x%p [0x%lx %lx] inv %d\n",
+			 prange->svms, prange, prange->start, prange->last,
+			 invalid);
+
+		/*
+		 * If range is migrating, wait for migration is done.
+		 */
+		mutex_lock(&prange->migrate_mutex);
+
+		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
+					       false, true, false);
+		if (r)
+			pr_debug("failed %d to map 0x%lx to gpus\n", r,
+				 prange->start);
+
+		mutex_unlock(&prange->migrate_mutex);
+		if (r)
+			goto out_reschedule;
+
+		if (atomic_cmpxchg(&prange->invalid, invalid, 0) != invalid)
+			goto out_reschedule;
+	}
+
+	if (atomic_cmpxchg(&svms->evicted_ranges, evicted_ranges, 0) !=
+	    evicted_ranges)
+		goto out_reschedule;
+
+	evicted_ranges = 0;
+
+	r = kgd2kfd_resume_mm(mm);
+	if (r) {
+		/* No recovery from this failure. Probably the CP is
+		 * hanging. No point trying again.
+		 */
+		pr_debug("failed %d to resume KFD\n", r);
+	}
+
+	pr_debug("restore svm ranges successfully\n");
+
+out_reschedule:
+	mutex_unlock(&svms->lock);
+	mmap_write_unlock(mm);
+	mutex_unlock(&process_info->lock);
+
+	/* If validation failed, reschedule another attempt */
+	if (evicted_ranges) {
+		pr_debug("reschedule to restore svm range\n");
+		schedule_delayed_work(&svms->restore_work,
+			msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+
+		kfd_smi_event_queue_restore_rescheduled(mm);
+	}
+	mmput(mm);
+}
+
+/**
+ * svm_range_evict - evict svm range
+ * @prange: svm range structure
+ * @mm: current process mm_struct
+ * @start: starting process queue number
+ * @last: last process queue number
+ * @event: mmu notifier event when range is evicted or migrated
+ *
+ * Stop all queues of the process to ensure GPU doesn't access the memory, then
+ * return to let CPU evict the buffer and proceed CPU pagetable update.
+ *
+ * Don't need use lock to sync cpu pagetable invalidation with GPU execution.
+ * If invalidation happens while restore work is running, restore work will
+ * restart to ensure to get the latest CPU pages mapping to GPU, then start
+ * the queues.
+ */
+static int
+svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
+		unsigned long start, unsigned long last,
+		enum mmu_notifier_event event)
+{
+	struct svm_range_list *svms = prange->svms;
+	struct svm_range *pchild;
+	struct kfd_process *p;
+	int r = 0;
+
+	p = container_of(svms, struct kfd_process, svms);
+
+	pr_debug("invalidate svms 0x%p prange [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
+		 svms, prange->start, prange->last, start, last);
+
+	if (!p->xnack_enabled ||
+	    (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) {
+		int evicted_ranges;
+		bool mapped = prange->mapped_to_gpu;
+
+		list_for_each_entry(pchild, &prange->child_list, child_list) {
+			if (!pchild->mapped_to_gpu)
+				continue;
+			mapped = true;
+			mutex_lock_nested(&pchild->lock, 1);
+			if (pchild->start <= last && pchild->last >= start) {
+				pr_debug("increment pchild invalid [0x%lx 0x%lx]\n",
+					 pchild->start, pchild->last);
+				atomic_inc(&pchild->invalid);
+			}
+			mutex_unlock(&pchild->lock);
+		}
+
+		if (!mapped)
+			return r;
+
+		if (prange->start <= last && prange->last >= start)
+			atomic_inc(&prange->invalid);
+
+		evicted_ranges = atomic_inc_return(&svms->evicted_ranges);
+		if (evicted_ranges != 1)
+			return r;
+
+		pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n",
+			 prange->svms, prange->start, prange->last);
+
+		/* First eviction, stop the queues */
+		r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_SVM);
+		if (r)
+			pr_debug("failed to quiesce KFD\n");
+
+		pr_debug("schedule to restore svm %p ranges\n", svms);
+		schedule_delayed_work(&svms->restore_work,
+			msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+	} else {
+		unsigned long s, l;
+		uint32_t trigger;
+
+		if (event == MMU_NOTIFY_MIGRATE)
+			trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE;
+		else
+			trigger = KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY;
+
+		pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
+			 prange->svms, start, last);
+		list_for_each_entry(pchild, &prange->child_list, child_list) {
+			mutex_lock_nested(&pchild->lock, 1);
+			s = max(start, pchild->start);
+			l = min(last, pchild->last);
+			if (l >= s)
+				svm_range_unmap_from_gpus(pchild, s, l, trigger);
+			mutex_unlock(&pchild->lock);
+		}
+		s = max(start, prange->start);
+		l = min(last, prange->last);
+		if (l >= s)
+			svm_range_unmap_from_gpus(prange, s, l, trigger);
+	}
+
+	return r;
+}
+
+static struct svm_range *svm_range_clone(struct svm_range *old)
+{
+	struct svm_range *new;
+
+	new = svm_range_new(old->svms, old->start, old->last, false);
+	if (!new)
+		return NULL;
+	if (svm_range_copy_dma_addrs(new, old)) {
+		svm_range_free(new, false);
+		return NULL;
+	}
+	if (old->svm_bo) {
+		new->ttm_res = old->ttm_res;
+		new->offset = old->offset;
+		new->svm_bo = svm_range_bo_ref(old->svm_bo);
+		spin_lock(&new->svm_bo->list_lock);
+		list_add(&new->svm_bo_list, &new->svm_bo->range_list);
+		spin_unlock(&new->svm_bo->list_lock);
+	}
+	new->flags = old->flags;
+	new->preferred_loc = old->preferred_loc;
+	new->prefetch_loc = old->prefetch_loc;
+	new->actual_loc = old->actual_loc;
+	new->granularity = old->granularity;
+	new->mapped_to_gpu = old->mapped_to_gpu;
+	bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
+	bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
+
+	return new;
+}
+
+void svm_range_set_max_pages(struct amdgpu_device *adev)
+{
+	uint64_t max_pages;
+	uint64_t pages, _pages;
+	uint64_t min_pages = 0;
+	int i, id;
+
+	for (i = 0; i < adev->kfd.dev->num_nodes; i++) {
+		if (adev->kfd.dev->nodes[i]->xcp)
+			id = adev->kfd.dev->nodes[i]->xcp->id;
+		else
+			id = -1;
+		pages = KFD_XCP_MEMORY_SIZE(adev, id) >> 17;
+		pages = clamp(pages, 1ULL << 9, 1ULL << 18);
+		pages = rounddown_pow_of_two(pages);
+		min_pages = min_not_zero(min_pages, pages);
+	}
+
+	do {
+		max_pages = READ_ONCE(max_svm_range_pages);
+		_pages = min_not_zero(max_pages, min_pages);
+	} while (cmpxchg(&max_svm_range_pages, max_pages, _pages) != max_pages);
+}
+
+static int
+svm_range_split_new(struct svm_range_list *svms, uint64_t start, uint64_t last,
+		    uint64_t max_pages, struct list_head *insert_list,
+		    struct list_head *update_list)
+{
+	struct svm_range *prange;
+	uint64_t l;
+
+	pr_debug("max_svm_range_pages 0x%llx adding [0x%llx 0x%llx]\n",
+		 max_pages, start, last);
+
+	while (last >= start) {
+		l = min(last, ALIGN_DOWN(start + max_pages, max_pages) - 1);
+
+		prange = svm_range_new(svms, start, l, true);
+		if (!prange)
+			return -ENOMEM;
+		list_add(&prange->list, insert_list);
+		list_add(&prange->update_list, update_list);
+
+		start = l + 1;
+	}
+	return 0;
+}
+
+/**
+ * svm_range_add - add svm range and handle overlap
+ * @p: the range add to this process svms
+ * @start: page size aligned
+ * @size: page size aligned
+ * @nattr: number of attributes
+ * @attrs: array of attributes
+ * @update_list: output, the ranges need validate and update GPU mapping
+ * @insert_list: output, the ranges need insert to svms
+ * @remove_list: output, the ranges are replaced and need remove from svms
+ *
+ * Check if the virtual address range has overlap with any existing ranges,
+ * split partly overlapping ranges and add new ranges in the gaps. All changes
+ * should be applied to the range_list and interval tree transactionally. If
+ * any range split or allocation fails, the entire update fails. Therefore any
+ * existing overlapping svm_ranges are cloned and the original svm_ranges left
+ * unchanged.
+ *
+ * If the transaction succeeds, the caller can update and insert clones and
+ * new ranges, then free the originals.
+ *
+ * Otherwise the caller can free the clones and new ranges, while the old
+ * svm_ranges remain unchanged.
+ *
+ * Context: Process context, caller must hold svms->lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+static int
+svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
+	      uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
+	      struct list_head *update_list, struct list_head *insert_list,
+	      struct list_head *remove_list)
+{
+	unsigned long last = start + size - 1UL;
+	struct svm_range_list *svms = &p->svms;
+	struct interval_tree_node *node;
+	struct svm_range *prange;
+	struct svm_range *tmp;
+	struct list_head new_list;
+	int r = 0;
+
+	pr_debug("svms 0x%p [0x%llx 0x%lx]\n", &p->svms, start, last);
+
+	INIT_LIST_HEAD(update_list);
+	INIT_LIST_HEAD(insert_list);
+	INIT_LIST_HEAD(remove_list);
+	INIT_LIST_HEAD(&new_list);
+
+	node = interval_tree_iter_first(&svms->objects, start, last);
+	while (node) {
+		struct interval_tree_node *next;
+		unsigned long next_start;
+
+		pr_debug("found overlap node [0x%lx 0x%lx]\n", node->start,
+			 node->last);
+
+		prange = container_of(node, struct svm_range, it_node);
+		next = interval_tree_iter_next(node, start, last);
+		next_start = min(node->last, last) + 1;
+
+		if (svm_range_is_same_attrs(p, prange, nattr, attrs) &&
+		    prange->mapped_to_gpu) {
+			/* nothing to do */
+		} else if (node->start < start || node->last > last) {
+			/* node intersects the update range and its attributes
+			 * will change. Clone and split it, apply updates only
+			 * to the overlapping part
+			 */
+			struct svm_range *old = prange;
+
+			prange = svm_range_clone(old);
+			if (!prange) {
+				r = -ENOMEM;
+				goto out;
+			}
+
+			list_add(&old->update_list, remove_list);
+			list_add(&prange->list, insert_list);
+			list_add(&prange->update_list, update_list);
+
+			if (node->start < start) {
+				pr_debug("change old range start\n");
+				r = svm_range_split_head(prange, start,
+							 insert_list);
+				if (r)
+					goto out;
+			}
+			if (node->last > last) {
+				pr_debug("change old range last\n");
+				r = svm_range_split_tail(prange, last,
+							 insert_list);
+				if (r)
+					goto out;
+			}
+		} else {
+			/* The node is contained within start..last,
+			 * just update it
+			 */
+			list_add(&prange->update_list, update_list);
+		}
+
+		/* insert a new node if needed */
+		if (node->start > start) {
+			r = svm_range_split_new(svms, start, node->start - 1,
+						READ_ONCE(max_svm_range_pages),
+						&new_list, update_list);
+			if (r)
+				goto out;
+		}
+
+		node = next;
+		start = next_start;
+	}
+
+	/* add a final range at the end if needed */
+	if (start <= last)
+		r = svm_range_split_new(svms, start, last,
+					READ_ONCE(max_svm_range_pages),
+					&new_list, update_list);
+
+out:
+	if (r) {
+		list_for_each_entry_safe(prange, tmp, insert_list, list)
+			svm_range_free(prange, false);
+		list_for_each_entry_safe(prange, tmp, &new_list, list)
+			svm_range_free(prange, true);
+	} else {
+		list_splice(&new_list, insert_list);
+	}
+
+	return r;
+}
+
+static void
+svm_range_update_notifier_and_interval_tree(struct mm_struct *mm,
+					    struct svm_range *prange)
+{
+	unsigned long start;
+	unsigned long last;
+
+	start = prange->notifier.interval_tree.start >> PAGE_SHIFT;
+	last = prange->notifier.interval_tree.last >> PAGE_SHIFT;
+
+	if (prange->start == start && prange->last == last)
+		return;
+
+	pr_debug("up notifier 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
+		  prange->svms, prange, start, last, prange->start,
+		  prange->last);
+
+	if (start != 0 && last != 0) {
+		interval_tree_remove(&prange->it_node, &prange->svms->objects);
+		svm_range_remove_notifier(prange);
+	}
+	prange->it_node.start = prange->start;
+	prange->it_node.last = prange->last;
+
+	interval_tree_insert(&prange->it_node, &prange->svms->objects);
+	svm_range_add_notifier_locked(mm, prange);
+}
+
+static void
+svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange,
+			 struct mm_struct *mm)
+{
+	switch (prange->work_item.op) {
+	case SVM_OP_NULL:
+		pr_debug("NULL OP 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+			 svms, prange, prange->start, prange->last);
+		break;
+	case SVM_OP_UNMAP_RANGE:
+		pr_debug("remove 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+			 svms, prange, prange->start, prange->last);
+		svm_range_unlink(prange);
+		svm_range_remove_notifier(prange);
+		svm_range_free(prange, true);
+		break;
+	case SVM_OP_UPDATE_RANGE_NOTIFIER:
+		pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+			 svms, prange, prange->start, prange->last);
+		svm_range_update_notifier_and_interval_tree(mm, prange);
+		break;
+	case SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP:
+		pr_debug("update and map 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+			 svms, prange, prange->start, prange->last);
+		svm_range_update_notifier_and_interval_tree(mm, prange);
+		/* TODO: implement deferred validation and mapping */
+		break;
+	case SVM_OP_ADD_RANGE:
+		pr_debug("add 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange,
+			 prange->start, prange->last);
+		svm_range_add_to_svms(prange);
+		svm_range_add_notifier_locked(mm, prange);
+		break;
+	case SVM_OP_ADD_RANGE_AND_MAP:
+		pr_debug("add and map 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms,
+			 prange, prange->start, prange->last);
+		svm_range_add_to_svms(prange);
+		svm_range_add_notifier_locked(mm, prange);
+		/* TODO: implement deferred validation and mapping */
+		break;
+	default:
+		WARN_ONCE(1, "Unknown prange 0x%p work op %d\n", prange,
+			 prange->work_item.op);
+	}
+}
+
+static void svm_range_drain_retry_fault(struct svm_range_list *svms)
+{
+	struct kfd_process_device *pdd;
+	struct kfd_process *p;
+	int drain;
+	uint32_t i;
+
+	p = container_of(svms, struct kfd_process, svms);
+
+restart:
+	drain = atomic_read(&svms->drain_pagefaults);
+	if (!drain)
+		return;
+
+	for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
+		pdd = p->pdds[i];
+		if (!pdd)
+			continue;
+
+		pr_debug("drain retry fault gpu %d svms %p\n", i, svms);
+
+		amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev,
+				pdd->dev->adev->irq.retry_cam_enabled ?
+				&pdd->dev->adev->irq.ih :
+				&pdd->dev->adev->irq.ih1);
+
+		if (pdd->dev->adev->irq.retry_cam_enabled)
+			amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev,
+				&pdd->dev->adev->irq.ih_soft);
+
+
+		pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
+	}
+	if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain)
+		goto restart;
+}
+
+static void svm_range_deferred_list_work(struct work_struct *work)
+{
+	struct svm_range_list *svms;
+	struct svm_range *prange;
+	struct mm_struct *mm;
+
+	svms = container_of(work, struct svm_range_list, deferred_list_work);
+	pr_debug("enter svms 0x%p\n", svms);
+
+	spin_lock(&svms->deferred_list_lock);
+	while (!list_empty(&svms->deferred_range_list)) {
+		prange = list_first_entry(&svms->deferred_range_list,
+					  struct svm_range, deferred_list);
+		spin_unlock(&svms->deferred_list_lock);
+
+		pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,
+			 prange->start, prange->last, prange->work_item.op);
+
+		mm = prange->work_item.mm;
+retry:
+		mmap_write_lock(mm);
+
+		/* Checking for the need to drain retry faults must be inside
+		 * mmap write lock to serialize with munmap notifiers.
+		 */
+		if (unlikely(atomic_read(&svms->drain_pagefaults))) {
+			mmap_write_unlock(mm);
+			svm_range_drain_retry_fault(svms);
+			goto retry;
+		}
+
+		/* Remove from deferred_list must be inside mmap write lock, for
+		 * two race cases:
+		 * 1. unmap_from_cpu may change work_item.op and add the range
+		 *    to deferred_list again, cause use after free bug.
+		 * 2. svm_range_list_lock_and_flush_work may hold mmap write
+		 *    lock and continue because deferred_list is empty, but
+		 *    deferred_list work is actually waiting for mmap lock.
+		 */
+		spin_lock(&svms->deferred_list_lock);
+		list_del_init(&prange->deferred_list);
+		spin_unlock(&svms->deferred_list_lock);
+
+		mutex_lock(&svms->lock);
+		mutex_lock(&prange->migrate_mutex);
+		while (!list_empty(&prange->child_list)) {
+			struct svm_range *pchild;
+
+			pchild = list_first_entry(&prange->child_list,
+						struct svm_range, child_list);
+			pr_debug("child prange 0x%p op %d\n", pchild,
+				 pchild->work_item.op);
+			list_del_init(&pchild->child_list);
+			svm_range_handle_list_op(svms, pchild, mm);
+		}
+		mutex_unlock(&prange->migrate_mutex);
+
+		svm_range_handle_list_op(svms, prange, mm);
+		mutex_unlock(&svms->lock);
+		mmap_write_unlock(mm);
+
+		/* Pairs with mmget in svm_range_add_list_work */
+		mmput(mm);
+
+		spin_lock(&svms->deferred_list_lock);
+	}
+	spin_unlock(&svms->deferred_list_lock);
+	pr_debug("exit svms 0x%p\n", svms);
+}
+
+void
+svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
+			struct mm_struct *mm, enum svm_work_list_ops op)
+{
+	spin_lock(&svms->deferred_list_lock);
+	/* if prange is on the deferred list */
+	if (!list_empty(&prange->deferred_list)) {
+		pr_debug("update exist prange 0x%p work op %d\n", prange, op);
+		WARN_ONCE(prange->work_item.mm != mm, "unmatch mm\n");
+		if (op != SVM_OP_NULL &&
+		    prange->work_item.op != SVM_OP_UNMAP_RANGE)
+			prange->work_item.op = op;
+	} else {
+		prange->work_item.op = op;
+
+		/* Pairs with mmput in deferred_list_work */
+		mmget(mm);
+		prange->work_item.mm = mm;
+		list_add_tail(&prange->deferred_list,
+			      &prange->svms->deferred_range_list);
+		pr_debug("add prange 0x%p [0x%lx 0x%lx] to work list op %d\n",
+			 prange, prange->start, prange->last, op);
+	}
+	spin_unlock(&svms->deferred_list_lock);
+}
+
+void schedule_deferred_list_work(struct svm_range_list *svms)
+{
+	spin_lock(&svms->deferred_list_lock);
+	if (!list_empty(&svms->deferred_range_list))
+		schedule_work(&svms->deferred_list_work);
+	spin_unlock(&svms->deferred_list_lock);
+}
+
+static void
+svm_range_unmap_split(struct mm_struct *mm, struct svm_range *parent,
+		      struct svm_range *prange, unsigned long start,
+		      unsigned long last)
+{
+	struct svm_range *head;
+	struct svm_range *tail;
+
+	if (prange->work_item.op == SVM_OP_UNMAP_RANGE) {
+		pr_debug("prange 0x%p [0x%lx 0x%lx] is already freed\n", prange,
+			 prange->start, prange->last);
+		return;
+	}
+	if (start > prange->last || last < prange->start)
+		return;
+
+	head = tail = prange;
+	if (start > prange->start)
+		svm_range_split(prange, prange->start, start - 1, &tail);
+	if (last < tail->last)
+		svm_range_split(tail, last + 1, tail->last, &head);
+
+	if (head != prange && tail != prange) {
+		svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
+		svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE);
+	} else if (tail != prange) {
+		svm_range_add_child(parent, mm, tail, SVM_OP_UNMAP_RANGE);
+	} else if (head != prange) {
+		svm_range_add_child(parent, mm, head, SVM_OP_UNMAP_RANGE);
+	} else if (parent != prange) {
+		prange->work_item.op = SVM_OP_UNMAP_RANGE;
+	}
+}
+
+static void
+svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
+			 unsigned long start, unsigned long last)
+{
+	uint32_t trigger = KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU;
+	struct svm_range_list *svms;
+	struct svm_range *pchild;
+	struct kfd_process *p;
+	unsigned long s, l;
+	bool unmap_parent;
+
+	p = kfd_lookup_process_by_mm(mm);
+	if (!p)
+		return;
+	svms = &p->svms;
+
+	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,
+		 prange, prange->start, prange->last, start, last);
+
+	/* Make sure pending page faults are drained in the deferred worker
+	 * before the range is freed to avoid straggler interrupts on
+	 * unmapped memory causing "phantom faults".
+	 */
+	atomic_inc(&svms->drain_pagefaults);
+
+	unmap_parent = start <= prange->start && last >= prange->last;
+
+	list_for_each_entry(pchild, &prange->child_list, child_list) {
+		mutex_lock_nested(&pchild->lock, 1);
+		s = max(start, pchild->start);
+		l = min(last, pchild->last);
+		if (l >= s)
+			svm_range_unmap_from_gpus(pchild, s, l, trigger);
+		svm_range_unmap_split(mm, prange, pchild, start, last);
+		mutex_unlock(&pchild->lock);
+	}
+	s = max(start, prange->start);
+	l = min(last, prange->last);
+	if (l >= s)
+		svm_range_unmap_from_gpus(prange, s, l, trigger);
+	svm_range_unmap_split(mm, prange, prange, start, last);
+
+	if (unmap_parent)
+		svm_range_add_list_work(svms, prange, mm, SVM_OP_UNMAP_RANGE);
+	else
+		svm_range_add_list_work(svms, prange, mm,
+					SVM_OP_UPDATE_RANGE_NOTIFIER);
+	schedule_deferred_list_work(svms);
+
+	kfd_unref_process(p);
+}
+
+/**
+ * svm_range_cpu_invalidate_pagetables - interval notifier callback
+ * @mni: mmu_interval_notifier struct
+ * @range: mmu_notifier_range struct
+ * @cur_seq: value to pass to mmu_interval_set_seq()
+ *
+ * If event is MMU_NOTIFY_UNMAP, this is from CPU unmap range, otherwise, it
+ * is from migration, or CPU page invalidation callback.
+ *
+ * For unmap event, unmap range from GPUs, remove prange from svms in a delayed
+ * work thread, and split prange if only part of prange is unmapped.
+ *
+ * For invalidation event, if GPU retry fault is not enabled, evict the queues,
+ * then schedule svm_range_restore_work to update GPU mapping and resume queues.
+ * If GPU retry fault is enabled, unmap the svm range from GPU, retry fault will
+ * update GPU mapping to recover.
+ *
+ * Context: mmap lock, notifier_invalidate_start lock are held
+ *          for invalidate event, prange lock is held if this is from migration
+ */
+static bool
+svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
+				    const struct mmu_notifier_range *range,
+				    unsigned long cur_seq)
+{
+	struct svm_range *prange;
+	unsigned long start;
+	unsigned long last;
+
+	if (range->event == MMU_NOTIFY_RELEASE)
+		return true;
+	if (!mmget_not_zero(mni->mm))
+		return true;
+
+	start = mni->interval_tree.start;
+	last = mni->interval_tree.last;
+	start = max(start, range->start) >> PAGE_SHIFT;
+	last = min(last, range->end - 1) >> PAGE_SHIFT;
+	pr_debug("[0x%lx 0x%lx] range[0x%lx 0x%lx] notifier[0x%lx 0x%lx] %d\n",
+		 start, last, range->start >> PAGE_SHIFT,
+		 (range->end - 1) >> PAGE_SHIFT,
+		 mni->interval_tree.start >> PAGE_SHIFT,
+		 mni->interval_tree.last >> PAGE_SHIFT, range->event);
+
+	prange = container_of(mni, struct svm_range, notifier);
+
+	svm_range_lock(prange);
+	mmu_interval_set_seq(mni, cur_seq);
+
+	switch (range->event) {
+	case MMU_NOTIFY_UNMAP:
+		svm_range_unmap_from_cpu(mni->mm, prange, start, last);
+		break;
+	default:
+		svm_range_evict(prange, mni->mm, start, last, range->event);
+		break;
+	}
+
+	svm_range_unlock(prange);
+	mmput(mni->mm);
+
+	return true;
+}
+
+/**
+ * svm_range_from_addr - find svm range from fault address
+ * @svms: svm range list header
+ * @addr: address to search range interval tree, in pages
+ * @parent: parent range if range is on child list
+ *
+ * Context: The caller must hold svms->lock
+ *
+ * Return: the svm_range found or NULL
+ */
+struct svm_range *
+svm_range_from_addr(struct svm_range_list *svms, unsigned long addr,
+		    struct svm_range **parent)
+{
+	struct interval_tree_node *node;
+	struct svm_range *prange;
+	struct svm_range *pchild;
+
+	node = interval_tree_iter_first(&svms->objects, addr, addr);
+	if (!node)
+		return NULL;
+
+	prange = container_of(node, struct svm_range, it_node);
+	pr_debug("address 0x%lx prange [0x%lx 0x%lx] node [0x%lx 0x%lx]\n",
+		 addr, prange->start, prange->last, node->start, node->last);
+
+	if (addr >= prange->start && addr <= prange->last) {
+		if (parent)
+			*parent = prange;
+		return prange;
+	}
+	list_for_each_entry(pchild, &prange->child_list, child_list)
+		if (addr >= pchild->start && addr <= pchild->last) {
+			pr_debug("found address 0x%lx pchild [0x%lx 0x%lx]\n",
+				 addr, pchild->start, pchild->last);
+			if (parent)
+				*parent = prange;
+			return pchild;
+		}
+
+	return NULL;
+}
+
+/* svm_range_best_restore_location - decide the best fault restore location
+ * @prange: svm range structure
+ * @adev: the GPU on which vm fault happened
+ *
+ * This is only called when xnack is on, to decide the best location to restore
+ * the range mapping after GPU vm fault. Caller uses the best location to do
+ * migration if actual loc is not best location, then update GPU page table
+ * mapping to the best location.
+ *
+ * If the preferred loc is accessible by faulting GPU, use preferred loc.
+ * If vm fault gpu idx is on range ACCESSIBLE bitmap, best_loc is vm fault gpu
+ * If vm fault gpu idx is on range ACCESSIBLE_IN_PLACE bitmap, then
+ *    if range actual loc is cpu, best_loc is cpu
+ *    if vm fault gpu is on xgmi same hive of range actual loc gpu, best_loc is
+ *    range actual loc.
+ * Otherwise, GPU no access, best_loc is -1.
+ *
+ * Return:
+ * -1 means vm fault GPU no access
+ * 0 for CPU or GPU id
+ */
+static int32_t
+svm_range_best_restore_location(struct svm_range *prange,
+				struct kfd_node *node,
+				int32_t *gpuidx)
+{
+	struct kfd_node *bo_node, *preferred_node;
+	struct kfd_process *p;
+	uint32_t gpuid;
+	int r;
+
+	p = container_of(prange->svms, struct kfd_process, svms);
+
+	r = kfd_process_gpuid_from_node(p, node, &gpuid, gpuidx);
+	if (r < 0) {
+		pr_debug("failed to get gpuid from kgd\n");
+		return -1;
+	}
+
+	if (node->adev->gmc.is_app_apu)
+		return 0;
+
+	if (prange->preferred_loc == gpuid ||
+	    prange->preferred_loc == KFD_IOCTL_SVM_LOCATION_SYSMEM) {
+		return prange->preferred_loc;
+	} else if (prange->preferred_loc != KFD_IOCTL_SVM_LOCATION_UNDEFINED) {
+		preferred_node = svm_range_get_node_by_id(prange, prange->preferred_loc);
+		if (preferred_node && svm_nodes_in_same_hive(node, preferred_node))
+			return prange->preferred_loc;
+		/* fall through */
+	}
+
+	if (test_bit(*gpuidx, prange->bitmap_access))
+		return gpuid;
+
+	if (test_bit(*gpuidx, prange->bitmap_aip)) {
+		if (!prange->actual_loc)
+			return 0;
+
+		bo_node = svm_range_get_node_by_id(prange, prange->actual_loc);
+		if (bo_node && svm_nodes_in_same_hive(node, bo_node))
+			return prange->actual_loc;
+		else
+			return 0;
+	}
+
+	return -1;
+}
+
+static int
+svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
+			       unsigned long *start, unsigned long *last,
+			       bool *is_heap_stack)
+{
+	struct vm_area_struct *vma;
+	struct interval_tree_node *node;
+	unsigned long start_limit, end_limit;
+
+	vma = vma_lookup(p->mm, addr << PAGE_SHIFT);
+	if (!vma) {
+		pr_debug("VMA does not exist in address [0x%llx]\n", addr);
+		return -EFAULT;
+	}
+
+	*is_heap_stack = vma_is_initial_heap(vma) || vma_is_initial_stack(vma);
+
+	start_limit = max(vma->vm_start >> PAGE_SHIFT,
+		      (unsigned long)ALIGN_DOWN(addr, 2UL << 8));
+	end_limit = min(vma->vm_end >> PAGE_SHIFT,
+		    (unsigned long)ALIGN(addr + 1, 2UL << 8));
+	/* First range that starts after the fault address */
+	node = interval_tree_iter_first(&p->svms.objects, addr + 1, ULONG_MAX);
+	if (node) {
+		end_limit = min(end_limit, node->start);
+		/* Last range that ends before the fault address */
+		node = container_of(rb_prev(&node->rb),
+				    struct interval_tree_node, rb);
+	} else {
+		/* Last range must end before addr because
+		 * there was no range after addr
+		 */
+		node = container_of(rb_last(&p->svms.objects.rb_root),
+				    struct interval_tree_node, rb);
+	}
+	if (node) {
+		if (node->last >= addr) {
+			WARN(1, "Overlap with prev node and page fault addr\n");
+			return -EFAULT;
+		}
+		start_limit = max(start_limit, node->last + 1);
+	}
+
+	*start = start_limit;
+	*last = end_limit - 1;
+
+	pr_debug("vma [0x%lx 0x%lx] range [0x%lx 0x%lx] is_heap_stack %d\n",
+		 vma->vm_start >> PAGE_SHIFT, vma->vm_end >> PAGE_SHIFT,
+		 *start, *last, *is_heap_stack);
+
+	return 0;
+}
+
+static int
+svm_range_check_vm_userptr(struct kfd_process *p, uint64_t start, uint64_t last,
+			   uint64_t *bo_s, uint64_t *bo_l)
+{
+	struct amdgpu_bo_va_mapping *mapping;
+	struct interval_tree_node *node;
+	struct amdgpu_bo *bo = NULL;
+	unsigned long userptr;
+	uint32_t i;
+	int r;
+
+	for (i = 0; i < p->n_pdds; i++) {
+		struct amdgpu_vm *vm;
+
+		if (!p->pdds[i]->drm_priv)
+			continue;
+
+		vm = drm_priv_to_vm(p->pdds[i]->drm_priv);
+		r = amdgpu_bo_reserve(vm->root.bo, false);
+		if (r)
+			return r;
+
+		/* Check userptr by searching entire vm->va interval tree */
+		node = interval_tree_iter_first(&vm->va, 0, ~0ULL);
+		while (node) {
+			mapping = container_of((struct rb_node *)node,
+					       struct amdgpu_bo_va_mapping, rb);
+			bo = mapping->bo_va->base.bo;
+
+			if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
+							 start << PAGE_SHIFT,
+							 last << PAGE_SHIFT,
+							 &userptr)) {
+				node = interval_tree_iter_next(node, 0, ~0ULL);
+				continue;
+			}
+
+			pr_debug("[0x%llx 0x%llx] already userptr mapped\n",
+				 start, last);
+			if (bo_s && bo_l) {
+				*bo_s = userptr >> PAGE_SHIFT;
+				*bo_l = *bo_s + bo->tbo.ttm->num_pages - 1;
+			}
+			amdgpu_bo_unreserve(vm->root.bo);
+			return -EADDRINUSE;
+		}
+		amdgpu_bo_unreserve(vm->root.bo);
+	}
+	return 0;
+}
+
+static struct
+svm_range *svm_range_create_unregistered_range(struct kfd_node *node,
+						struct kfd_process *p,
+						struct mm_struct *mm,
+						int64_t addr)
+{
+	struct svm_range *prange = NULL;
+	unsigned long start, last;
+	uint32_t gpuid, gpuidx;
+	bool is_heap_stack;
+	uint64_t bo_s = 0;
+	uint64_t bo_l = 0;
+	int r;
+
+	if (svm_range_get_range_boundaries(p, addr, &start, &last,
+					   &is_heap_stack))
+		return NULL;
+
+	r = svm_range_check_vm(p, start, last, &bo_s, &bo_l);
+	if (r != -EADDRINUSE)
+		r = svm_range_check_vm_userptr(p, start, last, &bo_s, &bo_l);
+
+	if (r == -EADDRINUSE) {
+		if (addr >= bo_s && addr <= bo_l)
+			return NULL;
+
+		/* Create one page svm range if 2MB range overlapping */
+		start = addr;
+		last = addr;
+	}
+
+	prange = svm_range_new(&p->svms, start, last, true);
+	if (!prange) {
+		pr_debug("Failed to create prange in address [0x%llx]\n", addr);
+		return NULL;
+	}
+	if (kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx)) {
+		pr_debug("failed to get gpuid from kgd\n");
+		svm_range_free(prange, true);
+		return NULL;
+	}
+
+	if (is_heap_stack)
+		prange->preferred_loc = KFD_IOCTL_SVM_LOCATION_SYSMEM;
+
+	svm_range_add_to_svms(prange);
+	svm_range_add_notifier_locked(mm, prange);
+
+	return prange;
+}
+
+/* svm_range_skip_recover - decide if prange can be recovered
+ * @prange: svm range structure
+ *
+ * GPU vm retry fault handle skip recover the range for cases:
+ * 1. prange is on deferred list to be removed after unmap, it is stale fault,
+ *    deferred list work will drain the stale fault before free the prange.
+ * 2. prange is on deferred list to add interval notifier after split, or
+ * 3. prange is child range, it is split from parent prange, recover later
+ *    after interval notifier is added.
+ *
+ * Return: true to skip recover, false to recover
+ */
+static bool svm_range_skip_recover(struct svm_range *prange)
+{
+	struct svm_range_list *svms = prange->svms;
+
+	spin_lock(&svms->deferred_list_lock);
+	if (list_empty(&prange->deferred_list) &&
+	    list_empty(&prange->child_list)) {
+		spin_unlock(&svms->deferred_list_lock);
+		return false;
+	}
+	spin_unlock(&svms->deferred_list_lock);
+
+	if (prange->work_item.op == SVM_OP_UNMAP_RANGE) {
+		pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] unmapped\n",
+			 svms, prange, prange->start, prange->last);
+		return true;
+	}
+	if (prange->work_item.op == SVM_OP_ADD_RANGE_AND_MAP ||
+	    prange->work_item.op == SVM_OP_ADD_RANGE) {
+		pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] not added yet\n",
+			 svms, prange, prange->start, prange->last);
+		return true;
+	}
+	return false;
+}
+
+static void
+svm_range_count_fault(struct kfd_node *node, struct kfd_process *p,
+		      int32_t gpuidx)
+{
+	struct kfd_process_device *pdd;
+
+	/* fault is on different page of same range
+	 * or fault is skipped to recover later
+	 * or fault is on invalid virtual address
+	 */
+	if (gpuidx == MAX_GPU_INSTANCE) {
+		uint32_t gpuid;
+		int r;
+
+		r = kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx);
+		if (r < 0)
+			return;
+	}
+
+	/* fault is recovered
+	 * or fault cannot recover because GPU no access on the range
+	 */
+	pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+	if (pdd)
+		WRITE_ONCE(pdd->faults, pdd->faults + 1);
+}
+
+static bool
+svm_fault_allowed(struct vm_area_struct *vma, bool write_fault)
+{
+	unsigned long requested = VM_READ;
+
+	if (write_fault)
+		requested |= VM_WRITE;
+
+	pr_debug("requested 0x%lx, vma permission flags 0x%lx\n", requested,
+		vma->vm_flags);
+	return (vma->vm_flags & requested) == requested;
+}
+
+int
+svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
+			uint32_t vmid, uint32_t node_id,
+			uint64_t addr, bool write_fault)
+{
+	struct mm_struct *mm = NULL;
+	struct svm_range_list *svms;
+	struct svm_range *prange;
+	struct kfd_process *p;
+	ktime_t timestamp = ktime_get_boottime();
+	struct kfd_node *node;
+	int32_t best_loc;
+	int32_t gpuidx = MAX_GPU_INSTANCE;
+	bool write_locked = false;
+	struct vm_area_struct *vma;
+	bool migration = false;
+	int r = 0;
+
+	if (!KFD_IS_SVM_API_SUPPORTED(adev)) {
+		pr_debug("device does not support SVM\n");
+		return -EFAULT;
+	}
+
+	p = kfd_lookup_process_by_pasid(pasid);
+	if (!p) {
+		pr_debug("kfd process not founded pasid 0x%x\n", pasid);
+		return 0;
+	}
+	svms = &p->svms;
+
+	pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
+
+	if (atomic_read(&svms->drain_pagefaults)) {
+		pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+		r = 0;
+		goto out;
+	}
+
+	if (!p->xnack_enabled) {
+		pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
+		r = -EFAULT;
+		goto out;
+	}
+
+	/* p->lead_thread is available as kfd_process_wq_release flush the work
+	 * before releasing task ref.
+	 */
+	mm = get_task_mm(p->lead_thread);
+	if (!mm) {
+		pr_debug("svms 0x%p failed to get mm\n", svms);
+		r = 0;
+		goto out;
+	}
+
+	node = kfd_node_by_irq_ids(adev, node_id, vmid);
+	if (!node) {
+		pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id,
+			 vmid);
+		r = -EFAULT;
+		goto out;
+	}
+	mmap_read_lock(mm);
+retry_write_locked:
+	mutex_lock(&svms->lock);
+	prange = svm_range_from_addr(svms, addr, NULL);
+	if (!prange) {
+		pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
+			 svms, addr);
+		if (!write_locked) {
+			/* Need the write lock to create new range with MMU notifier.
+			 * Also flush pending deferred work to make sure the interval
+			 * tree is up to date before we add a new range
+			 */
+			mutex_unlock(&svms->lock);
+			mmap_read_unlock(mm);
+			mmap_write_lock(mm);
+			write_locked = true;
+			goto retry_write_locked;
+		}
+		prange = svm_range_create_unregistered_range(node, p, mm, addr);
+		if (!prange) {
+			pr_debug("failed to create unregistered range svms 0x%p address [0x%llx]\n",
+				 svms, addr);
+			mmap_write_downgrade(mm);
+			r = -EFAULT;
+			goto out_unlock_svms;
+		}
+	}
+	if (write_locked)
+		mmap_write_downgrade(mm);
+
+	mutex_lock(&prange->migrate_mutex);
+
+	if (svm_range_skip_recover(prange)) {
+		amdgpu_gmc_filter_faults_remove(node->adev, addr, pasid);
+		r = 0;
+		goto out_unlock_range;
+	}
+
+	/* skip duplicate vm fault on different pages of same range */
+	if (ktime_before(timestamp, ktime_add_ns(prange->validate_timestamp,
+				AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING))) {
+		pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
+			 svms, prange->start, prange->last);
+		r = 0;
+		goto out_unlock_range;
+	}
+
+	/* __do_munmap removed VMA, return success as we are handling stale
+	 * retry fault.
+	 */
+	vma = vma_lookup(mm, addr << PAGE_SHIFT);
+	if (!vma) {
+		pr_debug("address 0x%llx VMA is removed\n", addr);
+		r = 0;
+		goto out_unlock_range;
+	}
+
+	if (!svm_fault_allowed(vma, write_fault)) {
+		pr_debug("fault addr 0x%llx no %s permission\n", addr,
+			write_fault ? "write" : "read");
+		r = -EPERM;
+		goto out_unlock_range;
+	}
+
+	best_loc = svm_range_best_restore_location(prange, node, &gpuidx);
+	if (best_loc == -1) {
+		pr_debug("svms %p failed get best restore loc [0x%lx 0x%lx]\n",
+			 svms, prange->start, prange->last);
+		r = -EACCES;
+		goto out_unlock_range;
+	}
+
+	pr_debug("svms %p [0x%lx 0x%lx] best restore 0x%x, actual loc 0x%x\n",
+		 svms, prange->start, prange->last, best_loc,
+		 prange->actual_loc);
+
+	kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
+				       write_fault, timestamp);
+
+	if (prange->actual_loc != best_loc) {
+		migration = true;
+		if (best_loc) {
+			r = svm_migrate_to_vram(prange, best_loc, mm,
+					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
+			if (r) {
+				pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
+					 r, addr);
+				/* Fallback to system memory if migration to
+				 * VRAM failed
+				 */
+				if (prange->actual_loc)
+					r = svm_migrate_vram_to_ram(prange, mm,
+					   KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
+					   NULL);
+				else
+					r = 0;
+			}
+		} else {
+			r = svm_migrate_vram_to_ram(prange, mm,
+					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
+					NULL);
+		}
+		if (r) {
+			pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
+				 r, svms, prange->start, prange->last);
+			goto out_unlock_range;
+		}
+	}
+
+	r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
+	if (r)
+		pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
+			 r, svms, prange->start, prange->last);
+
+	kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr,
+				     migration);
+
+out_unlock_range:
+	mutex_unlock(&prange->migrate_mutex);
+out_unlock_svms:
+	mutex_unlock(&svms->lock);
+	mmap_read_unlock(mm);
+
+	svm_range_count_fault(node, p, gpuidx);
+
+	mmput(mm);
+out:
+	kfd_unref_process(p);
+
+	if (r == -EAGAIN) {
+		pr_debug("recover vm fault later\n");
+		amdgpu_gmc_filter_faults_remove(node->adev, addr, pasid);
+		r = 0;
+	}
+	return r;
+}
+
+int
+svm_range_switch_xnack_reserve_mem(struct kfd_process *p, bool xnack_enabled)
+{
+	struct svm_range *prange, *pchild;
+	uint64_t reserved_size = 0;
+	uint64_t size;
+	int r = 0;
+
+	pr_debug("switching xnack from %d to %d\n", p->xnack_enabled, xnack_enabled);
+
+	mutex_lock(&p->svms.lock);
+
+	list_for_each_entry(prange, &p->svms.list, list) {
+		svm_range_lock(prange);
+		list_for_each_entry(pchild, &prange->child_list, child_list) {
+			size = (pchild->last - pchild->start + 1) << PAGE_SHIFT;
+			if (xnack_enabled) {
+				amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
+					KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
+			} else {
+				r = amdgpu_amdkfd_reserve_mem_limit(NULL, size,
+					KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
+				if (r)
+					goto out_unlock;
+				reserved_size += size;
+			}
+		}
+
+		size = (prange->last - prange->start + 1) << PAGE_SHIFT;
+		if (xnack_enabled) {
+			amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
+					KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
+		} else {
+			r = amdgpu_amdkfd_reserve_mem_limit(NULL, size,
+					KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
+			if (r)
+				goto out_unlock;
+			reserved_size += size;
+		}
+out_unlock:
+		svm_range_unlock(prange);
+		if (r)
+			break;
+	}
+
+	if (r)
+		amdgpu_amdkfd_unreserve_mem_limit(NULL, reserved_size,
+					KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
+	else
+		/* Change xnack mode must be inside svms lock, to avoid race with
+		 * svm_range_deferred_list_work unreserve memory in parallel.
+		 */
+		p->xnack_enabled = xnack_enabled;
+
+	mutex_unlock(&p->svms.lock);
+	return r;
+}
+
+void svm_range_list_fini(struct kfd_process *p)
+{
+	struct svm_range *prange;
+	struct svm_range *next;
+
+	pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, &p->svms);
+
+	cancel_delayed_work_sync(&p->svms.restore_work);
+
+	/* Ensure list work is finished before process is destroyed */
+	flush_work(&p->svms.deferred_list_work);
+
+	/*
+	 * Ensure no retry fault comes in afterwards, as page fault handler will
+	 * not find kfd process and take mm lock to recover fault.
+	 */
+	atomic_inc(&p->svms.drain_pagefaults);
+	svm_range_drain_retry_fault(&p->svms);
+
+	list_for_each_entry_safe(prange, next, &p->svms.list, list) {
+		svm_range_unlink(prange);
+		svm_range_remove_notifier(prange);
+		svm_range_free(prange, true);
+	}
+
+	mutex_destroy(&p->svms.lock);
+
+	pr_debug("pasid 0x%x svms 0x%p done\n", p->pasid, &p->svms);
+}
+
+int svm_range_list_init(struct kfd_process *p)
+{
+	struct svm_range_list *svms = &p->svms;
+	int i;
+
+	svms->objects = RB_ROOT_CACHED;
+	mutex_init(&svms->lock);
+	INIT_LIST_HEAD(&svms->list);
+	atomic_set(&svms->evicted_ranges, 0);
+	atomic_set(&svms->drain_pagefaults, 0);
+	INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
+	INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
+	INIT_LIST_HEAD(&svms->deferred_range_list);
+	INIT_LIST_HEAD(&svms->criu_svm_metadata_list);
+	spin_lock_init(&svms->deferred_list_lock);
+
+	for (i = 0; i < p->n_pdds; i++)
+		if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev->adev))
+			bitmap_set(svms->bitmap_supported, i, 1);
+
+	return 0;
+}
+
+/**
+ * svm_range_check_vm - check if virtual address range mapped already
+ * @p: current kfd_process
+ * @start: range start address, in pages
+ * @last: range last address, in pages
+ * @bo_s: mapping start address in pages if address range already mapped
+ * @bo_l: mapping last address in pages if address range already mapped
+ *
+ * The purpose is to avoid virtual address ranges already allocated by
+ * kfd_ioctl_alloc_memory_of_gpu ioctl.
+ * It looks for each pdd in the kfd_process.
+ *
+ * Context: Process context
+ *
+ * Return 0 - OK, if the range is not mapped.
+ * Otherwise error code:
+ * -EADDRINUSE - if address is mapped already by kfd_ioctl_alloc_memory_of_gpu
+ * -ERESTARTSYS - A wait for the buffer to become unreserved was interrupted by
+ * a signal. Release all buffer reservations and return to user-space.
+ */
+static int
+svm_range_check_vm(struct kfd_process *p, uint64_t start, uint64_t last,
+		   uint64_t *bo_s, uint64_t *bo_l)
+{
+	struct amdgpu_bo_va_mapping *mapping;
+	struct interval_tree_node *node;
+	uint32_t i;
+	int r;
+
+	for (i = 0; i < p->n_pdds; i++) {
+		struct amdgpu_vm *vm;
+
+		if (!p->pdds[i]->drm_priv)
+			continue;
+
+		vm = drm_priv_to_vm(p->pdds[i]->drm_priv);
+		r = amdgpu_bo_reserve(vm->root.bo, false);
+		if (r)
+			return r;
+
+		node = interval_tree_iter_first(&vm->va, start, last);
+		if (node) {
+			pr_debug("range [0x%llx 0x%llx] already TTM mapped\n",
+				 start, last);
+			mapping = container_of((struct rb_node *)node,
+					       struct amdgpu_bo_va_mapping, rb);
+			if (bo_s && bo_l) {
+				*bo_s = mapping->start;
+				*bo_l = mapping->last;
+			}
+			amdgpu_bo_unreserve(vm->root.bo);
+			return -EADDRINUSE;
+		}
+		amdgpu_bo_unreserve(vm->root.bo);
+	}
+
+	return 0;
+}
+
+/**
+ * svm_range_is_valid - check if virtual address range is valid
+ * @p: current kfd_process
+ * @start: range start address, in pages
+ * @size: range size, in pages
+ *
+ * Valid virtual address range means it belongs to one or more VMAs
+ *
+ * Context: Process context
+ *
+ * Return:
+ *  0 - OK, otherwise error code
+ */
+static int
+svm_range_is_valid(struct kfd_process *p, uint64_t start, uint64_t size)
+{
+	const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
+	struct vm_area_struct *vma;
+	unsigned long end;
+	unsigned long start_unchg = start;
+
+	start <<= PAGE_SHIFT;
+	end = start + (size << PAGE_SHIFT);
+	do {
+		vma = vma_lookup(p->mm, start);
+		if (!vma || (vma->vm_flags & device_vma))
+			return -EFAULT;
+		start = min(end, vma->vm_end);
+	} while (start < end);
+
+	return svm_range_check_vm(p, start_unchg, (end - 1) >> PAGE_SHIFT, NULL,
+				  NULL);
+}
+
+/**
+ * svm_range_best_prefetch_location - decide the best prefetch location
+ * @prange: svm range structure
+ *
+ * For xnack off:
+ * If range map to single GPU, the best prefetch location is prefetch_loc, which
+ * can be CPU or GPU.
+ *
+ * If range is ACCESS or ACCESS_IN_PLACE by mGPUs, only if mGPU connection on
+ * XGMI same hive, the best prefetch location is prefetch_loc GPU, othervise
+ * the best prefetch location is always CPU, because GPU can not have coherent
+ * mapping VRAM of other GPUs even with large-BAR PCIe connection.
+ *
+ * For xnack on:
+ * If range is not ACCESS_IN_PLACE by mGPUs, the best prefetch location is
+ * prefetch_loc, other GPU access will generate vm fault and trigger migration.
+ *
+ * If range is ACCESS_IN_PLACE by mGPUs, only if mGPU connection on XGMI same
+ * hive, the best prefetch location is prefetch_loc GPU, otherwise the best
+ * prefetch location is always CPU.
+ *
+ * Context: Process context
+ *
+ * Return:
+ * 0 for CPU or GPU id
+ */
+static uint32_t
+svm_range_best_prefetch_location(struct svm_range *prange)
+{
+	DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
+	uint32_t best_loc = prange->prefetch_loc;
+	struct kfd_process_device *pdd;
+	struct kfd_node *bo_node;
+	struct kfd_process *p;
+	uint32_t gpuidx;
+
+	p = container_of(prange->svms, struct kfd_process, svms);
+
+	if (!best_loc || best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED)
+		goto out;
+
+	bo_node = svm_range_get_node_by_id(prange, best_loc);
+	if (!bo_node) {
+		WARN_ONCE(1, "failed to get valid kfd node at id%x\n", best_loc);
+		best_loc = 0;
+		goto out;
+	}
+
+	if (bo_node->adev->gmc.is_app_apu) {
+		best_loc = 0;
+		goto out;
+	}
+
+	if (p->xnack_enabled)
+		bitmap_copy(bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
+	else
+		bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
+			  MAX_GPU_INSTANCE);
+
+	for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+		pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+		if (!pdd) {
+			pr_debug("failed to get device by idx 0x%x\n", gpuidx);
+			continue;
+		}
+
+		if (pdd->dev->adev == bo_node->adev)
+			continue;
+
+		if (!svm_nodes_in_same_hive(pdd->dev, bo_node)) {
+			best_loc = 0;
+			break;
+		}
+	}
+
+out:
+	pr_debug("xnack %d svms 0x%p [0x%lx 0x%lx] best loc 0x%x\n",
+		 p->xnack_enabled, &p->svms, prange->start, prange->last,
+		 best_loc);
+
+	return best_loc;
+}
+
+/* svm_range_trigger_migration - start page migration if prefetch loc changed
+ * @mm: current process mm_struct
+ * @prange: svm range structure
+ * @migrated: output, true if migration is triggered
+ *
+ * If range perfetch_loc is GPU, actual loc is cpu 0, then migrate the range
+ * from ram to vram.
+ * If range prefetch_loc is cpu 0, actual loc is GPU, then migrate the range
+ * from vram to ram.
+ *
+ * If GPU vm fault retry is not enabled, migration interact with MMU notifier
+ * and restore work:
+ * 1. migrate_vma_setup invalidate pages, MMU notifier callback svm_range_evict
+ *    stops all queues, schedule restore work
+ * 2. svm_range_restore_work wait for migration is done by
+ *    a. svm_range_validate_vram takes prange->migrate_mutex
+ *    b. svm_range_validate_ram HMM get pages wait for CPU fault handle returns
+ * 3. restore work update mappings of GPU, resume all queues.
+ *
+ * Context: Process context
+ *
+ * Return:
+ * 0 - OK, otherwise - error code of migration
+ */
+static int
+svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
+			    bool *migrated)
+{
+	uint32_t best_loc;
+	int r = 0;
+
+	*migrated = false;
+	best_loc = svm_range_best_prefetch_location(prange);
+
+	if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
+	    best_loc == prange->actual_loc)
+		return 0;
+
+	if (!best_loc) {
+		r = svm_migrate_vram_to_ram(prange, mm,
+					KFD_MIGRATE_TRIGGER_PREFETCH, NULL);
+		*migrated = !r;
+		return r;
+	}
+
+	r = svm_migrate_to_vram(prange, best_loc, mm, KFD_MIGRATE_TRIGGER_PREFETCH);
+	*migrated = !r;
+
+	return r;
+}
+
+int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
+{
+	if (!fence)
+		return -EINVAL;
+
+	if (dma_fence_is_signaled(&fence->base))
+		return 0;
+
+	if (fence->svm_bo) {
+		WRITE_ONCE(fence->svm_bo->evicting, 1);
+		schedule_work(&fence->svm_bo->eviction_work);
+	}
+
+	return 0;
+}
+
+static void svm_range_evict_svm_bo_worker(struct work_struct *work)
+{
+	struct svm_range_bo *svm_bo;
+	struct mm_struct *mm;
+	int r = 0;
+
+	svm_bo = container_of(work, struct svm_range_bo, eviction_work);
+	if (!svm_bo_ref_unless_zero(svm_bo))
+		return; /* svm_bo was freed while eviction was pending */
+
+	if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+		mm = svm_bo->eviction_fence->mm;
+	} else {
+		svm_range_bo_unref(svm_bo);
+		return;
+	}
+
+	mmap_read_lock(mm);
+	spin_lock(&svm_bo->list_lock);
+	while (!list_empty(&svm_bo->range_list) && !r) {
+		struct svm_range *prange =
+				list_first_entry(&svm_bo->range_list,
+						struct svm_range, svm_bo_list);
+		int retries = 3;
+
+		list_del_init(&prange->svm_bo_list);
+		spin_unlock(&svm_bo->list_lock);
+
+		pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
+			 prange->start, prange->last);
+
+		mutex_lock(&prange->migrate_mutex);
+		do {
+			r = svm_migrate_vram_to_ram(prange, mm,
+					KFD_MIGRATE_TRIGGER_TTM_EVICTION, NULL);
+		} while (!r && prange->actual_loc && --retries);
+
+		if (!r && prange->actual_loc)
+			pr_info_once("Migration failed during eviction");
+
+		if (!prange->actual_loc) {
+			mutex_lock(&prange->lock);
+			prange->svm_bo = NULL;
+			mutex_unlock(&prange->lock);
+		}
+		mutex_unlock(&prange->migrate_mutex);
+
+		spin_lock(&svm_bo->list_lock);
+	}
+	spin_unlock(&svm_bo->list_lock);
+	mmap_read_unlock(mm);
+	mmput(mm);
+
+	dma_fence_signal(&svm_bo->eviction_fence->base);
+
+	/* This is the last reference to svm_bo, after svm_range_vram_node_free
+	 * has been called in svm_migrate_vram_to_ram
+	 */
+	WARN_ONCE(!r && kref_read(&svm_bo->kref) != 1, "This was not the last reference\n");
+	svm_range_bo_unref(svm_bo);
+}
+
+static int
+svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
+		   uint64_t start, uint64_t size, uint32_t nattr,
+		   struct kfd_ioctl_svm_attribute *attrs)
+{
+	struct amdkfd_process_info *process_info = p->kgd_process_info;
+	struct list_head update_list;
+	struct list_head insert_list;
+	struct list_head remove_list;
+	struct svm_range_list *svms;
+	struct svm_range *prange;
+	struct svm_range *next;
+	bool update_mapping = false;
+	bool flush_tlb;
+	int r, ret = 0;
+
+	pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
+		 p->pasid, &p->svms, start, start + size - 1, size);
+
+	r = svm_range_check_attr(p, nattr, attrs);
+	if (r)
+		return r;
+
+	svms = &p->svms;
+
+	mutex_lock(&process_info->lock);
+
+	svm_range_list_lock_and_flush_work(svms, mm);
+
+	r = svm_range_is_valid(p, start, size);
+	if (r) {
+		pr_debug("invalid range r=%d\n", r);
+		mmap_write_unlock(mm);
+		goto out;
+	}
+
+	mutex_lock(&svms->lock);
+
+	/* Add new range and split existing ranges as needed */
+	r = svm_range_add(p, start, size, nattr, attrs, &update_list,
+			  &insert_list, &remove_list);
+	if (r) {
+		mutex_unlock(&svms->lock);
+		mmap_write_unlock(mm);
+		goto out;
+	}
+	/* Apply changes as a transaction */
+	list_for_each_entry_safe(prange, next, &insert_list, list) {
+		svm_range_add_to_svms(prange);
+		svm_range_add_notifier_locked(mm, prange);
+	}
+	list_for_each_entry(prange, &update_list, update_list) {
+		svm_range_apply_attrs(p, prange, nattr, attrs, &update_mapping);
+		/* TODO: unmap ranges from GPU that lost access */
+	}
+	list_for_each_entry_safe(prange, next, &remove_list, update_list) {
+		pr_debug("unlink old 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+			 prange->svms, prange, prange->start,
+			 prange->last);
+		svm_range_unlink(prange);
+		svm_range_remove_notifier(prange);
+		svm_range_free(prange, false);
+	}
+
+	mmap_write_downgrade(mm);
+	/* Trigger migrations and revalidate and map to GPUs as needed. If
+	 * this fails we may be left with partially completed actions. There
+	 * is no clean way of rolling back to the previous state in such a
+	 * case because the rollback wouldn't be guaranteed to work either.
+	 */
+	list_for_each_entry(prange, &update_list, update_list) {
+		bool migrated;
+
+		mutex_lock(&prange->migrate_mutex);
+
+		r = svm_range_trigger_migration(mm, prange, &migrated);
+		if (r)
+			goto out_unlock_range;
+
+		if (migrated && (!p->xnack_enabled ||
+		    (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) &&
+		    prange->mapped_to_gpu) {
+			pr_debug("restore_work will update mappings of GPUs\n");
+			mutex_unlock(&prange->migrate_mutex);
+			continue;
+		}
+
+		if (!migrated && !update_mapping) {
+			mutex_unlock(&prange->migrate_mutex);
+			continue;
+		}
+
+		flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;
+
+		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
+					       true, true, flush_tlb);
+		if (r)
+			pr_debug("failed %d to map svm range\n", r);
+
+out_unlock_range:
+		mutex_unlock(&prange->migrate_mutex);
+		if (r)
+			ret = r;
+	}
+
+	dynamic_svm_range_dump(svms);
+
+	mutex_unlock(&svms->lock);
+	mmap_read_unlock(mm);
+out:
+	mutex_unlock(&process_info->lock);
+
+	pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid,
+		 &p->svms, start, start + size - 1, r);
+
+	return ret ? ret : r;
+}
+
+static int
+svm_range_get_attr(struct kfd_process *p, struct mm_struct *mm,
+		   uint64_t start, uint64_t size, uint32_t nattr,
+		   struct kfd_ioctl_svm_attribute *attrs)
+{
+	DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
+	DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
+	bool get_preferred_loc = false;
+	bool get_prefetch_loc = false;
+	bool get_granularity = false;
+	bool get_accessible = false;
+	bool get_flags = false;
+	uint64_t last = start + size - 1UL;
+	uint8_t granularity = 0xff;
+	struct interval_tree_node *node;
+	struct svm_range_list *svms;
+	struct svm_range *prange;
+	uint32_t prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+	uint32_t location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+	uint32_t flags_and = 0xffffffff;
+	uint32_t flags_or = 0;
+	int gpuidx;
+	uint32_t i;
+	int r = 0;
+
+	pr_debug("svms 0x%p [0x%llx 0x%llx] nattr 0x%x\n", &p->svms, start,
+		 start + size - 1, nattr);
+
+	/* Flush pending deferred work to avoid racing with deferred actions from
+	 * previous memory map changes (e.g. munmap). Concurrent memory map changes
+	 * can still race with get_attr because we don't hold the mmap lock. But that
+	 * would be a race condition in the application anyway, and undefined
+	 * behaviour is acceptable in that case.
+	 */
+	flush_work(&p->svms.deferred_list_work);
+
+	mmap_read_lock(mm);
+	r = svm_range_is_valid(p, start, size);
+	mmap_read_unlock(mm);
+	if (r) {
+		pr_debug("invalid range r=%d\n", r);
+		return r;
+	}
+
+	for (i = 0; i < nattr; i++) {
+		switch (attrs[i].type) {
+		case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
+			get_preferred_loc = true;
+			break;
+		case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+			get_prefetch_loc = true;
+			break;
+		case KFD_IOCTL_SVM_ATTR_ACCESS:
+			get_accessible = true;
+			break;
+		case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+		case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+			get_flags = true;
+			break;
+		case KFD_IOCTL_SVM_ATTR_GRANULARITY:
+			get_granularity = true;
+			break;
+		case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
+		case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+			fallthrough;
+		default:
+			pr_debug("get invalid attr type 0x%x\n", attrs[i].type);
+			return -EINVAL;
+		}
+	}
+
+	svms = &p->svms;
+
+	mutex_lock(&svms->lock);
+
+	node = interval_tree_iter_first(&svms->objects, start, last);
+	if (!node) {
+		pr_debug("range attrs not found return default values\n");
+		svm_range_set_default_attributes(&location, &prefetch_loc,
+						 &granularity, &flags_and);
+		flags_or = flags_and;
+		if (p->xnack_enabled)
+			bitmap_copy(bitmap_access, svms->bitmap_supported,
+				    MAX_GPU_INSTANCE);
+		else
+			bitmap_zero(bitmap_access, MAX_GPU_INSTANCE);
+		bitmap_zero(bitmap_aip, MAX_GPU_INSTANCE);
+		goto fill_values;
+	}
+	bitmap_copy(bitmap_access, svms->bitmap_supported, MAX_GPU_INSTANCE);
+	bitmap_copy(bitmap_aip, svms->bitmap_supported, MAX_GPU_INSTANCE);
+
+	while (node) {
+		struct interval_tree_node *next;
+
+		prange = container_of(node, struct svm_range, it_node);
+		next = interval_tree_iter_next(node, start, last);
+
+		if (get_preferred_loc) {
+			if (prange->preferred_loc ==
+					KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
+			    (location != KFD_IOCTL_SVM_LOCATION_UNDEFINED &&
+			     location != prange->preferred_loc)) {
+				location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+				get_preferred_loc = false;
+			} else {
+				location = prange->preferred_loc;
+			}
+		}
+		if (get_prefetch_loc) {
+			if (prange->prefetch_loc ==
+					KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
+			    (prefetch_loc != KFD_IOCTL_SVM_LOCATION_UNDEFINED &&
+			     prefetch_loc != prange->prefetch_loc)) {
+				prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+				get_prefetch_loc = false;
+			} else {
+				prefetch_loc = prange->prefetch_loc;
+			}
+		}
+		if (get_accessible) {
+			bitmap_and(bitmap_access, bitmap_access,
+				   prange->bitmap_access, MAX_GPU_INSTANCE);
+			bitmap_and(bitmap_aip, bitmap_aip,
+				   prange->bitmap_aip, MAX_GPU_INSTANCE);
+		}
+		if (get_flags) {
+			flags_and &= prange->flags;
+			flags_or |= prange->flags;
+		}
+
+		if (get_granularity && prange->granularity < granularity)
+			granularity = prange->granularity;
+
+		node = next;
+	}
+fill_values:
+	mutex_unlock(&svms->lock);
+
+	for (i = 0; i < nattr; i++) {
+		switch (attrs[i].type) {
+		case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
+			attrs[i].value = location;
+			break;
+		case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+			attrs[i].value = prefetch_loc;
+			break;
+		case KFD_IOCTL_SVM_ATTR_ACCESS:
+			gpuidx = kfd_process_gpuidx_from_gpuid(p,
+							       attrs[i].value);
+			if (gpuidx < 0) {
+				pr_debug("invalid gpuid %x\n", attrs[i].value);
+				return -EINVAL;
+			}
+			if (test_bit(gpuidx, bitmap_access))
+				attrs[i].type = KFD_IOCTL_SVM_ATTR_ACCESS;
+			else if (test_bit(gpuidx, bitmap_aip))
+				attrs[i].type =
+					KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE;
+			else
+				attrs[i].type = KFD_IOCTL_SVM_ATTR_NO_ACCESS;
+			break;
+		case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+			attrs[i].value = flags_and;
+			break;
+		case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+			attrs[i].value = ~flags_or;
+			break;
+		case KFD_IOCTL_SVM_ATTR_GRANULARITY:
+			attrs[i].value = (uint32_t)granularity;
+			break;
+		}
+	}
+
+	return 0;
+}
+
+int kfd_criu_resume_svm(struct kfd_process *p)
+{
+	struct kfd_ioctl_svm_attribute *set_attr_new, *set_attr = NULL;
+	int nattr_common = 4, nattr_accessibility = 1;
+	struct criu_svm_metadata *criu_svm_md = NULL;
+	struct svm_range_list *svms = &p->svms;
+	struct criu_svm_metadata *next = NULL;
+	uint32_t set_flags = 0xffffffff;
+	int i, j, num_attrs, ret = 0;
+	uint64_t set_attr_size;
+	struct mm_struct *mm;
+
+	if (list_empty(&svms->criu_svm_metadata_list)) {
+		pr_debug("No SVM data from CRIU restore stage 2\n");
+		return ret;
+	}
+
+	mm = get_task_mm(p->lead_thread);
+	if (!mm) {
+		pr_err("failed to get mm for the target process\n");
+		return -ESRCH;
+	}
+
+	num_attrs = nattr_common + (nattr_accessibility * p->n_pdds);
+
+	i = j = 0;
+	list_for_each_entry(criu_svm_md, &svms->criu_svm_metadata_list, list) {
+		pr_debug("criu_svm_md[%d]\n\tstart: 0x%llx size: 0x%llx (npages)\n",
+			 i, criu_svm_md->data.start_addr, criu_svm_md->data.size);
+
+		for (j = 0; j < num_attrs; j++) {
+			pr_debug("\ncriu_svm_md[%d]->attrs[%d].type : 0x%x\ncriu_svm_md[%d]->attrs[%d].value : 0x%x\n",
+				 i, j, criu_svm_md->data.attrs[j].type,
+				 i, j, criu_svm_md->data.attrs[j].value);
+			switch (criu_svm_md->data.attrs[j].type) {
+			/* During Checkpoint operation, the query for
+			 * KFD_IOCTL_SVM_ATTR_PREFETCH_LOC attribute might
+			 * return KFD_IOCTL_SVM_LOCATION_UNDEFINED if they were
+			 * not used by the range which was checkpointed. Care
+			 * must be taken to not restore with an invalid value
+			 * otherwise the gpuidx value will be invalid and
+			 * set_attr would eventually fail so just replace those
+			 * with another dummy attribute such as
+			 * KFD_IOCTL_SVM_ATTR_SET_FLAGS.
+			 */
+			case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+				if (criu_svm_md->data.attrs[j].value ==
+				    KFD_IOCTL_SVM_LOCATION_UNDEFINED) {
+					criu_svm_md->data.attrs[j].type =
+						KFD_IOCTL_SVM_ATTR_SET_FLAGS;
+					criu_svm_md->data.attrs[j].value = 0;
+				}
+				break;
+			case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+				set_flags = criu_svm_md->data.attrs[j].value;
+				break;
+			default:
+				break;
+			}
+		}
+
+		/* CLR_FLAGS is not available via get_attr during checkpoint but
+		 * it needs to be inserted before restoring the ranges so
+		 * allocate extra space for it before calling set_attr
+		 */
+		set_attr_size = sizeof(struct kfd_ioctl_svm_attribute) *
+						(num_attrs + 1);
+		set_attr_new = krealloc(set_attr, set_attr_size,
+					    GFP_KERNEL);
+		if (!set_attr_new) {
+			ret = -ENOMEM;
+			goto exit;
+		}
+		set_attr = set_attr_new;
+
+		memcpy(set_attr, criu_svm_md->data.attrs, num_attrs *
+					sizeof(struct kfd_ioctl_svm_attribute));
+		set_attr[num_attrs].type = KFD_IOCTL_SVM_ATTR_CLR_FLAGS;
+		set_attr[num_attrs].value = ~set_flags;
+
+		ret = svm_range_set_attr(p, mm, criu_svm_md->data.start_addr,
+					 criu_svm_md->data.size, num_attrs + 1,
+					 set_attr);
+		if (ret) {
+			pr_err("CRIU: failed to set range attributes\n");
+			goto exit;
+		}
+
+		i++;
+	}
+exit:
+	kfree(set_attr);
+	list_for_each_entry_safe(criu_svm_md, next, &svms->criu_svm_metadata_list, list) {
+		pr_debug("freeing criu_svm_md[]\n\tstart: 0x%llx\n",
+						criu_svm_md->data.start_addr);
+		kfree(criu_svm_md);
+	}
+
+	mmput(mm);
+	return ret;
+
+}
+
+int kfd_criu_restore_svm(struct kfd_process *p,
+			 uint8_t __user *user_priv_ptr,
+			 uint64_t *priv_data_offset,
+			 uint64_t max_priv_data_size)
+{
+	uint64_t svm_priv_data_size, svm_object_md_size, svm_attrs_size;
+	int nattr_common = 4, nattr_accessibility = 1;
+	struct criu_svm_metadata *criu_svm_md = NULL;
+	struct svm_range_list *svms = &p->svms;
+	uint32_t num_devices;
+	int ret = 0;
+
+	num_devices = p->n_pdds;
+	/* Handle one SVM range object at a time, also the number of gpus are
+	 * assumed to be same on the restore node, checking must be done while
+	 * evaluating the topology earlier
+	 */
+
+	svm_attrs_size = sizeof(struct kfd_ioctl_svm_attribute) *
+		(nattr_common + nattr_accessibility * num_devices);
+	svm_object_md_size = sizeof(struct criu_svm_metadata) + svm_attrs_size;
+
+	svm_priv_data_size = sizeof(struct kfd_criu_svm_range_priv_data) +
+								svm_attrs_size;
+
+	criu_svm_md = kzalloc(svm_object_md_size, GFP_KERNEL);
+	if (!criu_svm_md) {
+		pr_err("failed to allocate memory to store svm metadata\n");
+		return -ENOMEM;
+	}
+	if (*priv_data_offset + svm_priv_data_size > max_priv_data_size) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	ret = copy_from_user(&criu_svm_md->data, user_priv_ptr + *priv_data_offset,
+			     svm_priv_data_size);
+	if (ret) {
+		ret = -EFAULT;
+		goto exit;
+	}
+	*priv_data_offset += svm_priv_data_size;
+
+	list_add_tail(&criu_svm_md->list, &svms->criu_svm_metadata_list);
+
+	return 0;
+
+
+exit:
+	kfree(criu_svm_md);
+	return ret;
+}
+
+int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
+		       uint64_t *svm_priv_data_size)
+{
+	uint64_t total_size, accessibility_size, common_attr_size;
+	int nattr_common = 4, nattr_accessibility = 1;
+	int num_devices = p->n_pdds;
+	struct svm_range_list *svms;
+	struct svm_range *prange;
+	uint32_t count = 0;
+
+	*svm_priv_data_size = 0;
+
+	svms = &p->svms;
+	if (!svms)
+		return -EINVAL;
+
+	mutex_lock(&svms->lock);
+	list_for_each_entry(prange, &svms->list, list) {
+		pr_debug("prange: 0x%p start: 0x%lx\t npages: 0x%llx\t end: 0x%llx\n",
+			 prange, prange->start, prange->npages,
+			 prange->start + prange->npages - 1);
+		count++;
+	}
+	mutex_unlock(&svms->lock);
+
+	*num_svm_ranges = count;
+	/* Only the accessbility attributes need to be queried for all the gpus
+	 * individually, remaining ones are spanned across the entire process
+	 * regardless of the various gpu nodes. Of the remaining attributes,
+	 * KFD_IOCTL_SVM_ATTR_CLR_FLAGS need not be saved.
+	 *
+	 * KFD_IOCTL_SVM_ATTR_PREFERRED_LOC
+	 * KFD_IOCTL_SVM_ATTR_PREFETCH_LOC
+	 * KFD_IOCTL_SVM_ATTR_SET_FLAGS
+	 * KFD_IOCTL_SVM_ATTR_GRANULARITY
+	 *
+	 * ** ACCESSBILITY ATTRIBUTES **
+	 * (Considered as one, type is altered during query, value is gpuid)
+	 * KFD_IOCTL_SVM_ATTR_ACCESS
+	 * KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE
+	 * KFD_IOCTL_SVM_ATTR_NO_ACCESS
+	 */
+	if (*num_svm_ranges > 0) {
+		common_attr_size = sizeof(struct kfd_ioctl_svm_attribute) *
+			nattr_common;
+		accessibility_size = sizeof(struct kfd_ioctl_svm_attribute) *
+			nattr_accessibility * num_devices;
+
+		total_size = sizeof(struct kfd_criu_svm_range_priv_data) +
+			common_attr_size + accessibility_size;
+
+		*svm_priv_data_size = *num_svm_ranges * total_size;
+	}
+
+	pr_debug("num_svm_ranges %u total_priv_size %llu\n", *num_svm_ranges,
+		 *svm_priv_data_size);
+	return 0;
+}
+
+int kfd_criu_checkpoint_svm(struct kfd_process *p,
+			    uint8_t __user *user_priv_data,
+			    uint64_t *priv_data_offset)
+{
+	struct kfd_criu_svm_range_priv_data *svm_priv = NULL;
+	struct kfd_ioctl_svm_attribute *query_attr = NULL;
+	uint64_t svm_priv_data_size, query_attr_size = 0;
+	int index, nattr_common = 4, ret = 0;
+	struct svm_range_list *svms;
+	int num_devices = p->n_pdds;
+	struct svm_range *prange;
+	struct mm_struct *mm;
+
+	svms = &p->svms;
+	if (!svms)
+		return -EINVAL;
+
+	mm = get_task_mm(p->lead_thread);
+	if (!mm) {
+		pr_err("failed to get mm for the target process\n");
+		return -ESRCH;
+	}
+
+	query_attr_size = sizeof(struct kfd_ioctl_svm_attribute) *
+				(nattr_common + num_devices);
+
+	query_attr = kzalloc(query_attr_size, GFP_KERNEL);
+	if (!query_attr) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	query_attr[0].type = KFD_IOCTL_SVM_ATTR_PREFERRED_LOC;
+	query_attr[1].type = KFD_IOCTL_SVM_ATTR_PREFETCH_LOC;
+	query_attr[2].type = KFD_IOCTL_SVM_ATTR_SET_FLAGS;
+	query_attr[3].type = KFD_IOCTL_SVM_ATTR_GRANULARITY;
+
+	for (index = 0; index < num_devices; index++) {
+		struct kfd_process_device *pdd = p->pdds[index];
+
+		query_attr[index + nattr_common].type =
+			KFD_IOCTL_SVM_ATTR_ACCESS;
+		query_attr[index + nattr_common].value = pdd->user_gpu_id;
+	}
+
+	svm_priv_data_size = sizeof(*svm_priv) + query_attr_size;
+
+	svm_priv = kzalloc(svm_priv_data_size, GFP_KERNEL);
+	if (!svm_priv) {
+		ret = -ENOMEM;
+		goto exit_query;
+	}
+
+	index = 0;
+	list_for_each_entry(prange, &svms->list, list) {
+
+		svm_priv->object_type = KFD_CRIU_OBJECT_TYPE_SVM_RANGE;
+		svm_priv->start_addr = prange->start;
+		svm_priv->size = prange->npages;
+		memcpy(&svm_priv->attrs, query_attr, query_attr_size);
+		pr_debug("CRIU: prange: 0x%p start: 0x%lx\t npages: 0x%llx end: 0x%llx\t size: 0x%llx\n",
+			 prange, prange->start, prange->npages,
+			 prange->start + prange->npages - 1,
+			 prange->npages * PAGE_SIZE);
+
+		ret = svm_range_get_attr(p, mm, svm_priv->start_addr,
+					 svm_priv->size,
+					 (nattr_common + num_devices),
+					 svm_priv->attrs);
+		if (ret) {
+			pr_err("CRIU: failed to obtain range attributes\n");
+			goto exit_priv;
+		}
+
+		if (copy_to_user(user_priv_data + *priv_data_offset, svm_priv,
+				 svm_priv_data_size)) {
+			pr_err("Failed to copy svm priv to user\n");
+			ret = -EFAULT;
+			goto exit_priv;
+		}
+
+		*priv_data_offset += svm_priv_data_size;
+
+	}
+
+
+exit_priv:
+	kfree(svm_priv);
+exit_query:
+	kfree(query_attr);
+exit:
+	mmput(mm);
+	return ret;
+}
+
+int
+svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
+	  uint64_t size, uint32_t nattrs, struct kfd_ioctl_svm_attribute *attrs)
+{
+	struct mm_struct *mm = current->mm;
+	int r;
+
+	start >>= PAGE_SHIFT;
+	size >>= PAGE_SHIFT;
+
+	switch (op) {
+	case KFD_IOCTL_SVM_OP_SET_ATTR:
+		r = svm_range_set_attr(p, mm, start, size, nattrs, attrs);
+		break;
+	case KFD_IOCTL_SVM_OP_GET_ATTR:
+		r = svm_range_get_attr(p, mm, start, size, nattrs, attrs);
+		break;
+	default:
+		r = EINVAL;
+		break;
+	}
+
+	return r;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
new file mode 100644
index 000000000..25f711905
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -0,0 +1,275 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2020-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_SVM_H_
+#define KFD_SVM_H_
+
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+
+#include <linux/rwsem.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/sched/mm.h>
+#include <linux/hmm.h>
+#include "amdgpu.h"
+#include "kfd_priv.h"
+
+#define SVM_RANGE_VRAM_DOMAIN (1UL << 0)
+#define SVM_ADEV_PGMAP_OWNER(adev)\
+			((adev)->hive ? (void *)(adev)->hive : (void *)(adev))
+
+struct svm_range_bo {
+	struct amdgpu_bo		*bo;
+	struct kref			kref;
+	struct list_head		range_list; /* all svm ranges shared this bo */
+	spinlock_t			list_lock;
+	struct amdgpu_amdkfd_fence	*eviction_fence;
+	struct work_struct		eviction_work;
+	uint32_t			evicting;
+	struct work_struct		release_work;
+	struct kfd_node			*node;
+};
+
+enum svm_work_list_ops {
+	SVM_OP_NULL,
+	SVM_OP_UNMAP_RANGE,
+	SVM_OP_UPDATE_RANGE_NOTIFIER,
+	SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP,
+	SVM_OP_ADD_RANGE,
+	SVM_OP_ADD_RANGE_AND_MAP
+};
+
+struct svm_work_list_item {
+	enum svm_work_list_ops op;
+	struct mm_struct *mm;
+};
+
+/**
+ * struct svm_range - shared virtual memory range
+ *
+ * @svms:       list of svm ranges, structure defined in kfd_process
+ * @migrate_mutex: to serialize range migration, validation and mapping update
+ * @start:      range start address in pages
+ * @last:       range last address in pages
+ * @it_node:    node [start, last] stored in interval tree, start, last are page
+ *              aligned, page size is (last - start + 1)
+ * @list:       link list node, used to scan all ranges of svms
+ * @update_list:link list node used to add to update_list
+ * @mapping:    bo_va mapping structure to create and update GPU page table
+ * @npages:     number of pages
+ * @dma_addr:   dma mapping address on each GPU for system memory physical page
+ * @ttm_res:    vram ttm resource map
+ * @offset:     range start offset within mm_nodes
+ * @svm_bo:     struct to manage splited amdgpu_bo
+ * @svm_bo_list:link list node, to scan all ranges which share same svm_bo
+ * @lock:       protect prange start, last, child_list, svm_bo_list
+ * @saved_flags:save/restore current PF_MEMALLOC flags
+ * @flags:      flags defined as KFD_IOCTL_SVM_FLAG_*
+ * @perferred_loc: perferred location, 0 for CPU, or GPU id
+ * @perfetch_loc: last prefetch location, 0 for CPU, or GPU id
+ * @actual_loc: the actual location, 0 for CPU, or GPU id
+ * @granularity:migration granularity, log2 num pages
+ * @invalid:    not 0 means cpu page table is invalidated
+ * @validate_timestamp: system timestamp when range is validated
+ * @notifier:   register mmu interval notifier
+ * @work_item:  deferred work item information
+ * @deferred_list: list header used to add range to deferred list
+ * @child_list: list header for split ranges which are not added to svms yet
+ * @bitmap_access: index bitmap of GPUs which can access the range
+ * @bitmap_aip: index bitmap of GPUs which can access the range in place
+ *
+ * Data structure for virtual memory range shared by CPU and GPUs, it can be
+ * allocated from system memory ram or device vram, and migrate from ram to vram
+ * or from vram to ram.
+ */
+struct svm_range {
+	struct svm_range_list		*svms;
+	struct mutex			migrate_mutex;
+	unsigned long			start;
+	unsigned long			last;
+	struct interval_tree_node	it_node;
+	struct list_head		list;
+	struct list_head		update_list;
+	uint64_t			npages;
+	dma_addr_t			*dma_addr[MAX_GPU_INSTANCE];
+	struct ttm_resource		*ttm_res;
+	uint64_t			offset;
+	struct svm_range_bo		*svm_bo;
+	struct list_head		svm_bo_list;
+	struct mutex                    lock;
+	unsigned int                    saved_flags;
+	uint32_t			flags;
+	uint32_t			preferred_loc;
+	uint32_t			prefetch_loc;
+	uint32_t			actual_loc;
+	uint8_t				granularity;
+	atomic_t			invalid;
+	ktime_t				validate_timestamp;
+	struct mmu_interval_notifier	notifier;
+	struct svm_work_list_item	work_item;
+	struct list_head		deferred_list;
+	struct list_head		child_list;
+	DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
+	DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
+	bool				mapped_to_gpu;
+};
+
+static inline void svm_range_lock(struct svm_range *prange)
+{
+	mutex_lock(&prange->lock);
+	prange->saved_flags = memalloc_noreclaim_save();
+
+}
+static inline void svm_range_unlock(struct svm_range *prange)
+{
+	memalloc_noreclaim_restore(prange->saved_flags);
+	mutex_unlock(&prange->lock);
+}
+
+static inline struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo)
+{
+	if (svm_bo)
+		kref_get(&svm_bo->kref);
+
+	return svm_bo;
+}
+
+int svm_range_list_init(struct kfd_process *p);
+void svm_range_list_fini(struct kfd_process *p);
+int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
+	      uint64_t size, uint32_t nattrs,
+	      struct kfd_ioctl_svm_attribute *attrs);
+struct svm_range *svm_range_from_addr(struct svm_range_list *svms,
+				      unsigned long addr,
+				      struct svm_range **parent);
+struct kfd_node *svm_range_get_node_by_id(struct svm_range *prange,
+					  uint32_t gpu_id);
+int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
+			    bool clear);
+void svm_range_vram_node_free(struct svm_range *prange);
+int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
+			       unsigned long addr, struct svm_range *parent,
+			       struct svm_range *prange);
+int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
+			    uint32_t vmid, uint32_t node_id, uint64_t addr,
+			    bool write_fault);
+int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence);
+void svm_range_add_list_work(struct svm_range_list *svms,
+			     struct svm_range *prange, struct mm_struct *mm,
+			     enum svm_work_list_ops op);
+void schedule_deferred_list_work(struct svm_range_list *svms);
+void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
+			 unsigned long offset, unsigned long npages);
+void svm_range_free_dma_mappings(struct svm_range *prange, bool unmap_dma);
+int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
+		       uint64_t *svm_priv_data_size);
+int kfd_criu_checkpoint_svm(struct kfd_process *p,
+			    uint8_t __user *user_priv_data,
+			    uint64_t *priv_offset);
+int kfd_criu_restore_svm(struct kfd_process *p,
+			 uint8_t __user *user_priv_ptr,
+			 uint64_t *priv_data_offset,
+			 uint64_t max_priv_data_size);
+int kfd_criu_resume_svm(struct kfd_process *p);
+struct kfd_process_device *
+svm_range_get_pdd_by_node(struct svm_range *prange, struct kfd_node *node);
+void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_struct *mm);
+
+/* SVM API and HMM page migration work together, device memory type
+ * is initialized to not 0 when page migration register device memory.
+ */
+#define KFD_IS_SVM_API_SUPPORTED(adev) ((adev)->kfd.pgmap.type != 0 ||\
+					(adev)->gmc.is_app_apu)
+
+void svm_range_bo_unref_async(struct svm_range_bo *svm_bo);
+
+void svm_range_set_max_pages(struct amdgpu_device *adev);
+int svm_range_switch_xnack_reserve_mem(struct kfd_process *p, bool xnack_enabled);
+
+#else
+
+struct kfd_process;
+
+static inline int svm_range_list_init(struct kfd_process *p)
+{
+	return 0;
+}
+static inline void svm_range_list_fini(struct kfd_process *p)
+{
+	/* empty */
+}
+
+static inline int svm_range_restore_pages(struct amdgpu_device *adev,
+					  unsigned int pasid,
+					  uint32_t client_id, uint32_t node_id,
+					  uint64_t addr, bool write_fault)
+{
+	return -EFAULT;
+}
+
+static inline int svm_range_schedule_evict_svm_bo(
+		struct amdgpu_amdkfd_fence *fence)
+{
+	WARN_ONCE(1, "SVM eviction fence triggered, but SVM is disabled");
+	return -EINVAL;
+}
+
+static inline int svm_range_get_info(struct kfd_process *p,
+				     uint32_t *num_svm_ranges,
+				     uint64_t *svm_priv_data_size)
+{
+	*num_svm_ranges = 0;
+	*svm_priv_data_size = 0;
+	return 0;
+}
+
+static inline int kfd_criu_checkpoint_svm(struct kfd_process *p,
+					  uint8_t __user *user_priv_data,
+					  uint64_t *priv_offset)
+{
+	return 0;
+}
+
+static inline int kfd_criu_restore_svm(struct kfd_process *p,
+				       uint8_t __user *user_priv_ptr,
+				       uint64_t *priv_data_offset,
+				       uint64_t max_priv_data_size)
+{
+	return -EINVAL;
+}
+
+static inline int kfd_criu_resume_svm(struct kfd_process *p)
+{
+	return 0;
+}
+
+static inline void svm_range_set_max_pages(struct amdgpu_device *adev)
+{
+}
+
+#define KFD_IS_SVM_API_SUPPORTED(dev) false
+
+#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
+
+#endif /* KFD_SVM_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
new file mode 100644
index 000000000..6e75e8fa1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -0,0 +1,2295 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/errno.h>
+#include <linux/acpi.h>
+#include <linux/hash.h>
+#include <linux/cpufreq.h>
+#include <linux/log2.h>
+#include <linux/dmi.h>
+#include <linux/atomic.h>
+
+#include "kfd_priv.h"
+#include "kfd_crat.h"
+#include "kfd_topology.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_svm.h"
+#include "kfd_debug.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_ras.h"
+#include "amdgpu.h"
+
+/* topology_device_list - Master list of all topology devices */
+static struct list_head topology_device_list;
+static struct kfd_system_properties sys_props;
+
+static DECLARE_RWSEM(topology_lock);
+static uint32_t topology_crat_proximity_domain;
+
+struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock(
+						uint32_t proximity_domain)
+{
+	struct kfd_topology_device *top_dev;
+	struct kfd_topology_device *device = NULL;
+
+	list_for_each_entry(top_dev, &topology_device_list, list)
+		if (top_dev->proximity_domain == proximity_domain) {
+			device = top_dev;
+			break;
+		}
+
+	return device;
+}
+
+struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
+						uint32_t proximity_domain)
+{
+	struct kfd_topology_device *device = NULL;
+
+	down_read(&topology_lock);
+
+	device = kfd_topology_device_by_proximity_domain_no_lock(
+							proximity_domain);
+	up_read(&topology_lock);
+
+	return device;
+}
+
+struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id)
+{
+	struct kfd_topology_device *top_dev = NULL;
+	struct kfd_topology_device *ret = NULL;
+
+	down_read(&topology_lock);
+
+	list_for_each_entry(top_dev, &topology_device_list, list)
+		if (top_dev->gpu_id == gpu_id) {
+			ret = top_dev;
+			break;
+		}
+
+	up_read(&topology_lock);
+
+	return ret;
+}
+
+struct kfd_node *kfd_device_by_id(uint32_t gpu_id)
+{
+	struct kfd_topology_device *top_dev;
+
+	top_dev = kfd_topology_device_by_id(gpu_id);
+	if (!top_dev)
+		return NULL;
+
+	return top_dev->gpu;
+}
+
+struct kfd_node *kfd_device_by_pci_dev(const struct pci_dev *pdev)
+{
+	struct kfd_topology_device *top_dev;
+	struct kfd_node *device = NULL;
+
+	down_read(&topology_lock);
+
+	list_for_each_entry(top_dev, &topology_device_list, list)
+		if (top_dev->gpu && top_dev->gpu->adev->pdev == pdev) {
+			device = top_dev->gpu;
+			break;
+		}
+
+	up_read(&topology_lock);
+
+	return device;
+}
+
+/* Called with write topology_lock acquired */
+static void kfd_release_topology_device(struct kfd_topology_device *dev)
+{
+	struct kfd_mem_properties *mem;
+	struct kfd_cache_properties *cache;
+	struct kfd_iolink_properties *iolink;
+	struct kfd_iolink_properties *p2plink;
+	struct kfd_perf_properties *perf;
+
+	list_del(&dev->list);
+
+	while (dev->mem_props.next != &dev->mem_props) {
+		mem = container_of(dev->mem_props.next,
+				struct kfd_mem_properties, list);
+		list_del(&mem->list);
+		kfree(mem);
+	}
+
+	while (dev->cache_props.next != &dev->cache_props) {
+		cache = container_of(dev->cache_props.next,
+				struct kfd_cache_properties, list);
+		list_del(&cache->list);
+		kfree(cache);
+	}
+
+	while (dev->io_link_props.next != &dev->io_link_props) {
+		iolink = container_of(dev->io_link_props.next,
+				struct kfd_iolink_properties, list);
+		list_del(&iolink->list);
+		kfree(iolink);
+	}
+
+	while (dev->p2p_link_props.next != &dev->p2p_link_props) {
+		p2plink = container_of(dev->p2p_link_props.next,
+				struct kfd_iolink_properties, list);
+		list_del(&p2plink->list);
+		kfree(p2plink);
+	}
+
+	while (dev->perf_props.next != &dev->perf_props) {
+		perf = container_of(dev->perf_props.next,
+				struct kfd_perf_properties, list);
+		list_del(&perf->list);
+		kfree(perf);
+	}
+
+	kfree(dev);
+}
+
+void kfd_release_topology_device_list(struct list_head *device_list)
+{
+	struct kfd_topology_device *dev;
+
+	while (!list_empty(device_list)) {
+		dev = list_first_entry(device_list,
+				       struct kfd_topology_device, list);
+		kfd_release_topology_device(dev);
+	}
+}
+
+static void kfd_release_live_view(void)
+{
+	kfd_release_topology_device_list(&topology_device_list);
+	memset(&sys_props, 0, sizeof(sys_props));
+}
+
+struct kfd_topology_device *kfd_create_topology_device(
+				struct list_head *device_list)
+{
+	struct kfd_topology_device *dev;
+
+	dev = kfd_alloc_struct(dev);
+	if (!dev) {
+		pr_err("No memory to allocate a topology device");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&dev->mem_props);
+	INIT_LIST_HEAD(&dev->cache_props);
+	INIT_LIST_HEAD(&dev->io_link_props);
+	INIT_LIST_HEAD(&dev->p2p_link_props);
+	INIT_LIST_HEAD(&dev->perf_props);
+
+	list_add_tail(&dev->list, device_list);
+
+	return dev;
+}
+
+
+#define sysfs_show_gen_prop(buffer, offs, fmt, ...)		\
+		(offs += snprintf(buffer+offs, PAGE_SIZE-offs,	\
+				  fmt, __VA_ARGS__))
+#define sysfs_show_32bit_prop(buffer, offs, name, value) \
+		sysfs_show_gen_prop(buffer, offs, "%s %u\n", name, value)
+#define sysfs_show_64bit_prop(buffer, offs, name, value) \
+		sysfs_show_gen_prop(buffer, offs, "%s %llu\n", name, value)
+#define sysfs_show_32bit_val(buffer, offs, value) \
+		sysfs_show_gen_prop(buffer, offs, "%u\n", value)
+#define sysfs_show_str_val(buffer, offs, value) \
+		sysfs_show_gen_prop(buffer, offs, "%s\n", value)
+
+static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr,
+		char *buffer)
+{
+	int offs = 0;
+
+	/* Making sure that the buffer is an empty string */
+	buffer[0] = 0;
+
+	if (attr == &sys_props.attr_genid) {
+		sysfs_show_32bit_val(buffer, offs,
+				     sys_props.generation_count);
+	} else if (attr == &sys_props.attr_props) {
+		sysfs_show_64bit_prop(buffer, offs, "platform_oem",
+				      sys_props.platform_oem);
+		sysfs_show_64bit_prop(buffer, offs, "platform_id",
+				      sys_props.platform_id);
+		sysfs_show_64bit_prop(buffer, offs, "platform_rev",
+				      sys_props.platform_rev);
+	} else {
+		offs = -EINVAL;
+	}
+
+	return offs;
+}
+
+static void kfd_topology_kobj_release(struct kobject *kobj)
+{
+	kfree(kobj);
+}
+
+static const struct sysfs_ops sysprops_ops = {
+	.show = sysprops_show,
+};
+
+static const struct kobj_type sysprops_type = {
+	.release = kfd_topology_kobj_release,
+	.sysfs_ops = &sysprops_ops,
+};
+
+static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr,
+		char *buffer)
+{
+	int offs = 0;
+	struct kfd_iolink_properties *iolink;
+
+	/* Making sure that the buffer is an empty string */
+	buffer[0] = 0;
+
+	iolink = container_of(attr, struct kfd_iolink_properties, attr);
+	if (iolink->gpu && kfd_devcgroup_check_permission(iolink->gpu))
+		return -EPERM;
+	sysfs_show_32bit_prop(buffer, offs, "type", iolink->iolink_type);
+	sysfs_show_32bit_prop(buffer, offs, "version_major", iolink->ver_maj);
+	sysfs_show_32bit_prop(buffer, offs, "version_minor", iolink->ver_min);
+	sysfs_show_32bit_prop(buffer, offs, "node_from", iolink->node_from);
+	sysfs_show_32bit_prop(buffer, offs, "node_to", iolink->node_to);
+	sysfs_show_32bit_prop(buffer, offs, "weight", iolink->weight);
+	sysfs_show_32bit_prop(buffer, offs, "min_latency", iolink->min_latency);
+	sysfs_show_32bit_prop(buffer, offs, "max_latency", iolink->max_latency);
+	sysfs_show_32bit_prop(buffer, offs, "min_bandwidth",
+			      iolink->min_bandwidth);
+	sysfs_show_32bit_prop(buffer, offs, "max_bandwidth",
+			      iolink->max_bandwidth);
+	sysfs_show_32bit_prop(buffer, offs, "recommended_transfer_size",
+			      iolink->rec_transfer_size);
+	sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags);
+
+	return offs;
+}
+
+static const struct sysfs_ops iolink_ops = {
+	.show = iolink_show,
+};
+
+static const struct kobj_type iolink_type = {
+	.release = kfd_topology_kobj_release,
+	.sysfs_ops = &iolink_ops,
+};
+
+static ssize_t mem_show(struct kobject *kobj, struct attribute *attr,
+		char *buffer)
+{
+	int offs = 0;
+	struct kfd_mem_properties *mem;
+
+	/* Making sure that the buffer is an empty string */
+	buffer[0] = 0;
+
+	mem = container_of(attr, struct kfd_mem_properties, attr);
+	if (mem->gpu && kfd_devcgroup_check_permission(mem->gpu))
+		return -EPERM;
+	sysfs_show_32bit_prop(buffer, offs, "heap_type", mem->heap_type);
+	sysfs_show_64bit_prop(buffer, offs, "size_in_bytes",
+			      mem->size_in_bytes);
+	sysfs_show_32bit_prop(buffer, offs, "flags", mem->flags);
+	sysfs_show_32bit_prop(buffer, offs, "width", mem->width);
+	sysfs_show_32bit_prop(buffer, offs, "mem_clk_max",
+			      mem->mem_clk_max);
+
+	return offs;
+}
+
+static const struct sysfs_ops mem_ops = {
+	.show = mem_show,
+};
+
+static const struct kobj_type mem_type = {
+	.release = kfd_topology_kobj_release,
+	.sysfs_ops = &mem_ops,
+};
+
+static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
+		char *buffer)
+{
+	int offs = 0;
+	uint32_t i, j;
+	struct kfd_cache_properties *cache;
+
+	/* Making sure that the buffer is an empty string */
+	buffer[0] = 0;
+	cache = container_of(attr, struct kfd_cache_properties, attr);
+	if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
+		return -EPERM;
+	sysfs_show_32bit_prop(buffer, offs, "processor_id_low",
+			cache->processor_id_low);
+	sysfs_show_32bit_prop(buffer, offs, "level", cache->cache_level);
+	sysfs_show_32bit_prop(buffer, offs, "size", cache->cache_size);
+	sysfs_show_32bit_prop(buffer, offs, "cache_line_size",
+			      cache->cacheline_size);
+	sysfs_show_32bit_prop(buffer, offs, "cache_lines_per_tag",
+			      cache->cachelines_per_tag);
+	sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc);
+	sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency);
+	sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type);
+
+	offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map ");
+	for (i = 0; i < cache->sibling_map_size; i++)
+		for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++)
+			/* Check each bit */
+			offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,",
+						(cache->sibling_map[i] >> j) & 1);
+
+	/* Replace the last "," with end of line */
+	buffer[offs-1] = '\n';
+	return offs;
+}
+
+static const struct sysfs_ops cache_ops = {
+	.show = kfd_cache_show,
+};
+
+static const struct kobj_type cache_type = {
+	.release = kfd_topology_kobj_release,
+	.sysfs_ops = &cache_ops,
+};
+
+/****** Sysfs of Performance Counters ******/
+
+struct kfd_perf_attr {
+	struct kobj_attribute attr;
+	uint32_t data;
+};
+
+static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs,
+			char *buf)
+{
+	int offs = 0;
+	struct kfd_perf_attr *attr;
+
+	buf[0] = 0;
+	attr = container_of(attrs, struct kfd_perf_attr, attr);
+	if (!attr->data) /* invalid data for PMC */
+		return 0;
+	else
+		return sysfs_show_32bit_val(buf, offs, attr->data);
+}
+
+#define KFD_PERF_DESC(_name, _data)			\
+{							\
+	.attr  = __ATTR(_name, 0444, perf_show, NULL),	\
+	.data = _data,					\
+}
+
+static struct kfd_perf_attr perf_attr_iommu[] = {
+	KFD_PERF_DESC(max_concurrent, 0),
+	KFD_PERF_DESC(num_counters, 0),
+	KFD_PERF_DESC(counter_ids, 0),
+};
+/****************************************/
+
+static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
+		char *buffer)
+{
+	int offs = 0;
+	struct kfd_topology_device *dev;
+	uint32_t log_max_watch_addr;
+
+	/* Making sure that the buffer is an empty string */
+	buffer[0] = 0;
+
+	if (strcmp(attr->name, "gpu_id") == 0) {
+		dev = container_of(attr, struct kfd_topology_device,
+				attr_gpuid);
+		if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
+			return -EPERM;
+		return sysfs_show_32bit_val(buffer, offs, dev->gpu_id);
+	}
+
+	if (strcmp(attr->name, "name") == 0) {
+		dev = container_of(attr, struct kfd_topology_device,
+				attr_name);
+
+		if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
+			return -EPERM;
+		return sysfs_show_str_val(buffer, offs, dev->node_props.name);
+	}
+
+	dev = container_of(attr, struct kfd_topology_device,
+			attr_props);
+	if (dev->gpu && kfd_devcgroup_check_permission(dev->gpu))
+		return -EPERM;
+	sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count",
+			      dev->node_props.cpu_cores_count);
+	sysfs_show_32bit_prop(buffer, offs, "simd_count",
+			      dev->gpu ? dev->node_props.simd_count : 0);
+	sysfs_show_32bit_prop(buffer, offs, "mem_banks_count",
+			      dev->node_props.mem_banks_count);
+	sysfs_show_32bit_prop(buffer, offs, "caches_count",
+			      dev->node_props.caches_count);
+	sysfs_show_32bit_prop(buffer, offs, "io_links_count",
+			      dev->node_props.io_links_count);
+	sysfs_show_32bit_prop(buffer, offs, "p2p_links_count",
+			      dev->node_props.p2p_links_count);
+	sysfs_show_32bit_prop(buffer, offs, "cpu_core_id_base",
+			      dev->node_props.cpu_core_id_base);
+	sysfs_show_32bit_prop(buffer, offs, "simd_id_base",
+			      dev->node_props.simd_id_base);
+	sysfs_show_32bit_prop(buffer, offs, "max_waves_per_simd",
+			      dev->node_props.max_waves_per_simd);
+	sysfs_show_32bit_prop(buffer, offs, "lds_size_in_kb",
+			      dev->node_props.lds_size_in_kb);
+	sysfs_show_32bit_prop(buffer, offs, "gds_size_in_kb",
+			      dev->node_props.gds_size_in_kb);
+	sysfs_show_32bit_prop(buffer, offs, "num_gws",
+			      dev->node_props.num_gws);
+	sysfs_show_32bit_prop(buffer, offs, "wave_front_size",
+			      dev->node_props.wave_front_size);
+	sysfs_show_32bit_prop(buffer, offs, "array_count",
+			      dev->gpu ? (dev->node_props.array_count *
+					  NUM_XCC(dev->gpu->xcc_mask)) : 0);
+	sysfs_show_32bit_prop(buffer, offs, "simd_arrays_per_engine",
+			      dev->node_props.simd_arrays_per_engine);
+	sysfs_show_32bit_prop(buffer, offs, "cu_per_simd_array",
+			      dev->node_props.cu_per_simd_array);
+	sysfs_show_32bit_prop(buffer, offs, "simd_per_cu",
+			      dev->node_props.simd_per_cu);
+	sysfs_show_32bit_prop(buffer, offs, "max_slots_scratch_cu",
+			      dev->node_props.max_slots_scratch_cu);
+	sysfs_show_32bit_prop(buffer, offs, "gfx_target_version",
+			      dev->node_props.gfx_target_version);
+	sysfs_show_32bit_prop(buffer, offs, "vendor_id",
+			      dev->node_props.vendor_id);
+	sysfs_show_32bit_prop(buffer, offs, "device_id",
+			      dev->node_props.device_id);
+	sysfs_show_32bit_prop(buffer, offs, "location_id",
+			      dev->node_props.location_id);
+	sysfs_show_32bit_prop(buffer, offs, "domain",
+			      dev->node_props.domain);
+	sysfs_show_32bit_prop(buffer, offs, "drm_render_minor",
+			      dev->node_props.drm_render_minor);
+	sysfs_show_64bit_prop(buffer, offs, "hive_id",
+			      dev->node_props.hive_id);
+	sysfs_show_32bit_prop(buffer, offs, "num_sdma_engines",
+			      dev->node_props.num_sdma_engines);
+	sysfs_show_32bit_prop(buffer, offs, "num_sdma_xgmi_engines",
+			      dev->node_props.num_sdma_xgmi_engines);
+	sysfs_show_32bit_prop(buffer, offs, "num_sdma_queues_per_engine",
+			      dev->node_props.num_sdma_queues_per_engine);
+	sysfs_show_32bit_prop(buffer, offs, "num_cp_queues",
+			      dev->node_props.num_cp_queues);
+
+	if (dev->gpu) {
+		log_max_watch_addr =
+			__ilog2_u32(dev->gpu->kfd->device_info.num_of_watch_points);
+
+		if (log_max_watch_addr) {
+			dev->node_props.capability |=
+					HSA_CAP_WATCH_POINTS_SUPPORTED;
+
+			dev->node_props.capability |=
+				((log_max_watch_addr <<
+					HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT) &
+				HSA_CAP_WATCH_POINTS_TOTALBITS_MASK);
+		}
+
+		if (dev->gpu->adev->asic_type == CHIP_TONGA)
+			dev->node_props.capability |=
+					HSA_CAP_AQL_QUEUE_DOUBLE_MAP;
+
+		sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_fcompute",
+			dev->node_props.max_engine_clk_fcompute);
+
+		sysfs_show_64bit_prop(buffer, offs, "local_mem_size", 0ULL);
+
+		sysfs_show_32bit_prop(buffer, offs, "fw_version",
+				      dev->gpu->kfd->mec_fw_version);
+		sysfs_show_32bit_prop(buffer, offs, "capability",
+				      dev->node_props.capability);
+		sysfs_show_64bit_prop(buffer, offs, "debug_prop",
+				      dev->node_props.debug_prop);
+		sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version",
+				      dev->gpu->kfd->sdma_fw_version);
+		sysfs_show_64bit_prop(buffer, offs, "unique_id",
+				      dev->gpu->adev->unique_id);
+		sysfs_show_32bit_prop(buffer, offs, "num_xcc",
+				      NUM_XCC(dev->gpu->xcc_mask));
+	}
+
+	return sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_ccompute",
+				     cpufreq_quick_get_max(0)/1000);
+}
+
+static const struct sysfs_ops node_ops = {
+	.show = node_show,
+};
+
+static const struct kobj_type node_type = {
+	.release = kfd_topology_kobj_release,
+	.sysfs_ops = &node_ops,
+};
+
+static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr)
+{
+	sysfs_remove_file(kobj, attr);
+	kobject_del(kobj);
+	kobject_put(kobj);
+}
+
+static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
+{
+	struct kfd_iolink_properties *p2plink;
+	struct kfd_iolink_properties *iolink;
+	struct kfd_cache_properties *cache;
+	struct kfd_mem_properties *mem;
+	struct kfd_perf_properties *perf;
+
+	if (dev->kobj_iolink) {
+		list_for_each_entry(iolink, &dev->io_link_props, list)
+			if (iolink->kobj) {
+				kfd_remove_sysfs_file(iolink->kobj,
+							&iolink->attr);
+				iolink->kobj = NULL;
+			}
+		kobject_del(dev->kobj_iolink);
+		kobject_put(dev->kobj_iolink);
+		dev->kobj_iolink = NULL;
+	}
+
+	if (dev->kobj_p2plink) {
+		list_for_each_entry(p2plink, &dev->p2p_link_props, list)
+			if (p2plink->kobj) {
+				kfd_remove_sysfs_file(p2plink->kobj,
+							&p2plink->attr);
+				p2plink->kobj = NULL;
+			}
+		kobject_del(dev->kobj_p2plink);
+		kobject_put(dev->kobj_p2plink);
+		dev->kobj_p2plink = NULL;
+	}
+
+	if (dev->kobj_cache) {
+		list_for_each_entry(cache, &dev->cache_props, list)
+			if (cache->kobj) {
+				kfd_remove_sysfs_file(cache->kobj,
+							&cache->attr);
+				cache->kobj = NULL;
+			}
+		kobject_del(dev->kobj_cache);
+		kobject_put(dev->kobj_cache);
+		dev->kobj_cache = NULL;
+	}
+
+	if (dev->kobj_mem) {
+		list_for_each_entry(mem, &dev->mem_props, list)
+			if (mem->kobj) {
+				kfd_remove_sysfs_file(mem->kobj, &mem->attr);
+				mem->kobj = NULL;
+			}
+		kobject_del(dev->kobj_mem);
+		kobject_put(dev->kobj_mem);
+		dev->kobj_mem = NULL;
+	}
+
+	if (dev->kobj_perf) {
+		list_for_each_entry(perf, &dev->perf_props, list) {
+			kfree(perf->attr_group);
+			perf->attr_group = NULL;
+		}
+		kobject_del(dev->kobj_perf);
+		kobject_put(dev->kobj_perf);
+		dev->kobj_perf = NULL;
+	}
+
+	if (dev->kobj_node) {
+		sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid);
+		sysfs_remove_file(dev->kobj_node, &dev->attr_name);
+		sysfs_remove_file(dev->kobj_node, &dev->attr_props);
+		kobject_del(dev->kobj_node);
+		kobject_put(dev->kobj_node);
+		dev->kobj_node = NULL;
+	}
+}
+
+static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
+		uint32_t id)
+{
+	struct kfd_iolink_properties *p2plink;
+	struct kfd_iolink_properties *iolink;
+	struct kfd_cache_properties *cache;
+	struct kfd_mem_properties *mem;
+	struct kfd_perf_properties *perf;
+	int ret;
+	uint32_t i, num_attrs;
+	struct attribute **attrs;
+
+	if (WARN_ON(dev->kobj_node))
+		return -EEXIST;
+
+	/*
+	 * Creating the sysfs folders
+	 */
+	dev->kobj_node = kfd_alloc_struct(dev->kobj_node);
+	if (!dev->kobj_node)
+		return -ENOMEM;
+
+	ret = kobject_init_and_add(dev->kobj_node, &node_type,
+			sys_props.kobj_nodes, "%d", id);
+	if (ret < 0) {
+		kobject_put(dev->kobj_node);
+		return ret;
+	}
+
+	dev->kobj_mem = kobject_create_and_add("mem_banks", dev->kobj_node);
+	if (!dev->kobj_mem)
+		return -ENOMEM;
+
+	dev->kobj_cache = kobject_create_and_add("caches", dev->kobj_node);
+	if (!dev->kobj_cache)
+		return -ENOMEM;
+
+	dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node);
+	if (!dev->kobj_iolink)
+		return -ENOMEM;
+
+	dev->kobj_p2plink = kobject_create_and_add("p2p_links", dev->kobj_node);
+	if (!dev->kobj_p2plink)
+		return -ENOMEM;
+
+	dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node);
+	if (!dev->kobj_perf)
+		return -ENOMEM;
+
+	/*
+	 * Creating sysfs files for node properties
+	 */
+	dev->attr_gpuid.name = "gpu_id";
+	dev->attr_gpuid.mode = KFD_SYSFS_FILE_MODE;
+	sysfs_attr_init(&dev->attr_gpuid);
+	dev->attr_name.name = "name";
+	dev->attr_name.mode = KFD_SYSFS_FILE_MODE;
+	sysfs_attr_init(&dev->attr_name);
+	dev->attr_props.name = "properties";
+	dev->attr_props.mode = KFD_SYSFS_FILE_MODE;
+	sysfs_attr_init(&dev->attr_props);
+	ret = sysfs_create_file(dev->kobj_node, &dev->attr_gpuid);
+	if (ret < 0)
+		return ret;
+	ret = sysfs_create_file(dev->kobj_node, &dev->attr_name);
+	if (ret < 0)
+		return ret;
+	ret = sysfs_create_file(dev->kobj_node, &dev->attr_props);
+	if (ret < 0)
+		return ret;
+
+	i = 0;
+	list_for_each_entry(mem, &dev->mem_props, list) {
+		mem->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+		if (!mem->kobj)
+			return -ENOMEM;
+		ret = kobject_init_and_add(mem->kobj, &mem_type,
+				dev->kobj_mem, "%d", i);
+		if (ret < 0) {
+			kobject_put(mem->kobj);
+			return ret;
+		}
+
+		mem->attr.name = "properties";
+		mem->attr.mode = KFD_SYSFS_FILE_MODE;
+		sysfs_attr_init(&mem->attr);
+		ret = sysfs_create_file(mem->kobj, &mem->attr);
+		if (ret < 0)
+			return ret;
+		i++;
+	}
+
+	i = 0;
+	list_for_each_entry(cache, &dev->cache_props, list) {
+		cache->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+		if (!cache->kobj)
+			return -ENOMEM;
+		ret = kobject_init_and_add(cache->kobj, &cache_type,
+				dev->kobj_cache, "%d", i);
+		if (ret < 0) {
+			kobject_put(cache->kobj);
+			return ret;
+		}
+
+		cache->attr.name = "properties";
+		cache->attr.mode = KFD_SYSFS_FILE_MODE;
+		sysfs_attr_init(&cache->attr);
+		ret = sysfs_create_file(cache->kobj, &cache->attr);
+		if (ret < 0)
+			return ret;
+		i++;
+	}
+
+	i = 0;
+	list_for_each_entry(iolink, &dev->io_link_props, list) {
+		iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+		if (!iolink->kobj)
+			return -ENOMEM;
+		ret = kobject_init_and_add(iolink->kobj, &iolink_type,
+				dev->kobj_iolink, "%d", i);
+		if (ret < 0) {
+			kobject_put(iolink->kobj);
+			return ret;
+		}
+
+		iolink->attr.name = "properties";
+		iolink->attr.mode = KFD_SYSFS_FILE_MODE;
+		sysfs_attr_init(&iolink->attr);
+		ret = sysfs_create_file(iolink->kobj, &iolink->attr);
+		if (ret < 0)
+			return ret;
+		i++;
+	}
+
+	i = 0;
+	list_for_each_entry(p2plink, &dev->p2p_link_props, list) {
+		p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+		if (!p2plink->kobj)
+			return -ENOMEM;
+		ret = kobject_init_and_add(p2plink->kobj, &iolink_type,
+				dev->kobj_p2plink, "%d", i);
+		if (ret < 0) {
+			kobject_put(p2plink->kobj);
+			return ret;
+		}
+
+		p2plink->attr.name = "properties";
+		p2plink->attr.mode = KFD_SYSFS_FILE_MODE;
+		sysfs_attr_init(&p2plink->attr);
+		ret = sysfs_create_file(p2plink->kobj, &p2plink->attr);
+		if (ret < 0)
+			return ret;
+		i++;
+	}
+
+	/* All hardware blocks have the same number of attributes. */
+	num_attrs = ARRAY_SIZE(perf_attr_iommu);
+	list_for_each_entry(perf, &dev->perf_props, list) {
+		perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr)
+			* num_attrs + sizeof(struct attribute_group),
+			GFP_KERNEL);
+		if (!perf->attr_group)
+			return -ENOMEM;
+
+		attrs = (struct attribute **)(perf->attr_group + 1);
+		if (!strcmp(perf->block_name, "iommu")) {
+		/* Information of IOMMU's num_counters and counter_ids is shown
+		 * under /sys/bus/event_source/devices/amd_iommu. We don't
+		 * duplicate here.
+		 */
+			perf_attr_iommu[0].data = perf->max_concurrent;
+			for (i = 0; i < num_attrs; i++)
+				attrs[i] = &perf_attr_iommu[i].attr.attr;
+		}
+		perf->attr_group->name = perf->block_name;
+		perf->attr_group->attrs = attrs;
+		ret = sysfs_create_group(dev->kobj_perf, perf->attr_group);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/* Called with write topology lock acquired */
+static int kfd_build_sysfs_node_tree(void)
+{
+	struct kfd_topology_device *dev;
+	int ret;
+	uint32_t i = 0;
+
+	list_for_each_entry(dev, &topology_device_list, list) {
+		ret = kfd_build_sysfs_node_entry(dev, i);
+		if (ret < 0)
+			return ret;
+		i++;
+	}
+
+	return 0;
+}
+
+/* Called with write topology lock acquired */
+static void kfd_remove_sysfs_node_tree(void)
+{
+	struct kfd_topology_device *dev;
+
+	list_for_each_entry(dev, &topology_device_list, list)
+		kfd_remove_sysfs_node_entry(dev);
+}
+
+static int kfd_topology_update_sysfs(void)
+{
+	int ret;
+
+	if (!sys_props.kobj_topology) {
+		sys_props.kobj_topology =
+				kfd_alloc_struct(sys_props.kobj_topology);
+		if (!sys_props.kobj_topology)
+			return -ENOMEM;
+
+		ret = kobject_init_and_add(sys_props.kobj_topology,
+				&sysprops_type,  &kfd_device->kobj,
+				"topology");
+		if (ret < 0) {
+			kobject_put(sys_props.kobj_topology);
+			return ret;
+		}
+
+		sys_props.kobj_nodes = kobject_create_and_add("nodes",
+				sys_props.kobj_topology);
+		if (!sys_props.kobj_nodes)
+			return -ENOMEM;
+
+		sys_props.attr_genid.name = "generation_id";
+		sys_props.attr_genid.mode = KFD_SYSFS_FILE_MODE;
+		sysfs_attr_init(&sys_props.attr_genid);
+		ret = sysfs_create_file(sys_props.kobj_topology,
+				&sys_props.attr_genid);
+		if (ret < 0)
+			return ret;
+
+		sys_props.attr_props.name = "system_properties";
+		sys_props.attr_props.mode = KFD_SYSFS_FILE_MODE;
+		sysfs_attr_init(&sys_props.attr_props);
+		ret = sysfs_create_file(sys_props.kobj_topology,
+				&sys_props.attr_props);
+		if (ret < 0)
+			return ret;
+	}
+
+	kfd_remove_sysfs_node_tree();
+
+	return kfd_build_sysfs_node_tree();
+}
+
+static void kfd_topology_release_sysfs(void)
+{
+	kfd_remove_sysfs_node_tree();
+	if (sys_props.kobj_topology) {
+		sysfs_remove_file(sys_props.kobj_topology,
+				&sys_props.attr_genid);
+		sysfs_remove_file(sys_props.kobj_topology,
+				&sys_props.attr_props);
+		if (sys_props.kobj_nodes) {
+			kobject_del(sys_props.kobj_nodes);
+			kobject_put(sys_props.kobj_nodes);
+			sys_props.kobj_nodes = NULL;
+		}
+		kobject_del(sys_props.kobj_topology);
+		kobject_put(sys_props.kobj_topology);
+		sys_props.kobj_topology = NULL;
+	}
+}
+
+/* Called with write topology_lock acquired */
+static void kfd_topology_update_device_list(struct list_head *temp_list,
+					struct list_head *master_list)
+{
+	while (!list_empty(temp_list)) {
+		list_move_tail(temp_list->next, master_list);
+		sys_props.num_devices++;
+	}
+}
+
+static void kfd_debug_print_topology(void)
+{
+	struct kfd_topology_device *dev;
+
+	down_read(&topology_lock);
+
+	dev = list_last_entry(&topology_device_list,
+			struct kfd_topology_device, list);
+	if (dev) {
+		if (dev->node_props.cpu_cores_count &&
+				dev->node_props.simd_count) {
+			pr_info("Topology: Add APU node [0x%0x:0x%0x]\n",
+				dev->node_props.device_id,
+				dev->node_props.vendor_id);
+		} else if (dev->node_props.cpu_cores_count)
+			pr_info("Topology: Add CPU node\n");
+		else if (dev->node_props.simd_count)
+			pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n",
+				dev->node_props.device_id,
+				dev->node_props.vendor_id);
+	}
+	up_read(&topology_lock);
+}
+
+/* Helper function for intializing platform_xx members of
+ * kfd_system_properties. Uses OEM info from the last CPU/APU node.
+ */
+static void kfd_update_system_properties(void)
+{
+	struct kfd_topology_device *dev;
+
+	down_read(&topology_lock);
+	dev = list_last_entry(&topology_device_list,
+			struct kfd_topology_device, list);
+	if (dev) {
+		sys_props.platform_id =
+			(*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK;
+		sys_props.platform_oem = *((uint64_t *)dev->oem_table_id);
+		sys_props.platform_rev = dev->oem_revision;
+	}
+	up_read(&topology_lock);
+}
+
+static void find_system_memory(const struct dmi_header *dm,
+	void *private)
+{
+	struct kfd_mem_properties *mem;
+	u16 mem_width, mem_clock;
+	struct kfd_topology_device *kdev =
+		(struct kfd_topology_device *)private;
+	const u8 *dmi_data = (const u8 *)(dm + 1);
+
+	if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) {
+		mem_width = (u16)(*(const u16 *)(dmi_data + 0x6));
+		mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11));
+		list_for_each_entry(mem, &kdev->mem_props, list) {
+			if (mem_width != 0xFFFF && mem_width != 0)
+				mem->width = mem_width;
+			if (mem_clock != 0)
+				mem->mem_clk_max = mem_clock;
+		}
+	}
+}
+
+/* kfd_add_non_crat_information - Add information that is not currently
+ *	defined in CRAT but is necessary for KFD topology
+ * @dev - topology device to which addition info is added
+ */
+static void kfd_add_non_crat_information(struct kfd_topology_device *kdev)
+{
+	/* Check if CPU only node. */
+	if (!kdev->gpu) {
+		/* Add system memory information */
+		dmi_walk(find_system_memory, kdev);
+	}
+	/* TODO: For GPU node, rearrange code from kfd_topology_add_device */
+}
+
+int kfd_topology_init(void)
+{
+	void *crat_image = NULL;
+	size_t image_size = 0;
+	int ret;
+	struct list_head temp_topology_device_list;
+	int cpu_only_node = 0;
+	struct kfd_topology_device *kdev;
+	int proximity_domain;
+
+	/* topology_device_list - Master list of all topology devices
+	 * temp_topology_device_list - temporary list created while parsing CRAT
+	 * or VCRAT. Once parsing is complete the contents of list is moved to
+	 * topology_device_list
+	 */
+
+	/* Initialize the head for the both the lists */
+	INIT_LIST_HEAD(&topology_device_list);
+	INIT_LIST_HEAD(&temp_topology_device_list);
+	init_rwsem(&topology_lock);
+
+	memset(&sys_props, 0, sizeof(sys_props));
+
+	/* Proximity domains in ACPI CRAT tables start counting at
+	 * 0. The same should be true for virtual CRAT tables created
+	 * at this stage. GPUs added later in kfd_topology_add_device
+	 * use a counter.
+	 */
+	proximity_domain = 0;
+
+	ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
+					    COMPUTE_UNIT_CPU, NULL,
+					    proximity_domain);
+	cpu_only_node = 1;
+	if (ret) {
+		pr_err("Error creating VCRAT table for CPU\n");
+		return ret;
+	}
+
+	ret = kfd_parse_crat_table(crat_image,
+				   &temp_topology_device_list,
+				   proximity_domain);
+	if (ret) {
+		pr_err("Error parsing VCRAT table for CPU\n");
+		goto err;
+	}
+
+	kdev = list_first_entry(&temp_topology_device_list,
+				struct kfd_topology_device, list);
+
+	down_write(&topology_lock);
+	kfd_topology_update_device_list(&temp_topology_device_list,
+					&topology_device_list);
+	topology_crat_proximity_domain = sys_props.num_devices-1;
+	ret = kfd_topology_update_sysfs();
+	up_write(&topology_lock);
+
+	if (!ret) {
+		sys_props.generation_count++;
+		kfd_update_system_properties();
+		kfd_debug_print_topology();
+	} else
+		pr_err("Failed to update topology in sysfs ret=%d\n", ret);
+
+	/* For nodes with GPU, this information gets added
+	 * when GPU is detected (kfd_topology_add_device).
+	 */
+	if (cpu_only_node) {
+		/* Add additional information to CPU only node created above */
+		down_write(&topology_lock);
+		kdev = list_first_entry(&topology_device_list,
+				struct kfd_topology_device, list);
+		up_write(&topology_lock);
+		kfd_add_non_crat_information(kdev);
+	}
+
+err:
+	kfd_destroy_crat_image(crat_image);
+	return ret;
+}
+
+void kfd_topology_shutdown(void)
+{
+	down_write(&topology_lock);
+	kfd_topology_release_sysfs();
+	kfd_release_live_view();
+	up_write(&topology_lock);
+}
+
+static uint32_t kfd_generate_gpu_id(struct kfd_node *gpu)
+{
+	uint32_t hashout;
+	uint32_t buf[8];
+	uint64_t local_mem_size;
+	int i;
+
+	if (!gpu)
+		return 0;
+
+	local_mem_size = gpu->local_mem_info.local_mem_size_private +
+			gpu->local_mem_info.local_mem_size_public;
+	buf[0] = gpu->adev->pdev->devfn;
+	buf[1] = gpu->adev->pdev->subsystem_vendor |
+		(gpu->adev->pdev->subsystem_device << 16);
+	buf[2] = pci_domain_nr(gpu->adev->pdev->bus);
+	buf[3] = gpu->adev->pdev->device;
+	buf[4] = gpu->adev->pdev->bus->number;
+	buf[5] = lower_32_bits(local_mem_size);
+	buf[6] = upper_32_bits(local_mem_size);
+	buf[7] = (ffs(gpu->xcc_mask) - 1) | (NUM_XCC(gpu->xcc_mask) << 16);
+
+	for (i = 0, hashout = 0; i < 8; i++)
+		hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH);
+
+	return hashout;
+}
+/* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
+ *		the GPU device is not already present in the topology device
+ *		list then return NULL. This means a new topology device has to
+ *		be created for this GPU.
+ */
+static struct kfd_topology_device *kfd_assign_gpu(struct kfd_node *gpu)
+{
+	struct kfd_topology_device *dev;
+	struct kfd_topology_device *out_dev = NULL;
+	struct kfd_mem_properties *mem;
+	struct kfd_cache_properties *cache;
+	struct kfd_iolink_properties *iolink;
+	struct kfd_iolink_properties *p2plink;
+
+	list_for_each_entry(dev, &topology_device_list, list) {
+		/* Discrete GPUs need their own topology device list
+		 * entries. Don't assign them to CPU/APU nodes.
+		 */
+		if (dev->node_props.cpu_cores_count)
+			continue;
+
+		if (!dev->gpu && (dev->node_props.simd_count > 0)) {
+			dev->gpu = gpu;
+			out_dev = dev;
+
+			list_for_each_entry(mem, &dev->mem_props, list)
+				mem->gpu = dev->gpu;
+			list_for_each_entry(cache, &dev->cache_props, list)
+				cache->gpu = dev->gpu;
+			list_for_each_entry(iolink, &dev->io_link_props, list)
+				iolink->gpu = dev->gpu;
+			list_for_each_entry(p2plink, &dev->p2p_link_props, list)
+				p2plink->gpu = dev->gpu;
+			break;
+		}
+	}
+	return out_dev;
+}
+
+static void kfd_notify_gpu_change(uint32_t gpu_id, int arrival)
+{
+	/*
+	 * TODO: Generate an event for thunk about the arrival/removal
+	 * of the GPU
+	 */
+}
+
+/* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info,
+ *		patch this after CRAT parsing.
+ */
+static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
+{
+	struct kfd_mem_properties *mem;
+	struct kfd_local_mem_info local_mem_info;
+
+	if (!dev)
+		return;
+
+	/* Currently, amdgpu driver (amdgpu_mc) deals only with GPUs with
+	 * single bank of VRAM local memory.
+	 * for dGPUs - VCRAT reports only one bank of Local Memory
+	 * for APUs - If CRAT from ACPI reports more than one bank, then
+	 *	all the banks will report the same mem_clk_max information
+	 */
+	amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info,
+					 dev->gpu->xcp);
+
+	list_for_each_entry(mem, &dev->mem_props, list)
+		mem->mem_clk_max = local_mem_info.mem_clk_max;
+}
+
+static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev,
+					struct kfd_topology_device *target_gpu_dev,
+					struct kfd_iolink_properties *link)
+{
+	/* xgmi always supports atomics between links. */
+	if (link->iolink_type == CRAT_IOLINK_TYPE_XGMI)
+		return;
+
+	/* check pcie support to set cpu(dev) flags for target_gpu_dev link. */
+	if (target_gpu_dev) {
+		uint32_t cap;
+
+		pcie_capability_read_dword(target_gpu_dev->gpu->adev->pdev,
+				PCI_EXP_DEVCAP2, &cap);
+
+		if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+			     PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
+			link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
+				CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+	/* set gpu (dev) flags. */
+	} else {
+		if (!dev->gpu->kfd->pci_atomic_requested ||
+				dev->gpu->adev->asic_type == CHIP_HAWAII)
+			link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
+				CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+	}
+}
+
+static void kfd_set_iolink_non_coherent(struct kfd_topology_device *to_dev,
+		struct kfd_iolink_properties *outbound_link,
+		struct kfd_iolink_properties *inbound_link)
+{
+	/* CPU -> GPU with PCIe */
+	if (!to_dev->gpu &&
+	    inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
+		inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT;
+
+	if (to_dev->gpu) {
+		/* GPU <-> GPU with PCIe and
+		 * Vega20 with XGMI
+		 */
+		if (inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS ||
+		    (inbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI &&
+		    KFD_GC_VERSION(to_dev->gpu) == IP_VERSION(9, 4, 0))) {
+			outbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT;
+			inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT;
+		}
+	}
+}
+
+static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
+{
+	struct kfd_iolink_properties *link, *inbound_link;
+	struct kfd_topology_device *peer_dev;
+
+	if (!dev || !dev->gpu)
+		return;
+
+	/* GPU only creates direct links so apply flags setting to all */
+	list_for_each_entry(link, &dev->io_link_props, list) {
+		link->flags = CRAT_IOLINK_FLAGS_ENABLED;
+		kfd_set_iolink_no_atomics(dev, NULL, link);
+		peer_dev = kfd_topology_device_by_proximity_domain(
+				link->node_to);
+
+		if (!peer_dev)
+			continue;
+
+		/* Include the CPU peer in GPU hive if connected over xGMI. */
+		if (!peer_dev->gpu &&
+		    link->iolink_type == CRAT_IOLINK_TYPE_XGMI) {
+			/*
+			 * If the GPU is not part of a GPU hive, use its pci
+			 * device location as the hive ID to bind with the CPU.
+			 */
+			if (!dev->node_props.hive_id)
+				dev->node_props.hive_id = pci_dev_id(dev->gpu->adev->pdev);
+			peer_dev->node_props.hive_id = dev->node_props.hive_id;
+		}
+
+		list_for_each_entry(inbound_link, &peer_dev->io_link_props,
+									list) {
+			if (inbound_link->node_to != link->node_from)
+				continue;
+
+			inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED;
+			kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link);
+			kfd_set_iolink_non_coherent(peer_dev, link, inbound_link);
+		}
+	}
+
+	/* Create indirect links so apply flags setting to all */
+	list_for_each_entry(link, &dev->p2p_link_props, list) {
+		link->flags = CRAT_IOLINK_FLAGS_ENABLED;
+		kfd_set_iolink_no_atomics(dev, NULL, link);
+		peer_dev = kfd_topology_device_by_proximity_domain(
+				link->node_to);
+
+		if (!peer_dev)
+			continue;
+
+		list_for_each_entry(inbound_link, &peer_dev->p2p_link_props,
+									list) {
+			if (inbound_link->node_to != link->node_from)
+				continue;
+
+			inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED;
+			kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link);
+			kfd_set_iolink_non_coherent(peer_dev, link, inbound_link);
+		}
+	}
+}
+
+static int kfd_build_p2p_node_entry(struct kfd_topology_device *dev,
+				struct kfd_iolink_properties *p2plink)
+{
+	int ret;
+
+	p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+	if (!p2plink->kobj)
+		return -ENOMEM;
+
+	ret = kobject_init_and_add(p2plink->kobj, &iolink_type,
+			dev->kobj_p2plink, "%d", dev->node_props.p2p_links_count - 1);
+	if (ret < 0) {
+		kobject_put(p2plink->kobj);
+		return ret;
+	}
+
+	p2plink->attr.name = "properties";
+	p2plink->attr.mode = KFD_SYSFS_FILE_MODE;
+	sysfs_attr_init(&p2plink->attr);
+	ret = sysfs_create_file(p2plink->kobj, &p2plink->attr);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int gpu_node)
+{
+	struct kfd_iolink_properties *gpu_link, *tmp_link, *cpu_link;
+	struct kfd_iolink_properties *props = NULL, *props2 = NULL;
+	struct kfd_topology_device *cpu_dev;
+	int ret = 0;
+	int i, num_cpu;
+
+	num_cpu = 0;
+	list_for_each_entry(cpu_dev, &topology_device_list, list) {
+		if (cpu_dev->gpu)
+			break;
+		num_cpu++;
+	}
+
+	if (list_empty(&kdev->io_link_props))
+		return -ENODATA;
+
+	gpu_link = list_first_entry(&kdev->io_link_props,
+				    struct kfd_iolink_properties, list);
+
+	for (i = 0; i < num_cpu; i++) {
+		/* CPU <--> GPU */
+		if (gpu_link->node_to == i)
+			continue;
+
+		/* find CPU <-->  CPU links */
+		cpu_link = NULL;
+		cpu_dev = kfd_topology_device_by_proximity_domain(i);
+		if (cpu_dev) {
+			list_for_each_entry(tmp_link,
+					&cpu_dev->io_link_props, list) {
+				if (tmp_link->node_to == gpu_link->node_to) {
+					cpu_link = tmp_link;
+					break;
+				}
+			}
+		}
+
+		if (!cpu_link)
+			return -ENOMEM;
+
+		/* CPU <--> CPU <--> GPU, GPU node*/
+		props = kfd_alloc_struct(props);
+		if (!props)
+			return -ENOMEM;
+
+		memcpy(props, gpu_link, sizeof(struct kfd_iolink_properties));
+		props->weight = gpu_link->weight + cpu_link->weight;
+		props->min_latency = gpu_link->min_latency + cpu_link->min_latency;
+		props->max_latency = gpu_link->max_latency + cpu_link->max_latency;
+		props->min_bandwidth = min(gpu_link->min_bandwidth, cpu_link->min_bandwidth);
+		props->max_bandwidth = min(gpu_link->max_bandwidth, cpu_link->max_bandwidth);
+
+		props->node_from = gpu_node;
+		props->node_to = i;
+		kdev->node_props.p2p_links_count++;
+		list_add_tail(&props->list, &kdev->p2p_link_props);
+		ret = kfd_build_p2p_node_entry(kdev, props);
+		if (ret < 0)
+			return ret;
+
+		/* for small Bar, no CPU --> GPU in-direct links */
+		if (kfd_dev_is_large_bar(kdev->gpu)) {
+			/* CPU <--> CPU <--> GPU, CPU node*/
+			props2 = kfd_alloc_struct(props2);
+			if (!props2)
+				return -ENOMEM;
+
+			memcpy(props2, props, sizeof(struct kfd_iolink_properties));
+			props2->node_from = i;
+			props2->node_to = gpu_node;
+			props2->kobj = NULL;
+			cpu_dev->node_props.p2p_links_count++;
+			list_add_tail(&props2->list, &cpu_dev->p2p_link_props);
+			ret = kfd_build_p2p_node_entry(cpu_dev, props2);
+			if (ret < 0)
+				return ret;
+		}
+	}
+	return ret;
+}
+
+#if defined(CONFIG_HSA_AMD_P2P)
+static int kfd_add_peer_prop(struct kfd_topology_device *kdev,
+		struct kfd_topology_device *peer, int from, int to)
+{
+	struct kfd_iolink_properties *props = NULL;
+	struct kfd_iolink_properties *iolink1, *iolink2, *iolink3;
+	struct kfd_topology_device *cpu_dev;
+	int ret = 0;
+
+	if (!amdgpu_device_is_peer_accessible(
+				kdev->gpu->adev,
+				peer->gpu->adev))
+		return ret;
+
+	if (list_empty(&kdev->io_link_props))
+		return -ENODATA;
+
+	iolink1 = list_first_entry(&kdev->io_link_props,
+				   struct kfd_iolink_properties, list);
+
+	if (list_empty(&peer->io_link_props))
+		return -ENODATA;
+
+	iolink2 = list_first_entry(&peer->io_link_props,
+				   struct kfd_iolink_properties, list);
+
+	props = kfd_alloc_struct(props);
+	if (!props)
+		return -ENOMEM;
+
+	memcpy(props, iolink1, sizeof(struct kfd_iolink_properties));
+
+	props->weight = iolink1->weight + iolink2->weight;
+	props->min_latency = iolink1->min_latency + iolink2->min_latency;
+	props->max_latency = iolink1->max_latency + iolink2->max_latency;
+	props->min_bandwidth = min(iolink1->min_bandwidth, iolink2->min_bandwidth);
+	props->max_bandwidth = min(iolink2->max_bandwidth, iolink2->max_bandwidth);
+
+	if (iolink1->node_to != iolink2->node_to) {
+		/* CPU->CPU  link*/
+		cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to);
+		if (cpu_dev) {
+			list_for_each_entry(iolink3, &cpu_dev->io_link_props, list)
+				if (iolink3->node_to == iolink2->node_to)
+					break;
+
+			props->weight += iolink3->weight;
+			props->min_latency += iolink3->min_latency;
+			props->max_latency += iolink3->max_latency;
+			props->min_bandwidth = min(props->min_bandwidth,
+							iolink3->min_bandwidth);
+			props->max_bandwidth = min(props->max_bandwidth,
+							iolink3->max_bandwidth);
+		} else {
+			WARN(1, "CPU node not found");
+		}
+	}
+
+	props->node_from = from;
+	props->node_to = to;
+	peer->node_props.p2p_links_count++;
+	list_add_tail(&props->list, &peer->p2p_link_props);
+	ret = kfd_build_p2p_node_entry(peer, props);
+
+	return ret;
+}
+#endif
+
+static int kfd_dev_create_p2p_links(void)
+{
+	struct kfd_topology_device *dev;
+	struct kfd_topology_device *new_dev;
+#if defined(CONFIG_HSA_AMD_P2P)
+	uint32_t i;
+#endif
+	uint32_t k;
+	int ret = 0;
+
+	k = 0;
+	list_for_each_entry(dev, &topology_device_list, list)
+		k++;
+	if (k < 2)
+		return 0;
+
+	new_dev = list_last_entry(&topology_device_list, struct kfd_topology_device, list);
+	if (WARN_ON(!new_dev->gpu))
+		return 0;
+
+	k--;
+
+	/* create in-direct links */
+	ret = kfd_create_indirect_link_prop(new_dev, k);
+	if (ret < 0)
+		goto out;
+
+	/* create p2p links */
+#if defined(CONFIG_HSA_AMD_P2P)
+	i = 0;
+	list_for_each_entry(dev, &topology_device_list, list) {
+		if (dev == new_dev)
+			break;
+		if (!dev->gpu || !dev->gpu->adev ||
+		    (dev->gpu->kfd->hive_id &&
+		     dev->gpu->kfd->hive_id == new_dev->gpu->kfd->hive_id))
+			goto next;
+
+		/* check if node(s) is/are peer accessible in one direction or bi-direction */
+		ret = kfd_add_peer_prop(new_dev, dev, i, k);
+		if (ret < 0)
+			goto out;
+
+		ret = kfd_add_peer_prop(dev, new_dev, k, i);
+		if (ret < 0)
+			goto out;
+next:
+		i++;
+	}
+#endif
+
+out:
+	return ret;
+}
+
+/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
+static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
+				struct kfd_gpu_cache_info *pcache_info,
+				struct kfd_cu_info *cu_info,
+				int cu_bitmask,
+				int cache_type, unsigned int cu_processor_id,
+				int cu_block)
+{
+	unsigned int cu_sibling_map_mask;
+	int first_active_cu;
+	struct kfd_cache_properties *pcache = NULL;
+
+	cu_sibling_map_mask = cu_bitmask;
+	cu_sibling_map_mask >>= cu_block;
+	cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
+	first_active_cu = ffs(cu_sibling_map_mask);
+
+	/* CU could be inactive. In case of shared cache find the first active
+	 * CU. and incase of non-shared cache check if the CU is inactive. If
+	 * inactive active skip it
+	 */
+	if (first_active_cu) {
+		pcache = kfd_alloc_struct(pcache);
+		if (!pcache)
+			return -ENOMEM;
+
+		memset(pcache, 0, sizeof(struct kfd_cache_properties));
+		pcache->processor_id_low = cu_processor_id + (first_active_cu - 1);
+		pcache->cache_level = pcache_info[cache_type].cache_level;
+		pcache->cache_size = pcache_info[cache_type].cache_size;
+
+		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)
+			pcache->cache_type |= HSA_CACHE_TYPE_DATA;
+		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE)
+			pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
+		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE)
+			pcache->cache_type |= HSA_CACHE_TYPE_CPU;
+		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
+			pcache->cache_type |= HSA_CACHE_TYPE_HSACU;
+
+		/* Sibling map is w.r.t processor_id_low, so shift out
+		 * inactive CU
+		 */
+		cu_sibling_map_mask =
+			cu_sibling_map_mask >> (first_active_cu - 1);
+
+		pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);
+		pcache->sibling_map[1] =
+				(uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
+		pcache->sibling_map[2] =
+				(uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
+		pcache->sibling_map[3] =
+				(uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
+
+		pcache->sibling_map_size = 4;
+		*props_ext = pcache;
+
+		return 0;
+	}
+	return 1;
+}
+
+/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
+static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
+				struct kfd_gpu_cache_info *pcache_info,
+				struct kfd_cu_info *cu_info,
+				int cache_type, unsigned int cu_processor_id,
+				struct kfd_node *knode)
+{
+	unsigned int cu_sibling_map_mask;
+	int first_active_cu;
+	int i, j, k, xcc, start, end;
+	struct kfd_cache_properties *pcache = NULL;
+
+	start = ffs(knode->xcc_mask) - 1;
+	end = start + NUM_XCC(knode->xcc_mask);
+	cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0];
+	cu_sibling_map_mask &=
+		((1 << pcache_info[cache_type].num_cu_shared) - 1);
+	first_active_cu = ffs(cu_sibling_map_mask);
+
+	/* CU could be inactive. In case of shared cache find the first active
+	 * CU. and incase of non-shared cache check if the CU is inactive. If
+	 * inactive active skip it
+	 */
+	if (first_active_cu) {
+		pcache = kfd_alloc_struct(pcache);
+		if (!pcache)
+			return -ENOMEM;
+
+		memset(pcache, 0, sizeof(struct kfd_cache_properties));
+		pcache->processor_id_low = cu_processor_id
+					+ (first_active_cu - 1);
+		pcache->cache_level = pcache_info[cache_type].cache_level;
+		pcache->cache_size = pcache_info[cache_type].cache_size;
+
+		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)
+			pcache->cache_type |= HSA_CACHE_TYPE_DATA;
+		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE)
+			pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
+		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE)
+			pcache->cache_type |= HSA_CACHE_TYPE_CPU;
+		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
+			pcache->cache_type |= HSA_CACHE_TYPE_HSACU;
+
+		/* Sibling map is w.r.t processor_id_low, so shift out
+		 * inactive CU
+		 */
+		cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1);
+		k = 0;
+
+		for (xcc = start; xcc < end; xcc++) {
+			for (i = 0; i < cu_info->num_shader_engines; i++) {
+				for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
+					pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
+					pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
+					pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
+					pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
+					k += 4;
+
+					cu_sibling_map_mask = cu_info->cu_bitmap[xcc][i % 4][j + i / 4];
+					cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
+				}
+			}
+		}
+		pcache->sibling_map_size = k;
+		*props_ext = pcache;
+		return 0;
+	}
+	return 1;
+}
+
+#define KFD_MAX_CACHE_TYPES 6
+
+/* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info
+ * tables
+ */
+static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_node *kdev)
+{
+	struct kfd_gpu_cache_info *pcache_info = NULL;
+	int i, j, k, xcc, start, end;
+	int ct = 0;
+	unsigned int cu_processor_id;
+	int ret;
+	unsigned int num_cu_shared;
+	struct kfd_cu_info cu_info;
+	struct kfd_cu_info *pcu_info;
+	int gpu_processor_id;
+	struct kfd_cache_properties *props_ext;
+	int num_of_entries = 0;
+	int num_of_cache_types = 0;
+	struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
+
+	amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
+	pcu_info = &cu_info;
+
+	gpu_processor_id = dev->node_props.simd_id_base;
+
+	pcache_info = cache_info;
+	num_of_cache_types = kfd_get_gpu_cache_info(kdev, &pcache_info);
+	if (!num_of_cache_types) {
+		pr_warn("no cache info found\n");
+		return;
+	}
+
+	/* For each type of cache listed in the kfd_gpu_cache_info table,
+	 * go through all available Compute Units.
+	 * The [i,j,k] loop will
+	 *		if kfd_gpu_cache_info.num_cu_shared = 1
+	 *			will parse through all available CU
+	 *		If (kfd_gpu_cache_info.num_cu_shared != 1)
+	 *			then it will consider only one CU from
+	 *			the shared unit
+	 */
+	start = ffs(kdev->xcc_mask) - 1;
+	end = start + NUM_XCC(kdev->xcc_mask);
+
+	for (ct = 0; ct < num_of_cache_types; ct++) {
+		cu_processor_id = gpu_processor_id;
+		if (pcache_info[ct].cache_level == 1) {
+			for (xcc = start; xcc < end; xcc++) {
+				for (i = 0; i < pcu_info->num_shader_engines; i++) {
+					for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
+						for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
+
+							ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
+										pcu_info->cu_bitmap[xcc][i % 4][j + i / 4], ct,
+										cu_processor_id, k);
+
+							if (ret < 0)
+								break;
+
+							if (!ret) {
+								num_of_entries++;
+								list_add_tail(&props_ext->list, &dev->cache_props);
+							}
+
+							/* Move to next CU block */
+							num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
+								pcu_info->num_cu_per_sh) ?
+								pcache_info[ct].num_cu_shared :
+								(pcu_info->num_cu_per_sh - k);
+							cu_processor_id += num_cu_shared;
+						}
+					}
+				}
+			}
+		} else {
+			ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
+					pcu_info, ct, cu_processor_id, kdev);
+
+			if (ret < 0)
+				break;
+
+			if (!ret) {
+				num_of_entries++;
+				list_add_tail(&props_ext->list, &dev->cache_props);
+			}
+		}
+	}
+	dev->node_props.caches_count += num_of_entries;
+	pr_debug("Added [%d] GPU cache entries\n", num_of_entries);
+}
+
+static int kfd_topology_add_device_locked(struct kfd_node *gpu, uint32_t gpu_id,
+					  struct kfd_topology_device **dev)
+{
+	int proximity_domain = ++topology_crat_proximity_domain;
+	struct list_head temp_topology_device_list;
+	void *crat_image = NULL;
+	size_t image_size = 0;
+	int res;
+
+	res = kfd_create_crat_image_virtual(&crat_image, &image_size,
+					    COMPUTE_UNIT_GPU, gpu,
+					    proximity_domain);
+	if (res) {
+		pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
+		       gpu_id);
+		topology_crat_proximity_domain--;
+		goto err;
+	}
+
+	INIT_LIST_HEAD(&temp_topology_device_list);
+
+	res = kfd_parse_crat_table(crat_image,
+				   &temp_topology_device_list,
+				   proximity_domain);
+	if (res) {
+		pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
+		       gpu_id);
+		topology_crat_proximity_domain--;
+		goto err;
+	}
+
+	kfd_topology_update_device_list(&temp_topology_device_list,
+					&topology_device_list);
+
+	*dev = kfd_assign_gpu(gpu);
+	if (WARN_ON(!*dev)) {
+		res = -ENODEV;
+		goto err;
+	}
+
+	/* Fill the cache affinity information here for the GPUs
+	 * using VCRAT
+	 */
+	kfd_fill_cache_non_crat_info(*dev, gpu);
+
+	/* Update the SYSFS tree, since we added another topology
+	 * device
+	 */
+	res = kfd_topology_update_sysfs();
+	if (!res)
+		sys_props.generation_count++;
+	else
+		pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
+		       gpu_id, res);
+
+err:
+	kfd_destroy_crat_image(crat_image);
+	return res;
+}
+
+static void kfd_topology_set_dbg_firmware_support(struct kfd_topology_device *dev)
+{
+	bool firmware_supported = true;
+
+	if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0) &&
+			KFD_GC_VERSION(dev->gpu) < IP_VERSION(12, 0, 0)) {
+		uint32_t mes_api_rev = (dev->gpu->adev->mes.sched_version &
+						AMDGPU_MES_API_VERSION_MASK) >>
+						AMDGPU_MES_API_VERSION_SHIFT;
+		uint32_t mes_rev = dev->gpu->adev->mes.sched_version &
+						AMDGPU_MES_VERSION_MASK;
+
+		firmware_supported = (mes_api_rev >= 14) && (mes_rev >= 64);
+		goto out;
+	}
+
+	/*
+	 * Note: Any unlisted devices here are assumed to support exception handling.
+	 * Add additional checks here as needed.
+	 */
+	switch (KFD_GC_VERSION(dev->gpu)) {
+	case IP_VERSION(9, 0, 1):
+		firmware_supported = dev->gpu->kfd->mec_fw_version >= 459 + 32768;
+		break;
+	case IP_VERSION(9, 1, 0):
+	case IP_VERSION(9, 2, 1):
+	case IP_VERSION(9, 2, 2):
+	case IP_VERSION(9, 3, 0):
+	case IP_VERSION(9, 4, 0):
+		firmware_supported = dev->gpu->kfd->mec_fw_version >= 459;
+		break;
+	case IP_VERSION(9, 4, 1):
+		firmware_supported = dev->gpu->kfd->mec_fw_version >= 60;
+		break;
+	case IP_VERSION(9, 4, 2):
+		firmware_supported = dev->gpu->kfd->mec_fw_version >= 51;
+		break;
+	case IP_VERSION(10, 1, 10):
+	case IP_VERSION(10, 1, 2):
+	case IP_VERSION(10, 1, 1):
+		firmware_supported = dev->gpu->kfd->mec_fw_version >= 144;
+		break;
+	case IP_VERSION(10, 3, 0):
+	case IP_VERSION(10, 3, 2):
+	case IP_VERSION(10, 3, 1):
+	case IP_VERSION(10, 3, 4):
+	case IP_VERSION(10, 3, 5):
+		firmware_supported = dev->gpu->kfd->mec_fw_version >= 89;
+		break;
+	case IP_VERSION(10, 1, 3):
+	case IP_VERSION(10, 3, 3):
+		firmware_supported = false;
+		break;
+	default:
+		break;
+	}
+
+out:
+	if (firmware_supported)
+		dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED;
+}
+
+static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
+{
+	dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
+				HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
+				HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
+
+	dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_SUPPORT |
+			HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_TRAP_OVERRIDE_SUPPORTED |
+			HSA_CAP_TRAP_DEBUG_WAVE_LAUNCH_MODE_SUPPORTED;
+
+	if (kfd_dbg_has_ttmps_always_setup(dev->gpu))
+		dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
+
+	if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) {
+		if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3))
+			dev->node_props.debug_prop |=
+				HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9_4_3 |
+				HSA_DBG_WATCH_ADDR_MASK_HI_BIT_GFX9_4_3;
+		else
+			dev->node_props.debug_prop |=
+				HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9 |
+				HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
+
+		if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2))
+			dev->node_props.capability |=
+				HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
+	} else {
+		dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 |
+					HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
+
+		if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0))
+			dev->node_props.capability |=
+				HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
+	}
+
+	kfd_topology_set_dbg_firmware_support(dev);
+}
+
+int kfd_topology_add_device(struct kfd_node *gpu)
+{
+	uint32_t gpu_id;
+	struct kfd_topology_device *dev;
+	struct kfd_cu_info cu_info;
+	int res = 0;
+	int i;
+	const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];
+
+	gpu_id = kfd_generate_gpu_id(gpu);
+	if (gpu->xcp && !gpu->xcp->ddev) {
+		dev_warn(gpu->adev->dev,
+		"Won't add GPU (ID: 0x%x) to topology since it has no drm node assigned.",
+		gpu_id);
+		return 0;
+	} else {
+		pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
+	}
+
+	/* Check to see if this gpu device exists in the topology_device_list.
+	 * If so, assign the gpu to that device,
+	 * else create a Virtual CRAT for this gpu device and then parse that
+	 * CRAT to create a new topology device. Once created assign the gpu to
+	 * that topology device
+	 */
+	down_write(&topology_lock);
+	dev = kfd_assign_gpu(gpu);
+	if (!dev)
+		res = kfd_topology_add_device_locked(gpu, gpu_id, &dev);
+	up_write(&topology_lock);
+	if (res)
+		return res;
+
+	dev->gpu_id = gpu_id;
+	gpu->id = gpu_id;
+
+	kfd_dev_create_p2p_links();
+
+	/* TODO: Move the following lines to function
+	 *	kfd_add_non_crat_information
+	 */
+
+	/* Fill-in additional information that is not available in CRAT but
+	 * needed for the topology
+	 */
+
+	amdgpu_amdkfd_get_cu_info(dev->gpu->adev, &cu_info);
+
+	for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1; i++) {
+		dev->node_props.name[i] = __tolower(asic_name[i]);
+		if (asic_name[i] == '\0')
+			break;
+	}
+	dev->node_props.name[i] = '\0';
+
+	dev->node_props.simd_arrays_per_engine =
+		cu_info.num_shader_arrays_per_engine;
+
+	dev->node_props.gfx_target_version =
+				gpu->kfd->device_info.gfx_target_version;
+	dev->node_props.vendor_id = gpu->adev->pdev->vendor;
+	dev->node_props.device_id = gpu->adev->pdev->device;
+	dev->node_props.capability |=
+		((dev->gpu->adev->rev_id << HSA_CAP_ASIC_REVISION_SHIFT) &
+			HSA_CAP_ASIC_REVISION_MASK);
+
+	dev->node_props.location_id = pci_dev_id(gpu->adev->pdev);
+	if (KFD_GC_VERSION(dev->gpu->kfd) == IP_VERSION(9, 4, 3))
+		dev->node_props.location_id |= dev->gpu->node_id;
+
+	dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus);
+	dev->node_props.max_engine_clk_fcompute =
+		amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->adev);
+	dev->node_props.max_engine_clk_ccompute =
+		cpufreq_quick_get_max(0) / 1000;
+
+	if (gpu->xcp)
+		dev->node_props.drm_render_minor = gpu->xcp->ddev->render->index;
+	else
+		dev->node_props.drm_render_minor =
+				gpu->kfd->shared_resources.drm_render_minor;
+
+	dev->node_props.hive_id = gpu->kfd->hive_id;
+	dev->node_props.num_sdma_engines = kfd_get_num_sdma_engines(gpu);
+	dev->node_props.num_sdma_xgmi_engines =
+					kfd_get_num_xgmi_sdma_engines(gpu);
+	dev->node_props.num_sdma_queues_per_engine =
+				gpu->kfd->device_info.num_sdma_queues_per_engine -
+				gpu->kfd->device_info.num_reserved_sdma_queues_per_engine;
+	dev->node_props.num_gws = (dev->gpu->gws &&
+		dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
+		dev->gpu->adev->gds.gws_size : 0;
+	dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm);
+
+	kfd_fill_mem_clk_max_info(dev);
+	kfd_fill_iolink_non_crat_info(dev);
+
+	switch (dev->gpu->adev->asic_type) {
+	case CHIP_KAVERI:
+	case CHIP_HAWAII:
+	case CHIP_TONGA:
+		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 <<
+			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
+			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
+		break;
+	case CHIP_CARRIZO:
+	case CHIP_FIJI:
+	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
+		pr_debug("Adding doorbell packet type capability\n");
+		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 <<
+			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
+			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
+		break;
+	default:
+		if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(9, 0, 1))
+			WARN(1, "Unexpected ASIC family %u",
+			     dev->gpu->adev->asic_type);
+		else
+			kfd_topology_set_capabilities(dev);
+	}
+
+	/*
+	 * Overwrite ATS capability according to needs_iommu_device to fix
+	 * potential missing corresponding bit in CRAT of BIOS.
+	 */
+	dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT;
+
+	/* Fix errors in CZ CRAT.
+	 * simd_count: Carrizo CRAT reports wrong simd_count, probably
+	 *		because it doesn't consider masked out CUs
+	 * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd
+	 */
+	if (dev->gpu->adev->asic_type == CHIP_CARRIZO) {
+		dev->node_props.simd_count =
+			cu_info.simd_per_cu * cu_info.cu_active_number;
+		dev->node_props.max_waves_per_simd = 10;
+	}
+
+	/* kfd only concerns sram ecc on GFX and HBM ecc on UMC */
+	dev->node_props.capability |=
+		((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
+		HSA_CAP_SRAM_EDCSUPPORTED : 0;
+	dev->node_props.capability |=
+		((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
+		HSA_CAP_MEM_EDCSUPPORTED : 0;
+
+	if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 0, 1))
+		dev->node_props.capability |= (dev->gpu->adev->ras_enabled != 0) ?
+			HSA_CAP_RASEVENTNOTIFY : 0;
+
+	if (KFD_IS_SVM_API_SUPPORTED(dev->gpu->adev))
+		dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED;
+
+	if (dev->gpu->adev->gmc.is_app_apu ||
+		dev->gpu->adev->gmc.xgmi.connected_to_cpu)
+		dev->node_props.capability |= HSA_CAP_FLAGS_COHERENTHOSTACCESS;
+
+	kfd_debug_print_topology();
+
+	kfd_notify_gpu_change(gpu_id, 1);
+
+	return 0;
+}
+
+/**
+ * kfd_topology_update_io_links() - Update IO links after device removal.
+ * @proximity_domain: Proximity domain value of the dev being removed.
+ *
+ * The topology list currently is arranged in increasing order of
+ * proximity domain.
+ *
+ * Two things need to be done when a device is removed:
+ * 1. All the IO links to this device need to be removed.
+ * 2. All nodes after the current device node need to move
+ *    up once this device node is removed from the topology
+ *    list. As a result, the proximity domain values for
+ *    all nodes after the node being deleted reduce by 1.
+ *    This would also cause the proximity domain values for
+ *    io links to be updated based on new proximity domain
+ *    values.
+ *
+ * Context: The caller must hold write topology_lock.
+ */
+static void kfd_topology_update_io_links(int proximity_domain)
+{
+	struct kfd_topology_device *dev;
+	struct kfd_iolink_properties *iolink, *p2plink, *tmp;
+
+	list_for_each_entry(dev, &topology_device_list, list) {
+		if (dev->proximity_domain > proximity_domain)
+			dev->proximity_domain--;
+
+		list_for_each_entry_safe(iolink, tmp, &dev->io_link_props, list) {
+			/*
+			 * If there is an io link to the dev being deleted
+			 * then remove that IO link also.
+			 */
+			if (iolink->node_to == proximity_domain) {
+				list_del(&iolink->list);
+				dev->node_props.io_links_count--;
+			} else {
+				if (iolink->node_from > proximity_domain)
+					iolink->node_from--;
+				if (iolink->node_to > proximity_domain)
+					iolink->node_to--;
+			}
+		}
+
+		list_for_each_entry_safe(p2plink, tmp, &dev->p2p_link_props, list) {
+			/*
+			 * If there is a p2p link to the dev being deleted
+			 * then remove that p2p link also.
+			 */
+			if (p2plink->node_to == proximity_domain) {
+				list_del(&p2plink->list);
+				dev->node_props.p2p_links_count--;
+			} else {
+				if (p2plink->node_from > proximity_domain)
+					p2plink->node_from--;
+				if (p2plink->node_to > proximity_domain)
+					p2plink->node_to--;
+			}
+		}
+	}
+}
+
+int kfd_topology_remove_device(struct kfd_node *gpu)
+{
+	struct kfd_topology_device *dev, *tmp;
+	uint32_t gpu_id;
+	int res = -ENODEV;
+	int i = 0;
+
+	down_write(&topology_lock);
+
+	list_for_each_entry_safe(dev, tmp, &topology_device_list, list) {
+		if (dev->gpu == gpu) {
+			gpu_id = dev->gpu_id;
+			kfd_remove_sysfs_node_entry(dev);
+			kfd_release_topology_device(dev);
+			sys_props.num_devices--;
+			kfd_topology_update_io_links(i);
+			topology_crat_proximity_domain = sys_props.num_devices-1;
+			sys_props.generation_count++;
+			res = 0;
+			if (kfd_topology_update_sysfs() < 0)
+				kfd_topology_release_sysfs();
+			break;
+		}
+		i++;
+	}
+
+	up_write(&topology_lock);
+
+	if (!res)
+		kfd_notify_gpu_change(gpu_id, 0);
+
+	return res;
+}
+
+/* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD
+ *	topology. If GPU device is found @idx, then valid kfd_dev pointer is
+ *	returned through @kdev
+ * Return -	0: On success (@kdev will be NULL for non GPU nodes)
+ *		-1: If end of list
+ */
+int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_node **kdev)
+{
+
+	struct kfd_topology_device *top_dev;
+	uint8_t device_idx = 0;
+
+	*kdev = NULL;
+	down_read(&topology_lock);
+
+	list_for_each_entry(top_dev, &topology_device_list, list) {
+		if (device_idx == idx) {
+			*kdev = top_dev->gpu;
+			up_read(&topology_lock);
+			return 0;
+		}
+
+		device_idx++;
+	}
+
+	up_read(&topology_lock);
+
+	return -1;
+
+}
+
+static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
+{
+	int first_cpu_of_numa_node;
+
+	if (!cpumask || cpumask == cpu_none_mask)
+		return -1;
+	first_cpu_of_numa_node = cpumask_first(cpumask);
+	if (first_cpu_of_numa_node >= nr_cpu_ids)
+		return -1;
+#ifdef CONFIG_X86_64
+	return cpu_data(first_cpu_of_numa_node).apicid;
+#else
+	return first_cpu_of_numa_node;
+#endif
+}
+
+/* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
+ *	of the given NUMA node (numa_node_id)
+ * Return -1 on failure
+ */
+int kfd_numa_node_to_apic_id(int numa_node_id)
+{
+	if (numa_node_id == -1) {
+		pr_warn("Invalid NUMA Node. Use online CPU mask\n");
+		return kfd_cpumask_to_apic_id(cpu_online_mask);
+	}
+	return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
+{
+	struct kfd_topology_device *dev;
+	unsigned int i = 0;
+	int r = 0;
+
+	down_read(&topology_lock);
+
+	list_for_each_entry(dev, &topology_device_list, list) {
+		if (!dev->gpu) {
+			i++;
+			continue;
+		}
+
+		seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
+		r = dqm_debugfs_hqds(m, dev->gpu->dqm);
+		if (r)
+			break;
+	}
+
+	up_read(&topology_lock);
+
+	return r;
+}
+
+int kfd_debugfs_rls_by_device(struct seq_file *m, void *data)
+{
+	struct kfd_topology_device *dev;
+	unsigned int i = 0;
+	int r = 0;
+
+	down_read(&topology_lock);
+
+	list_for_each_entry(dev, &topology_device_list, list) {
+		if (!dev->gpu) {
+			i++;
+			continue;
+		}
+
+		seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
+		r = pm_debugfs_runlist(m, &dev->gpu->dqm->packet_mgr);
+		if (r)
+			break;
+	}
+
+	up_read(&topology_lock);
+
+	return r;
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
new file mode 100644
index 000000000..27386ce9a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __KFD_TOPOLOGY_H__
+#define __KFD_TOPOLOGY_H__
+
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/kfd_sysfs.h>
+#include "kfd_crat.h"
+
+#define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 32
+
+#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9	6
+#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX9_4_3 7
+#define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10	7
+#define HSA_DBG_WATCH_ADDR_MASK_HI_BIT  \
+			(29 << HSA_DBG_WATCH_ADDR_MASK_HI_BIT_SHIFT)
+#define HSA_DBG_WATCH_ADDR_MASK_HI_BIT_GFX9_4_3 \
+			(30 << HSA_DBG_WATCH_ADDR_MASK_HI_BIT_SHIFT)
+
+struct kfd_node_properties {
+	uint64_t hive_id;
+	uint32_t cpu_cores_count;
+	uint32_t simd_count;
+	uint32_t mem_banks_count;
+	uint32_t caches_count;
+	uint32_t io_links_count;
+	uint32_t p2p_links_count;
+	uint32_t cpu_core_id_base;
+	uint32_t simd_id_base;
+	uint32_t capability;
+	uint64_t debug_prop;
+	uint32_t max_waves_per_simd;
+	uint32_t lds_size_in_kb;
+	uint32_t gds_size_in_kb;
+	uint32_t num_gws;
+	uint32_t wave_front_size;
+	uint32_t array_count;
+	uint32_t simd_arrays_per_engine;
+	uint32_t cu_per_simd_array;
+	uint32_t simd_per_cu;
+	uint32_t max_slots_scratch_cu;
+	uint32_t engine_id;
+	uint32_t gfx_target_version;
+	uint32_t vendor_id;
+	uint32_t device_id;
+	uint32_t location_id;
+	uint32_t domain;
+	uint32_t max_engine_clk_fcompute;
+	uint32_t max_engine_clk_ccompute;
+	int32_t  drm_render_minor;
+	uint32_t num_sdma_engines;
+	uint32_t num_sdma_xgmi_engines;
+	uint32_t num_sdma_queues_per_engine;
+	uint32_t num_cp_queues;
+	char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
+};
+
+struct kfd_mem_properties {
+	struct list_head	list;
+	uint32_t		heap_type;
+	uint64_t		size_in_bytes;
+	uint32_t		flags;
+	uint32_t		width;
+	uint32_t		mem_clk_max;
+	struct kfd_node		*gpu;
+	struct kobject		*kobj;
+	struct attribute	attr;
+};
+
+#define CACHE_SIBLINGMAP_SIZE 128
+
+struct kfd_cache_properties {
+	struct list_head	list;
+	uint32_t		processor_id_low;
+	uint32_t		cache_level;
+	uint32_t		cache_size;
+	uint32_t		cacheline_size;
+	uint32_t		cachelines_per_tag;
+	uint32_t		cache_assoc;
+	uint32_t		cache_latency;
+	uint32_t		cache_type;
+	uint8_t			sibling_map[CACHE_SIBLINGMAP_SIZE];
+	struct kfd_node		*gpu;
+	struct kobject		*kobj;
+	struct attribute	attr;
+	uint32_t		sibling_map_size;
+};
+
+struct kfd_iolink_properties {
+	struct list_head	list;
+	uint32_t		iolink_type;
+	uint32_t		ver_maj;
+	uint32_t		ver_min;
+	uint32_t		node_from;
+	uint32_t		node_to;
+	uint32_t		weight;
+	uint32_t		min_latency;
+	uint32_t		max_latency;
+	uint32_t		min_bandwidth;
+	uint32_t		max_bandwidth;
+	uint32_t		rec_transfer_size;
+	uint32_t		flags;
+	struct kfd_node		*gpu;
+	struct kobject		*kobj;
+	struct attribute	attr;
+};
+
+struct kfd_perf_properties {
+	struct list_head	list;
+	char			block_name[16];
+	uint32_t		max_concurrent;
+	struct attribute_group	*attr_group;
+};
+
+struct kfd_topology_device {
+	struct list_head		list;
+	uint32_t			gpu_id;
+	uint32_t			proximity_domain;
+	struct kfd_node_properties	node_props;
+	struct list_head		mem_props;
+	struct list_head		cache_props;
+	struct list_head		io_link_props;
+	struct list_head		p2p_link_props;
+	struct list_head		perf_props;
+	struct kfd_node			*gpu;
+	struct kobject			*kobj_node;
+	struct kobject			*kobj_mem;
+	struct kobject			*kobj_cache;
+	struct kobject			*kobj_iolink;
+	struct kobject			*kobj_p2plink;
+	struct kobject			*kobj_perf;
+	struct attribute		attr_gpuid;
+	struct attribute		attr_name;
+	struct attribute		attr_props;
+	uint8_t				oem_id[CRAT_OEMID_LENGTH];
+	uint8_t				oem_table_id[CRAT_OEMTABLEID_LENGTH];
+	uint32_t			oem_revision;
+};
+
+struct kfd_system_properties {
+	uint32_t		num_devices;     /* Number of H-NUMA nodes */
+	uint32_t		generation_count;
+	uint64_t		platform_oem;
+	uint64_t		platform_id;
+	uint64_t		platform_rev;
+	struct kobject		*kobj_topology;
+	struct kobject		*kobj_nodes;
+	struct attribute	attr_genid;
+	struct attribute	attr_props;
+};
+
+struct kfd_topology_device *kfd_create_topology_device(
+		struct list_head *device_list);
+void kfd_release_topology_device_list(struct list_head *device_list);
+
+#endif /* __KFD_TOPOLOGY_H__ */
diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
new file mode 100644
index 000000000..10138676f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef HSA_SOC15_INT_H_INCLUDED
+#define HSA_SOC15_INT_H_INCLUDED
+
+#include "soc15_ih_clientid.h"
+
+#define SOC15_INTSRC_CP_END_OF_PIPE	181
+#define SOC15_INTSRC_CP_BAD_OPCODE	183
+#define SOC15_INTSRC_SQ_INTERRUPT_MSG	239
+#define SOC15_INTSRC_VMC_FAULT		0
+#define SOC15_INTSRC_SDMA_TRAP		224
+#define SOC15_INTSRC_SDMA_ECC		220
+#define SOC21_INTSRC_SDMA_TRAP		49
+#define SOC21_INTSRC_SDMA_ECC		62
+
+#define SOC15_CLIENT_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) & 0xff)
+#define SOC15_SOURCE_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 8 & 0xff)
+#define SOC15_RING_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 16 & 0xff)
+#define SOC15_VMID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 24 & 0xf)
+#define SOC15_VMID_TYPE_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 31 & 0x1)
+#define SOC15_PASID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[3]) & 0xffff)
+#define SOC15_NODEID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[3]) >> 16 & 0xff)
+#define SOC15_CONTEXT_ID0_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[4]))
+#define SOC15_CONTEXT_ID1_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[5]))
+#define SOC15_CONTEXT_ID2_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[6]))
+#define SOC15_CONTEXT_ID3_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[7]))
+
+#endif
+
-- 
cgit v1.2.3