14 files changed, 775 insertions, 390 deletions
diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index 3aa6eeef44..39e23d625a 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -1360,9 +1360,8 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
 			return -EINVAL;
 		}
 
-	if (!hl_device_operational(hdev, &status)) {
+	if (!hl_device_operational(hdev, &status))
 		return -EBUSY;
-	}
 
 	if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
 			!hdev->supports_staged_submission) {
diff --git a/drivers/accel/habanalabs/common/debugfs.c b/drivers/accel/habanalabs/common/debugfs.c
index 01f071d525..b1c88d1837 100644
--- a/drivers/accel/habanalabs/common/debugfs.c
+++ b/drivers/accel/habanalabs/common/debugfs.c
@@ -484,7 +484,7 @@ static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf,
 	struct hl_debugfs_entry *entry = s->private;
 	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
 	struct hl_device *hdev = dev_entry->hdev;
-	char kbuf[MMU_KBUF_SIZE];
+	char kbuf[MMU_KBUF_SIZE] = {0};
 	char *c;
 	ssize_t rc;
 
@@ -546,7 +546,7 @@ static ssize_t mmu_ack_error_value_write(struct file *file,
 	struct hl_debugfs_entry *entry = s->private;
 	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
 	struct hl_device *hdev = dev_entry->hdev;
-	char kbuf[MMU_KBUF_SIZE];
+	char kbuf[MMU_KBUF_SIZE] = {0};
 	ssize_t rc;
 
 	if (count > sizeof(kbuf) - 1)
@@ -1643,19 +1643,19 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent
 				&hl_data64b_fops);
 
 	debugfs_create_file("set_power_state",
-				0200,
+				0644,
 				root,
 				dev_entry,
 				&hl_power_fops);
 
 	debugfs_create_file("device",
-				0200,
+				0644,
 				root,
 				dev_entry,
 				&hl_device_fops);
 
 	debugfs_create_file("clk_gate",
-				0200,
+				0644,
 				root,
 				dev_entry,
 				&hl_clk_gate_fops);
@@ -1667,13 +1667,13 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent
 				&hl_stop_on_err_fops);
 
 	debugfs_create_file("dump_security_violations",
-				0644,
+				0400,
 				root,
 				dev_entry,
 				&hl_security_violations_fops);
 
 	debugfs_create_file("dump_razwi_events",
-				0644,
+				0400,
 				root,
 				dev_entry,
 				&hl_razwi_check_fops);
@@ -1706,7 +1706,7 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent
 				&hdev->reset_info.skip_reset_on_timeout);
 
 	debugfs_create_file("state_dump",
-				0600,
+				0644,
 				root,
 				dev_entry,
 				&hl_state_dump_fops);
@@ -1724,7 +1724,7 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent
 
 	for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
 		debugfs_create_file(hl_debugfs_list[i].name,
-					0444,
+					0644,
 					root,
 					entry,
 					&hl_debugfs_fops);
diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index a73bd4be94..8f92445c5a 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -55,7 +55,8 @@ static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_regi
 	if (is_power_of_2(prop->dram_pci_bar_size))
 		bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull);
 	else
-		bar_base_addr = DIV_ROUND_DOWN_ULL(addr, prop->dram_pci_bar_size) *
+		bar_base_addr = region->region_base +
+				div64_u64((addr - region->region_base), prop->dram_pci_bar_size) *
 				prop->dram_pci_bar_size;
 
 	old_base = hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr);
@@ -1034,14 +1035,14 @@ static void device_early_fini(struct hl_device *hdev)
 
 static bool is_pci_link_healthy(struct hl_device *hdev)
 {
-	u16 vendor_id;
+	u16 device_id;
 
 	if (!hdev->pdev)
 		return false;
 
-	pci_read_config_word(hdev->pdev, PCI_VENDOR_ID, &vendor_id);
+	pci_read_config_word(hdev->pdev, PCI_DEVICE_ID, &device_id);
 
-	return (vendor_id == PCI_VENDOR_ID_HABANALABS);
+	return (device_id == hdev->pdev->device);
 }
 
 static int hl_device_eq_heartbeat_check(struct hl_device *hdev)
@@ -1768,14 +1769,16 @@ kill_processes:
 		hdev->device_cpu_disabled = false;
 		hdev->reset_info.hard_reset_pending = false;
 
+		/*
+		 * Put the device in an unusable state if there are 2 back to back resets due to
+		 * fatal errors.
+		 */
 		if (hdev->reset_info.reset_trigger_repeated &&
-				(hdev->reset_info.prev_reset_trigger ==
-						HL_DRV_RESET_FW_FATAL_ERR)) {
-			/* if there 2 back to back resets from FW,
-			 * ensure driver puts the driver in a unusable state
-			 */
+				(hdev->reset_info.prev_reset_trigger == HL_DRV_RESET_FW_FATAL_ERR ||
+						hdev->reset_info.prev_reset_trigger ==
+								HL_DRV_RESET_HEARTBEAT)) {
 			dev_crit(hdev->dev,
-				"%s Consecutive FW fatal errors received, stopping hard reset\n",
+				"%s Consecutive fatal errors, stopping hard reset\n",
 				dev_name(&(hdev)->pdev->dev));
 			rc = -EIO;
 			goto out_err;
@@ -2801,3 +2804,35 @@ void hl_enable_err_info_capture(struct hl_error_info *captured_err_info)
 	atomic_set(&captured_err_info->cs_timeout.write_enable, 1);
 	captured_err_info->undef_opcode.write_enable = true;
 }
+
+void hl_init_cpu_for_irq(struct hl_device *hdev)
+{
+#ifdef CONFIG_NUMA
+	struct cpumask *available_mask = &hdev->irq_affinity_mask;
+	int numa_node = hdev->pdev->dev.numa_node, i;
+	static struct cpumask cpu_mask;
+
+	if (numa_node < 0)
+		return;
+
+	if (!cpumask_and(&cpu_mask, cpumask_of_node(numa_node), cpu_online_mask)) {
+		dev_err(hdev->dev, "No available affinities in current numa node\n");
+		return;
+	}
+
+	/* Remove HT siblings */
+	for_each_cpu(i, &cpu_mask)
+		cpumask_set_cpu(cpumask_first(topology_sibling_cpumask(i)), available_mask);
+#endif
+}
+
+void hl_set_irq_affinity(struct hl_device *hdev, int irq)
+{
+	if (cpumask_empty(&hdev->irq_affinity_mask)) {
+		dev_dbg(hdev->dev, "affinity mask is empty\n");
+		return;
+	}
+
+	if (irq_set_affinity_and_hint(irq, &hdev->irq_affinity_mask))
+		dev_err(hdev->dev, "Failed setting irq %d affinity\n", irq);
+}
diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c
index 3558a6a8e1..4bd02778a9 100644
--- a/drivers/accel/habanalabs/common/firmware_if.c
+++ b/drivers/accel/habanalabs/common/firmware_if.c
@@ -501,7 +501,7 @@ int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
 						0, &result);
 
 	if (rc)
-		dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
+		dev_err(hdev->dev, "failed to unmask event %d", event_type);
 
 	return rc;
 }
@@ -540,7 +540,7 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
 						total_pkt_size, 0, &result);
 
 	if (rc)
-		dev_err(hdev->dev, "failed to unmask IRQ array\n");
+		dev_err(hdev->dev, "failed to unmask event array\n");
 
 	kfree(pkt);
 
@@ -2718,18 +2718,20 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
 		hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
 	}
 
+	rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, sizeof(struct lkd_msg_comms));
+	if (rc)
+		goto protocol_err;
+
+	if (hdev->asic_prop.support_dynamic_resereved_fw_size)
+		hdev->asic_prop.reserved_fw_mem_size =
+			le32_to_cpu(fw_loader->dynamic_loader.comm_desc.rsvd_mem_size_mb) * SZ_1M;
+
 	if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) {
 		struct lkd_fw_binning_info *binning_info;
 
-		rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader,
-							sizeof(struct lkd_msg_comms));
-		if (rc)
-			goto protocol_err;
-
 		/* read preboot version */
 		rc = hl_fw_dynamic_read_device_fw_version(hdev, FW_COMP_PREBOOT,
 				fw_loader->dynamic_loader.comm_desc.cur_fw_ver);
-
 		if (rc)
 			return rc;
 
@@ -2756,11 +2758,6 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
 				hdev->decoder_binning, hdev->rotator_binning);
 		}
 
-		if (hdev->asic_prop.support_dynamic_resereved_fw_size) {
-			hdev->asic_prop.reserved_fw_mem_size =
-				le32_to_cpu(fw_loader->dynamic_loader.comm_desc.rsvd_mem_size_mb);
-		}
-
 		return 0;
 	}
 
@@ -2795,7 +2792,7 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
 	hdev->asic_funcs->init_cpu_scrambler_dram(hdev);
 
 	if (!(hdev->fw_components & FW_TYPE_LINUX)) {
-		dev_info(hdev->dev, "Skip loading Linux F/W\n");
+		dev_dbg(hdev->dev, "Skip loading Linux F/W\n");
 		return 0;
 	}
 
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 41c7aac2ff..48f0f3eea1 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -443,18 +443,22 @@ enum hl_collective_mode {
  *                  a CB handle can be provided for jobs on this queue.
  *                  Otherwise, a CB address must be provided.
  * @collective_mode: collective mode of current queue
+ * @q_dram_bd_address: PQ dram address, used when PQ need to reside in DRAM.
  * @driver_only: true if only the driver is allowed to send a job to this queue,
  *               false otherwise.
  * @binned: True if the queue is binned out and should not be used
  * @supports_sync_stream: True if queue supports sync stream
+ * @dram_bd: True if the bd should be copied to dram, needed for PQ which has been allocated on dram
  */
 struct hw_queue_properties {
 	enum hl_queue_type		type;
 	enum queue_cb_alloc_flags	cb_alloc_flags;
 	enum hl_collective_mode		collective_mode;
+	u64				q_dram_bd_address;
 	u8				driver_only;
 	u8				binned;
 	u8				supports_sync_stream;
+	u8				dram_bd;
 };
 
 /**
@@ -590,8 +594,6 @@ struct hl_hints_range {
  *                 we display to the user
  * @mmu_pgt_size: MMU page tables total size.
  * @mmu_pte_size: PTE size in MMU page tables.
- * @mmu_hop_table_size: MMU hop table size.
- * @mmu_hop0_tables_total_size: total size of MMU hop0 tables.
  * @dram_page_size: The DRAM physical page size.
  * @cfg_size: configuration space size on SRAM.
  * @sram_size: total size of SRAM.
@@ -645,10 +647,10 @@ struct hl_hints_range {
  * @num_engine_cores: number of engine cpu cores.
  * @max_num_of_engines: maximum number of all engines in the ASIC.
  * @num_of_special_blocks: special_blocks array size.
- * @glbl_err_cause_num: global err cause number.
+ * @glbl_err_max_cause_num: global err max cause number.
  * @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is
  *                 not supported.
- * @reserved_fw_mem_size: size in MB of dram memory reserved for FW.
+ * @reserved_fw_mem_size: size of dram memory reserved for FW.
  * @collective_first_sob: first sync object available for collective use
  * @collective_first_mon: first monitor available for collective use
  * @sync_stream_first_sob: first sync object available for sync stream use
@@ -743,8 +745,6 @@ struct asic_fixed_properties {
 	u32				clk_pll_index;
 	u32				mmu_pgt_size;
 	u32				mmu_pte_size;
-	u32				mmu_hop_table_size;
-	u32				mmu_hop0_tables_total_size;
 	u32				dram_page_size;
 	u32				cfg_size;
 	u32				sram_size;
@@ -779,7 +779,7 @@ struct asic_fixed_properties {
 	u32				num_engine_cores;
 	u32				max_num_of_engines;
 	u32				num_of_special_blocks;
-	u32				glbl_err_cause_num;
+	u32				glbl_err_max_cause_num;
 	u32				hbw_flush_reg;
 	u32				reserved_fw_mem_size;
 	u16				collective_first_sob;
@@ -1052,6 +1052,8 @@ struct hl_encaps_signals_mgr {
  * @collective_mode: collective mode of current queue
  * @kernel_address: holds the queue's kernel virtual address.
  * @bus_address: holds the queue's DMA address.
+ * @pq_dram_address: hold the dram address when the PQ is allocated, used when dram_bd is true in
+ *                   queue properites.
  * @pi: holds the queue's pi value.
  * @ci: holds the queue's ci value, AS CALCULATED BY THE DRIVER (not real ci).
  * @hw_queue_id: the id of the H/W queue.
@@ -1061,6 +1063,7 @@ struct hl_encaps_signals_mgr {
  * @valid: is the queue valid (we have array of 32 queues, not all of them
  *         exist).
  * @supports_sync_stream: True if queue supports sync stream
+ * @dram_bd: True if the bd should be copied to dram, needed for PQ which has been allocated on dram
  */
 struct hl_hw_queue {
 	struct hl_cs_job			**shadow_queue;
@@ -1069,6 +1072,7 @@ struct hl_hw_queue {
 	enum hl_collective_mode			collective_mode;
 	void					*kernel_address;
 	dma_addr_t				bus_address;
+	u64					pq_dram_address;
 	u32					pi;
 	atomic_t				ci;
 	u32					hw_queue_id;
@@ -1077,6 +1081,7 @@ struct hl_hw_queue {
 	u16					int_queue_len;
 	u8					valid;
 	u8					supports_sync_stream;
+	u8					dram_bd;
 };
 
 /**
@@ -3257,6 +3262,7 @@ struct hl_reset_info {
  * @clk_throttling: holds information about current/previous clock throttling events
  * @captured_err_info: holds information about errors.
  * @reset_info: holds current device reset information.
+ * @irq_affinity_mask: mask of available CPU cores for user and decoder interrupt handling.
  * @stream_master_qid_arr: pointer to array with QIDs of master streams.
  * @fw_inner_major_ver: the major of current loaded preboot inner version.
  * @fw_inner_minor_ver: the minor of current loaded preboot inner version.
@@ -3446,6 +3452,8 @@ struct hl_device {
 
 	struct hl_reset_info		reset_info;
 
+	cpumask_t			irq_affinity_mask;
+
 	u32				*stream_master_qid_arr;
 	u32				fw_inner_major_ver;
 	u32				fw_inner_minor_ver;
@@ -3886,6 +3894,7 @@ int hl_mmu_hr_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_
 							struct hl_hr_mmu_funcs *hr_func);
 int hl_mmu_if_set_funcs(struct hl_device *hdev);
 void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
+void hl_mmu_v2_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
 void hl_mmu_v2_hr_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
 int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr);
 int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
@@ -3893,6 +3902,22 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
 u64 hl_mmu_scramble_addr(struct hl_device *hdev, u64 addr);
 u64 hl_mmu_descramble_addr(struct hl_device *hdev, u64 addr);
 bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr);
+struct pgt_info *hl_mmu_dr_get_pgt_info(struct hl_ctx *ctx, u64 hop_addr);
+void hl_mmu_dr_free_hop(struct hl_ctx *ctx, u64 hop_addr);
+void hl_mmu_dr_free_pgt_node(struct hl_ctx *ctx, struct pgt_info *pgt_info);
+u64 hl_mmu_dr_get_phys_hop0_addr(struct hl_ctx *ctx);
+u64 hl_mmu_dr_get_hop0_addr(struct hl_ctx *ctx);
+void hl_mmu_dr_write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val);
+void hl_mmu_dr_write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val);
+void hl_mmu_dr_clear_pte(struct hl_ctx *ctx, u64 pte_addr);
+u64 hl_mmu_dr_get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
+void hl_mmu_dr_get_pte(struct hl_ctx *ctx, u64 hop_addr);
+int hl_mmu_dr_put_pte(struct hl_ctx *ctx, u64 hop_addr);
+u64 hl_mmu_dr_get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, bool *is_new_hop);
+u64 hl_mmu_dr_alloc_hop(struct hl_ctx *ctx);
+void hl_mmu_dr_flush(struct hl_ctx *ctx);
+int hl_mmu_dr_init(struct hl_device *hdev);
+void hl_mmu_dr_fini(struct hl_device *hdev);
 
 int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
 				void __iomem *dst, u32 src_offset, u32 size);
@@ -4032,6 +4057,8 @@ void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_
 void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info);
 void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count);
 void hl_enable_err_info_capture(struct hl_error_info *captured_err_info);
+void hl_init_cpu_for_irq(struct hl_device *hdev);
+void hl_set_irq_affinity(struct hl_device *hdev, int irq);
 
 #ifdef CONFIG_DEBUG_FS
 
diff --git a/drivers/accel/habanalabs/common/hw_queue.c b/drivers/accel/habanalabs/common/hw_queue.c
index d0087c0ec4..3d04a7507c 100644
--- a/drivers/accel/habanalabs/common/hw_queue.c
+++ b/drivers/accel/habanalabs/common/hw_queue.c
@@ -84,6 +84,8 @@ void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
 		u32 ctl, u32 len, u64 ptr)
 {
 	struct hl_bd *bd;
+	u64 addr;
+	int i;
 
 	bd = q->kernel_address;
 	bd += hl_pi_2_offset(q->pi);
@@ -91,7 +93,16 @@ void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
 	bd->len = cpu_to_le32(len);
 	bd->ptr = cpu_to_le64(ptr);
 
+	if (q->dram_bd)
+		for (i = 0 ; i < 2 ; i++) {
+			addr = q->pq_dram_address +
+			((hl_pi_2_offset(q->pi) * sizeof(struct hl_bd))	+ (i * sizeof(u64)));
+			hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM,	addr,
+						(u64 *)(bd) + i, DEBUGFS_WRITE64);
+		}
+
 	q->pi = hl_queue_inc_ptr(q->pi);
+
 	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
 }
 
@@ -1087,12 +1098,18 @@ int hl_hw_queues_create(struct hl_device *hdev)
 		q->supports_sync_stream =
 				asic->hw_queues_props[i].supports_sync_stream;
 		q->collective_mode = asic->hw_queues_props[i].collective_mode;
+		q->dram_bd = asic->hw_queues_props[i].dram_bd;
+
 		rc = queue_init(hdev, q, i);
 		if (rc) {
 			dev_err(hdev->dev,
 				"failed to initialize queue %d\n", i);
 			goto release_queues;
 		}
+
+		/* Set DRAM PQ address for the queue if it should be at DRAM */
+		if (q->dram_bd)
+			q->pq_dram_address = asic->hw_queues_props[i].q_dram_bd_address;
 	}
 
 	return 0;
diff --git a/drivers/accel/habanalabs/common/hwmon.c b/drivers/accel/habanalabs/common/hwmon.c
index 1ee2ee07e9..36b951b5f5 100644
--- a/drivers/accel/habanalabs/common/hwmon.c
+++ b/drivers/accel/habanalabs/common/hwmon.c
@@ -46,7 +46,7 @@ static u32 fixup_flags_legacy_fw(struct hl_device *hdev, enum hwmon_sensor_types
 		break;
 
 	default:
-		dev_err(hdev->dev, "unsupported h/w sensor type %d\n", type);
+		dev_err_ratelimited(hdev->dev, "unsupported h/w sensor type %d\n", type);
 		flags = cpucp_flags;
 		break;
 	}
@@ -134,7 +134,7 @@ static u32 adjust_hwmon_flags(struct hl_device *hdev, enum hwmon_sensor_types ty
 			break;
 
 		default:
-			dev_err(hdev->dev, "unsupported h/w sensor type %d\n", type);
+			dev_err_ratelimited(hdev->dev, "unsupported h/w sensor type %d\n", type);
 			flags = cpucp_flags;
 			break;
 		}
@@ -162,7 +162,8 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev, struct cpucp_sensor *sen
 			break;
 
 		if (type >= HWMON_NR_SENSOR_TYPES) {
-			dev_err(hdev->dev, "Got wrong sensor type %d from device\n", type);
+			dev_err_ratelimited(hdev->dev,
+				"Got wrong sensor type %d from device\n", type);
 			return -EINVAL;
 		}
 
@@ -584,7 +585,7 @@ int hl_get_temperature(struct hl_device *hdev,
 	*value = (long) result;
 
 	if (rc) {
-		dev_err(hdev->dev,
+		dev_err_ratelimited(hdev->dev,
 			"Failed to get temperature from sensor %d, error %d\n",
 			sensor_index, rc);
 		*value = 0;
@@ -611,7 +612,7 @@ int hl_set_temperature(struct hl_device *hdev,
 						0, NULL);
 
 	if (rc)
-		dev_err(hdev->dev,
+		dev_err_ratelimited(hdev->dev,
 			"Failed to set temperature of sensor %d, error %d\n",
 			sensor_index, rc);
 
@@ -638,7 +639,7 @@ int hl_get_voltage(struct hl_device *hdev,
 	*value = (long) result;
 
 	if (rc) {
-		dev_err(hdev->dev,
+		dev_err_ratelimited(hdev->dev,
 			"Failed to get voltage from sensor %d, error %d\n",
 			sensor_index, rc);
 		*value = 0;
@@ -667,7 +668,7 @@ int hl_get_current(struct hl_device *hdev,
 	*value = (long) result;
 
 	if (rc) {
-		dev_err(hdev->dev,
+		dev_err_ratelimited(hdev->dev,
 			"Failed to get current from sensor %d, error %d\n",
 			sensor_index, rc);
 		*value = 0;
@@ -696,7 +697,7 @@ int hl_get_fan_speed(struct hl_device *hdev,
 	*value = (long) result;
 
 	if (rc) {
-		dev_err(hdev->dev,
+		dev_err_ratelimited(hdev->dev,
 			"Failed to get fan speed from sensor %d, error %d\n",
 			sensor_index, rc);
 		*value = 0;
@@ -725,7 +726,7 @@ int hl_get_pwm_info(struct hl_device *hdev,
 	*value = (long) result;
 
 	if (rc) {
-		dev_err(hdev->dev,
+		dev_err_ratelimited(hdev->dev,
 			"Failed to get pwm info from sensor %d, error %d\n",
 			sensor_index, rc);
 		*value = 0;
@@ -752,7 +753,7 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
 						0, NULL);
 
 	if (rc)
-		dev_err(hdev->dev,
+		dev_err_ratelimited(hdev->dev,
 			"Failed to set pwm info to sensor %d, error %d\n",
 			sensor_index, rc);
 }
@@ -775,7 +776,7 @@ int hl_set_voltage(struct hl_device *hdev,
 						0, NULL);
 
 	if (rc)
-		dev_err(hdev->dev,
+		dev_err_ratelimited(hdev->dev,
 			"Failed to set voltage of sensor %d, error %d\n",
 			sensor_index, rc);
 
@@ -800,7 +801,7 @@ int hl_set_current(struct hl_device *hdev,
 						0, NULL);
 
 	if (rc)
-		dev_err(hdev->dev,
+		dev_err_ratelimited(hdev->dev,
 			"Failed to set current of sensor %d, error %d\n",
 			sensor_index, rc);
 
@@ -831,7 +832,7 @@ int hl_set_power(struct hl_device *hdev,
 						0, NULL);
 
 	if (rc)
-		dev_err(hdev->dev,
+		dev_err_ratelimited(hdev->dev,
 			"Failed to set power of sensor %d, error %d\n",
 			sensor_index, rc);
 
@@ -858,7 +859,7 @@ int hl_get_power(struct hl_device *hdev,
 	*value = (long) result;
 
 	if (rc) {
-		dev_err(hdev->dev,
+		dev_err_ratelimited(hdev->dev,
 			"Failed to get power of sensor %d, error %d\n",
 			sensor_index, rc);
 		*value = 0;
diff --git a/drivers/accel/habanalabs/common/mmu/Makefile b/drivers/accel/habanalabs/common/mmu/Makefile
index 1806c524e0..f4b815bf4f 100644
--- a/drivers/accel/habanalabs/common/mmu/Makefile
+++ b/drivers/accel/habanalabs/common/mmu/Makefile
@@ -1,3 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
 HL_COMMON_MMU_FILES := common/mmu/mmu.o common/mmu/mmu_v1.o \
-			common/mmu/mmu_v2_hr.o
+			common/mmu/mmu_v2.o common/mmu/mmu_v2_hr.o
diff --git a/drivers/accel/habanalabs/common/mmu/mmu.c b/drivers/accel/habanalabs/common/mmu/mmu.c
index b654302a68..d3eaab9084 100644
--- a/drivers/accel/habanalabs/common/mmu/mmu.c
+++ b/drivers/accel/habanalabs/common/mmu/mmu.c
@@ -585,6 +585,8 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
 
 int hl_mmu_if_set_funcs(struct hl_device *hdev)
 {
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+
 	if (hdev->mmu_disable)
 		return 0;
 
@@ -597,8 +599,9 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev)
 	case ASIC_GAUDI2:
 	case ASIC_GAUDI2B:
 	case ASIC_GAUDI2C:
-		/* MMUs in Gaudi2 are always host resident */
-		hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]);
+		hl_mmu_v2_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]);
+		if (prop->pmmu.host_resident)
+			hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]);
 		break;
 	default:
 		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
@@ -1209,3 +1212,219 @@ int hl_mmu_hr_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_
 	return 0;
 }
 
+struct pgt_info *hl_mmu_dr_get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
+{
+	struct pgt_info *pgt_info = NULL;
+
+	hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
+			(unsigned long) hop_addr)
+		if (hop_addr == pgt_info->shadow_addr)
+			break;
+
+	return pgt_info;
+}
+
+void hl_mmu_dr_free_hop(struct hl_ctx *ctx, u64 hop_addr)
+{
+	struct pgt_info *pgt_info = hl_mmu_dr_get_pgt_info(ctx, hop_addr);
+
+	hl_mmu_dr_free_pgt_node(ctx, pgt_info);
+}
+
+void hl_mmu_dr_free_pgt_node(struct hl_ctx *ctx, struct pgt_info *pgt_info)
+{
+	struct hl_device *hdev = ctx->hdev;
+
+	gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr,
+			hdev->asic_prop.dmmu.hop_table_size);
+	hash_del(&pgt_info->node);
+	kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
+	kfree(pgt_info);
+}
+
+u64 hl_mmu_dr_get_phys_hop0_addr(struct hl_ctx *ctx)
+{
+	return ctx->hdev->asic_prop.mmu_pgt_addr +
+			(ctx->asid * ctx->hdev->asic_prop.dmmu.hop_table_size);
+}
+
+u64 hl_mmu_dr_get_hop0_addr(struct hl_ctx *ctx)
+{
+	return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 +
+			(ctx->asid * ctx->hdev->asic_prop.dmmu.hop_table_size);
+}
+
+u64 hl_mmu_dr_get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
+{
+	u64 page_mask = ctx->hdev->asic_prop.dmmu.hop_table_size - 1;
+	u64 shadow_hop_addr = shadow_addr & (~page_mask);
+	u64 pte_offset = shadow_addr & page_mask;
+	u64 phys_hop_addr;
+
+	if (shadow_hop_addr != hl_mmu_dr_get_hop0_addr(ctx))
+		phys_hop_addr = hl_mmu_dr_get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
+	else
+		phys_hop_addr = hl_mmu_dr_get_phys_hop0_addr(ctx);
+
+	return phys_hop_addr + pte_offset;
+}
+
+void hl_mmu_dr_write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
+{
+	u64 phys_val = hl_mmu_dr_get_phys_addr(ctx, val);
+
+	ctx->hdev->asic_funcs->write_pte(ctx->hdev, hl_mmu_dr_get_phys_addr(ctx, shadow_pte_addr),
+					phys_val);
+
+	*(u64 *) (uintptr_t) shadow_pte_addr = val;
+}
+
+void hl_mmu_dr_write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
+{
+	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
+				hl_mmu_dr_get_phys_addr(ctx, shadow_pte_addr), val);
+	*(u64 *) (uintptr_t) shadow_pte_addr = val;
+}
+
+void hl_mmu_dr_clear_pte(struct hl_ctx *ctx, u64 pte_addr)
+{
+	hl_mmu_dr_write_final_pte(ctx, pte_addr, 0);
+}
+
+void hl_mmu_dr_get_pte(struct hl_ctx *ctx, u64 hop_addr)
+{
+	hl_mmu_dr_get_pgt_info(ctx, hop_addr)->num_of_ptes++;
+}
+
+int hl_mmu_dr_put_pte(struct hl_ctx *ctx, u64 hop_addr)
+{
+	struct pgt_info *pgt_info = hl_mmu_dr_get_pgt_info(ctx, hop_addr);
+	int num_of_ptes_left;
+
+	pgt_info->num_of_ptes--;
+
+	/*
+	 * Need to save the number of ptes left because hl_mmu_free_hop might free
+	 * the pgt_info
+	 */
+	num_of_ptes_left = pgt_info->num_of_ptes;
+	if (!num_of_ptes_left)
+		hl_mmu_dr_free_pgt_node(ctx, pgt_info);
+
+	return num_of_ptes_left;
+}
+
+u64 hl_mmu_dr_alloc_hop(struct hl_ctx *ctx)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct pgt_info *pgt_info;
+	u64 phys_addr, shadow_addr;
+
+	pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
+	if (!pgt_info)
+		return ULLONG_MAX;
+
+	phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool,
+					prop->dmmu.hop_table_size);
+	if (!phys_addr) {
+		dev_err(hdev->dev, "failed to allocate page\n");
+		goto pool_add_err;
+	}
+
+	shadow_addr = (u64) (uintptr_t) kzalloc(prop->dmmu.hop_table_size,
+						GFP_KERNEL);
+	if (!shadow_addr)
+		goto shadow_err;
+
+	pgt_info->phys_addr = phys_addr;
+	pgt_info->shadow_addr = shadow_addr;
+	pgt_info->ctx = ctx;
+	pgt_info->num_of_ptes = 0;
+	hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);
+
+	return shadow_addr;
+
+shadow_err:
+	gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool,
+			phys_addr, prop->dmmu.hop_table_size);
+pool_add_err:
+	kfree(pgt_info);
+
+	return ULLONG_MAX;
+}
+
+u64 hl_mmu_dr_get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, bool *is_new_hop)
+{
+	u64 hop_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
+
+	if (hop_addr == ULLONG_MAX) {
+		hop_addr = hl_mmu_dr_alloc_hop(ctx);
+		*is_new_hop = (hop_addr != ULLONG_MAX);
+	}
+
+	return hop_addr;
+}
+
+void hl_mmu_dr_flush(struct hl_ctx *ctx)
+{
+	/* flush all writes from all cores to reach PCI */
+	mb();
+	ctx->hdev->asic_funcs->read_pte(ctx->hdev, hl_mmu_dr_get_phys_hop0_addr(ctx));
+}
+
+int hl_mmu_dr_init(struct hl_device *hdev)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	int rc;
+
+	hdev->mmu_priv.dr.mmu_pgt_pool =
+			gen_pool_create(__ffs(prop->dmmu.hop_table_size), -1);
+
+	if (!hdev->mmu_priv.dr.mmu_pgt_pool) {
+		dev_err(hdev->dev, "Failed to create page gen pool\n");
+		return -ENOMEM;
+	}
+
+	rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr +
+			prop->dmmu.hop0_tables_total_size,
+			prop->dmmu.pgt_size - prop->dmmu.hop0_tables_total_size,
+			-1);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
+		goto err_pool_add;
+	}
+
+	hdev->mmu_priv.dr.mmu_shadow_hop0 = kvcalloc(prop->max_asid,
+						prop->dmmu.hop_table_size, GFP_KERNEL);
+	if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
+		rc = -ENOMEM;
+		goto err_pool_add;
+	}
+
+	/* MMU H/W init will be done in device hw_init() */
+
+	return 0;
+
+err_pool_add:
+	gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
+
+	return rc;
+}
+
+void hl_mmu_dr_fini(struct hl_device *hdev)
+{
+	/* MMU H/W fini was already done in device hw_fini() */
+
+	if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0))
+		return;
+
+	kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
+	gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
+
+	/* Make sure that if we arrive here again without init was
+	 * called we won't cause kernel panic. This can happen for
+	 * example if we fail during hard reset code at certain points
+	 */
+	hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
+}
diff --git a/drivers/accel/habanalabs/common/mmu/mmu_v1.c b/drivers/accel/habanalabs/common/mmu/mmu_v1.c
index d925dc4dd0..845d16aaa6 100644
--- a/drivers/accel/habanalabs/common/mmu/mmu_v1.c
+++ b/drivers/accel/habanalabs/common/mmu/mmu_v1.c
@@ -12,166 +12,6 @@
 
 #define MMU_V1_MAX_HOPS	(MMU_HOP4 + 1)
 
-static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
-
-static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
-{
-	struct pgt_info *pgt_info = NULL;
-
-	hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
-				(unsigned long) hop_addr)
-		if (hop_addr == pgt_info->shadow_addr)
-			break;
-
-	return pgt_info;
-}
-
-static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
-{
-	struct hl_device *hdev = ctx->hdev;
-
-	gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr,
-			hdev->asic_prop.mmu_hop_table_size);
-	hash_del(&pgt_info->node);
-	kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
-	kfree(pgt_info);
-}
-
-static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
-{
-	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
-
-	_free_hop(ctx, pgt_info);
-}
-
-static u64 alloc_hop(struct hl_ctx *ctx)
-{
-	struct hl_device *hdev = ctx->hdev;
-	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	struct pgt_info *pgt_info;
-	u64 phys_addr, shadow_addr;
-
-	pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
-	if (!pgt_info)
-		return ULLONG_MAX;
-
-	phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool,
-					prop->mmu_hop_table_size);
-	if (!phys_addr) {
-		dev_err(hdev->dev, "failed to allocate page\n");
-		goto pool_add_err;
-	}
-
-	shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
-						GFP_KERNEL);
-	if (!shadow_addr)
-		goto shadow_err;
-
-	pgt_info->phys_addr = phys_addr;
-	pgt_info->shadow_addr = shadow_addr;
-	pgt_info->ctx = ctx;
-	pgt_info->num_of_ptes = 0;
-	hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);
-
-	return shadow_addr;
-
-shadow_err:
-	gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, phys_addr,
-			prop->mmu_hop_table_size);
-pool_add_err:
-	kfree(pgt_info);
-
-	return ULLONG_MAX;
-}
-
-static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
-{
-	return ctx->hdev->asic_prop.mmu_pgt_addr +
-			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
-}
-
-static inline u64 get_hop0_addr(struct hl_ctx *ctx)
-{
-	return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 +
-			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
-}
-
-static void flush(struct hl_ctx *ctx)
-{
-	/* flush all writes from all cores to reach PCI */
-	mb();
-	ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
-}
-
-/* transform the value to physical address when writing to H/W */
-static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
-{
-	/*
-	 * The value to write is actually the address of the next shadow hop +
-	 * flags at the 12 LSBs.
-	 * Hence in order to get the value to write to the physical PTE, we
-	 * clear the 12 LSBs and translate the shadow hop to its associated
-	 * physical hop, and add back the original 12 LSBs.
-	 */
-	u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) |
-				(val & FLAGS_MASK);
-
-	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
-					get_phys_addr(ctx, shadow_pte_addr),
-					phys_val);
-
-	*(u64 *) (uintptr_t) shadow_pte_addr = val;
-}
-
-/* do not transform the value to physical address when writing to H/W */
-static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
-					u64 val)
-{
-	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
-					get_phys_addr(ctx, shadow_pte_addr),
-					val);
-	*(u64 *) (uintptr_t) shadow_pte_addr = val;
-}
-
-/* clear the last and present bits */
-static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
-{
-	/* no need to transform the value to physical address */
-	write_final_pte(ctx, pte_addr, 0);
-}
-
-static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
-{
-	get_pgt_info(ctx, hop_addr)->num_of_ptes++;
-}
-
-/*
- * put_pte - decrement the num of ptes and free the hop if possible
- *
- * @ctx: pointer to the context structure
- * @hop_addr: addr of the hop
- *
- * This function returns the number of ptes left on this hop. If the number is
- * 0, it means the pte was freed.
- */
-static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
-{
-	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
-	int num_of_ptes_left;
-
-	pgt_info->num_of_ptes--;
-
-	/*
-	 * Need to save the number of ptes left because free_hop might free
-	 * the pgt_info
-	 */
-	num_of_ptes_left = pgt_info->num_of_ptes;
-	if (!num_of_ptes_left)
-		_free_hop(ctx, pgt_info);
-
-	return num_of_ptes_left;
-}
-
 static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
 					u64 *hop_addr_arr, u64 virt_addr, enum mmu_hop_num hop_idx)
 {
@@ -183,35 +23,6 @@ static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, struct hl_mmu_properties
 			ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift);
 }
 
-static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
-						bool *is_new_hop)
-{
-	u64 hop_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
-
-	if (hop_addr == ULLONG_MAX) {
-		hop_addr = alloc_hop(ctx);
-		*is_new_hop = (hop_addr != ULLONG_MAX);
-	}
-
-	return hop_addr;
-}
-
-/* translates shadow address inside hop to a physical address */
-static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
-{
-	u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
-	u64 shadow_hop_addr = shadow_addr & ~page_mask;
-	u64 pte_offset = shadow_addr & page_mask;
-	u64 phys_hop_addr;
-
-	if (shadow_hop_addr != get_hop0_addr(ctx))
-		phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
-	else
-		phys_hop_addr = get_phys_hop0_addr(ctx);
-
-	return phys_hop_addr + pte_offset;
-}
-
 static int dram_default_mapping_init(struct hl_ctx *ctx)
 {
 	struct hl_device *hdev = ctx->hdev;
@@ -232,13 +43,13 @@ static int dram_default_mapping_init(struct hl_ctx *ctx)
 	/* add hop1 and hop2 */
 	total_hops = num_of_hop3 + 2;
 
-	ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops,  GFP_KERNEL);
+	ctx->dram_default_hops = kcalloc(total_hops, HL_PTE_SIZE,  GFP_KERNEL);
 	if (!ctx->dram_default_hops)
 		return -ENOMEM;
 
-	hop0_addr = get_hop0_addr(ctx);
+	hop0_addr = hl_mmu_dr_get_hop0_addr(ctx);
 
-	hop1_addr = alloc_hop(ctx);
+	hop1_addr = hl_mmu_dr_alloc_hop(ctx);
 	if (hop1_addr == ULLONG_MAX) {
 		dev_err(hdev->dev, "failed to alloc hop 1\n");
 		rc = -ENOMEM;
@@ -247,7 +58,7 @@ static int dram_default_mapping_init(struct hl_ctx *ctx)
 
 	ctx->dram_default_hops[total_hops - 1] = hop1_addr;
 
-	hop2_addr = alloc_hop(ctx);
+	hop2_addr = hl_mmu_dr_alloc_hop(ctx);
 	if (hop2_addr == ULLONG_MAX) {
 		dev_err(hdev->dev, "failed to alloc hop 2\n");
 		rc = -ENOMEM;
@@ -257,7 +68,7 @@ static int dram_default_mapping_init(struct hl_ctx *ctx)
 	ctx->dram_default_hops[total_hops - 2] = hop2_addr;
 
 	for (i = 0 ; i < num_of_hop3 ; i++) {
-		ctx->dram_default_hops[i] = alloc_hop(ctx);
+		ctx->dram_default_hops[i] = hl_mmu_dr_alloc_hop(ctx);
 		if (ctx->dram_default_hops[i] == ULLONG_MAX) {
 			dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
 			rc = -ENOMEM;
@@ -268,18 +79,18 @@ static int dram_default_mapping_init(struct hl_ctx *ctx)
 
 	/* need only pte 0 in hops 0 and 1 */
 	pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
-	write_pte(ctx, hop0_addr, pte_val);
+	hl_mmu_dr_write_pte(ctx, hop0_addr, pte_val);
 
 	pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
-	write_pte(ctx, hop1_addr, pte_val);
-	get_pte(ctx, hop1_addr);
+	hl_mmu_dr_write_pte(ctx, hop1_addr, pte_val);
+	hl_mmu_dr_get_pte(ctx, hop1_addr);
 
 	hop2_pte_addr = hop2_addr;
 	for (i = 0 ; i < num_of_hop3 ; i++) {
 		pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
 				PAGE_PRESENT_MASK;
-		write_pte(ctx, hop2_pte_addr, pte_val);
-		get_pte(ctx, hop2_addr);
+		hl_mmu_dr_write_pte(ctx, hop2_pte_addr, pte_val);
+		hl_mmu_dr_get_pte(ctx, hop2_addr);
 		hop2_pte_addr += HL_PTE_SIZE;
 	}
 
@@ -289,23 +100,23 @@ static int dram_default_mapping_init(struct hl_ctx *ctx)
 	for (i = 0 ; i < num_of_hop3 ; i++) {
 		hop3_pte_addr = ctx->dram_default_hops[i];
 		for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) {
-			write_final_pte(ctx, hop3_pte_addr, pte_val);
-			get_pte(ctx, ctx->dram_default_hops[i]);
+			hl_mmu_dr_write_final_pte(ctx, hop3_pte_addr, pte_val);
+			hl_mmu_dr_get_pte(ctx, ctx->dram_default_hops[i]);
 			hop3_pte_addr += HL_PTE_SIZE;
 		}
 	}
 
-	flush(ctx);
+	hl_mmu_dr_flush(ctx);
 
 	return 0;
 
 hop3_err:
 	for (i = 0 ; i < hop3_allocated ; i++)
-		free_hop(ctx, ctx->dram_default_hops[i]);
+		hl_mmu_dr_free_hop(ctx, ctx->dram_default_hops[i]);
 
-	free_hop(ctx, hop2_addr);
+	hl_mmu_dr_free_hop(ctx, hop2_addr);
 hop2_err:
-	free_hop(ctx, hop1_addr);
+	hl_mmu_dr_free_hop(ctx, hop1_addr);
 hop1_err:
 	kfree(ctx->dram_default_hops);
 
@@ -329,7 +140,7 @@ static void dram_default_mapping_fini(struct hl_ctx *ctx)
 	do_div(num_of_hop3, prop->dram_page_size);
 	do_div(num_of_hop3, HOP_PTE_ENTRIES_512);
 
-	hop0_addr = get_hop0_addr(ctx);
+	hop0_addr = hl_mmu_dr_get_hop0_addr(ctx);
 	/* add hop1 and hop2 */
 	total_hops = num_of_hop3 + 2;
 	hop1_addr = ctx->dram_default_hops[total_hops - 1];
@@ -338,101 +149,26 @@ static void dram_default_mapping_fini(struct hl_ctx *ctx)
 	for (i = 0 ; i < num_of_hop3 ; i++) {
 		hop3_pte_addr = ctx->dram_default_hops[i];
 		for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) {
-			clear_pte(ctx, hop3_pte_addr);
-			put_pte(ctx, ctx->dram_default_hops[i]);
+			hl_mmu_dr_clear_pte(ctx, hop3_pte_addr);
+			hl_mmu_dr_put_pte(ctx, ctx->dram_default_hops[i]);
 			hop3_pte_addr += HL_PTE_SIZE;
 		}
 	}
 
 	hop2_pte_addr = hop2_addr;
 	for (i = 0 ; i < num_of_hop3 ; i++) {
-		clear_pte(ctx, hop2_pte_addr);
-		put_pte(ctx, hop2_addr);
+		hl_mmu_dr_clear_pte(ctx, hop2_pte_addr);
+		hl_mmu_dr_put_pte(ctx, hop2_addr);
 		hop2_pte_addr += HL_PTE_SIZE;
 	}
 
-	clear_pte(ctx, hop1_addr);
-	put_pte(ctx, hop1_addr);
-	clear_pte(ctx, hop0_addr);
+	hl_mmu_dr_clear_pte(ctx, hop1_addr);
+	hl_mmu_dr_put_pte(ctx, hop1_addr);
+	hl_mmu_dr_clear_pte(ctx, hop0_addr);
 
 	kfree(ctx->dram_default_hops);
 
-	flush(ctx);
-}
-
-/**
- * hl_mmu_v1_init() - initialize the MMU module.
- * @hdev: habanalabs device structure.
- *
- * This function does the following:
- * - Create a pool of pages for pgt_infos.
- * - Create a shadow table for pgt
- *
- * Return: 0 for success, non-zero for failure.
- */
-static int hl_mmu_v1_init(struct hl_device *hdev)
-{
-	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	int rc;
-
-	hdev->mmu_priv.dr.mmu_pgt_pool =
-			gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
-
-	if (!hdev->mmu_priv.dr.mmu_pgt_pool) {
-		dev_err(hdev->dev, "Failed to create page gen pool\n");
-		return -ENOMEM;
-	}
-
-	rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr +
-			prop->mmu_hop0_tables_total_size,
-			prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
-			-1);
-	if (rc) {
-		dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
-		goto err_pool_add;
-	}
-
-	hdev->mmu_priv.dr.mmu_shadow_hop0 = kvcalloc(prop->max_asid, prop->mmu_hop_table_size,
-										GFP_KERNEL);
-	if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
-		rc = -ENOMEM;
-		goto err_pool_add;
-	}
-
-	/* MMU H/W init will be done in device hw_init() */
-
-	return 0;
-
-err_pool_add:
-	gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
-
-	return rc;
-}
-
-/**
- * hl_mmu_v1_fini() - release the MMU module.
- * @hdev: habanalabs device structure.
- *
- * This function does the following:
- * - Disable MMU in H/W.
- * - Free the pgt_infos pool.
- *
- * All contexts should be freed before calling this function.
- */
-static void hl_mmu_v1_fini(struct hl_device *hdev)
-{
-	/* MMU H/W fini was already done in device hw_fini() */
-
-	if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
-		kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
-		gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
-
-		/* Make sure that if we arrive here again without init was
-		 * called we won't cause kernel panic. This can happen for
-		 * example if we fail during hard reset code at certain points
-		 */
-		hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
-	}
+	hl_mmu_dr_flush(ctx);
 }
 
 /**
@@ -476,7 +212,7 @@ static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx)
 		dev_err_ratelimited(hdev->dev,
 			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
 			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
-		_free_hop(ctx, pgt_info);
+		hl_mmu_dr_free_pgt_node(ctx, pgt_info);
 	}
 }
 
@@ -495,7 +231,7 @@ static int hl_mmu_v1_unmap(struct hl_ctx *ctx,
 
 	for (hop_idx = MMU_HOP0; hop_idx < MMU_HOP4; hop_idx++) {
 		if (hop_idx == MMU_HOP0) {
-			hop_addr[hop_idx] = get_hop0_addr(ctx);
+			hop_addr[hop_idx] = hl_mmu_dr_get_hop0_addr(ctx);
 		} else {
 			hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
 			if (hop_addr[hop_idx] == ULLONG_MAX)
@@ -546,30 +282,30 @@ static int hl_mmu_v1_unmap(struct hl_ctx *ctx,
 		}
 
 		hop_idx = MMU_HOP3;
-		write_final_pte(ctx, hop_pte_addr[hop_idx], default_pte);
-		put_pte(ctx, hop_addr[hop_idx]);
+		hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[hop_idx], default_pte);
+		hl_mmu_dr_put_pte(ctx, hop_addr[hop_idx]);
 	} else {
 		if (!(curr_pte & PAGE_PRESENT_MASK))
 			goto not_mapped;
 
 		if (hop_addr[MMU_HOP4])
-			clear_pte(ctx, hop_pte_addr[MMU_HOP4]);
+			hl_mmu_dr_clear_pte(ctx, hop_pte_addr[MMU_HOP4]);
 		else
-			clear_pte(ctx, hop_pte_addr[MMU_HOP3]);
+			hl_mmu_dr_clear_pte(ctx, hop_pte_addr[MMU_HOP3]);
 
-		if (hop_addr[MMU_HOP4] && !put_pte(ctx, hop_addr[MMU_HOP4]))
+		if (hop_addr[MMU_HOP4] && !hl_mmu_dr_put_pte(ctx, hop_addr[MMU_HOP4]))
 			clear_hop3 = true;
 
 		if (!clear_hop3)
 			goto mapped;
 
 		for (hop_idx = MMU_HOP3; hop_idx >= 0; hop_idx--) {
-			clear_pte(ctx, hop_pte_addr[hop_idx]);
+			hl_mmu_dr_clear_pte(ctx, hop_pte_addr[hop_idx]);
 
 			if (hop_idx == MMU_HOP0)
 				break;
 
-			if (put_pte(ctx, hop_addr[hop_idx]))
+			if (hl_mmu_dr_put_pte(ctx, hop_addr[hop_idx]))
 				goto mapped;
 		}
 	}
@@ -616,10 +352,10 @@ static int hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 
 	for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++) {
 		if (hop_idx == MMU_HOP0) {
-			hop_addr[hop_idx] = get_hop0_addr(ctx);
+			hop_addr[hop_idx] = hl_mmu_dr_get_hop0_addr(ctx);
 		} else {
 			hop_addr[hop_idx] =
-					get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[hop_idx]);
+				hl_mmu_dr_get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[hop_idx]);
 			if (hop_addr[hop_idx] == ULLONG_MAX)
 				goto err;
 		}
@@ -666,27 +402,27 @@ static int hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 	curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask
 			| PAGE_PRESENT_MASK;
 
-	write_final_pte(ctx, hop_pte_addr[num_hops - 1], curr_pte);
+	hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[num_hops - 1], curr_pte);
 
 	for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) {
 		prev_hop = hop_idx - 1;
 
 		if (hop_new[hop_idx]) {
 			curr_pte = (hop_addr[hop_idx] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
-			write_pte(ctx, hop_pte_addr[prev_hop], curr_pte);
+			hl_mmu_dr_write_pte(ctx, hop_pte_addr[prev_hop], curr_pte);
 			if (hop_idx != MMU_HOP1)
-				get_pte(ctx, hop_addr[prev_hop]);
+				hl_mmu_dr_get_pte(ctx, hop_addr[prev_hop]);
 		}
 	}
 
-	get_pte(ctx, hop_addr[num_hops - 1]);
+	hl_mmu_dr_get_pte(ctx, hop_addr[num_hops - 1]);
 
 	return 0;
 
 err:
 	for (hop_idx = num_hops; hop_idx > MMU_HOP0; hop_idx--) {
 		if (hop_new[hop_idx])
-			free_hop(ctx, hop_addr[hop_idx]);
+			hl_mmu_dr_free_hop(ctx, hop_addr[hop_idx]);
 	}
 
 	return rc;
@@ -752,7 +488,7 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
 	if (is_huge)
 		used_hops--;
 
-	hops->hop_info[0].hop_addr = get_phys_hop0_addr(ctx);
+	hops->hop_info[0].hop_addr = hl_mmu_dr_get_phys_hop0_addr(ctx);
 	hops->hop_info[0].hop_pte_addr =
 			hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
 					hops->hop_info[0].hop_addr, virt_addr);
@@ -801,13 +537,13 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
  */
 void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
 {
-	mmu->init = hl_mmu_v1_init;
-	mmu->fini = hl_mmu_v1_fini;
+	mmu->init = hl_mmu_dr_init;
+	mmu->fini = hl_mmu_dr_fini;
 	mmu->ctx_init = hl_mmu_v1_ctx_init;
 	mmu->ctx_fini = hl_mmu_v1_ctx_fini;
 	mmu->map = hl_mmu_v1_map;
 	mmu->unmap = hl_mmu_v1_unmap;
-	mmu->flush = flush;
+	mmu->flush = hl_mmu_dr_flush;
 	mmu->swap_out = hl_mmu_v1_swap_out;
 	mmu->swap_in = hl_mmu_v1_swap_in;
 	mmu->get_tlb_info = hl_mmu_v1_get_tlb_info;
diff --git a/drivers/accel/habanalabs/common/mmu/mmu_v2.c b/drivers/accel/habanalabs/common/mmu/mmu_v2.c
new file mode 100644
index 0000000000..4bc0268fff
--- /dev/null
+++ b/drivers/accel/habanalabs/common/mmu/mmu_v2.c
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2020 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "../habanalabs.h"
+#include "../../include/hw_ip/mmu/mmu_general.h"
+#include "../../include/hw_ip/mmu/mmu_v2_0.h"
+
+#include <linux/slab.h>
+
+/**
+ * hl_mmu_v2_ctx_init() - initialize a context for using the MMU module.
+ * @ctx: pointer to the context structure to initialize.
+ *
+ * Initialize a mutex to protect the concurrent mapping flow, a hash to hold all
+ * page tables hops related to this context.
+ * Return: 0 on success, non-zero otherwise.
+ */
+static int hl_mmu_v2_ctx_init(struct hl_ctx *ctx)
+{
+	hash_init(ctx->mmu_shadow_hash);
+
+	return 0;
+}
+
+/*
+ * hl_mmu_v2_ctx_fini - disable a ctx from using the mmu module
+ *
+ * @ctx: pointer to the context structure
+ *
+ * This function does the following:
+ * - Free any pgts which were not freed yet
+ * - Free the mutex
+ * - Free DRAM default page mapping hops
+ */
+static void hl_mmu_v2_ctx_fini(struct hl_ctx *ctx)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct pgt_info *pgt_info;
+	struct hlist_node *tmp;
+	int i;
+
+	if (!hash_empty(ctx->mmu_shadow_hash))
+		dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
+			ctx->asid);
+
+	hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
+		dev_err_ratelimited(hdev->dev,
+			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
+			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
+		hl_mmu_dr_free_pgt_node(ctx, pgt_info);
+	}
+}
+
+static int hl_mmu_v2_unmap(struct hl_ctx *ctx,	u64 virt_addr, bool is_dram_addr)
+{
+	u64 hop_addr[MMU_ARCH_6_HOPS] = { 0 }, hop_pte_addr[MMU_ARCH_6_HOPS] = { 0 }, curr_pte,
+							scrambled_virt_addr;
+	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
+	struct hl_device *hdev = ctx->hdev;
+	struct hl_mmu_properties *mmu_prop;
+	bool is_huge = false;
+	int i, hop_last;
+
+	/* device resident in V2 are allowed only for HMMU */
+	if (!is_dram_addr)
+		return -EINVAL;
+
+	mmu_prop = &prop->dmmu;
+
+	hop_last = mmu_prop->num_hops - 1;
+
+	scrambled_virt_addr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);
+
+	hop_addr[0] = hl_mmu_dr_get_hop0_addr(ctx);
+	hop_pte_addr[0] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
+					hop_addr[0], scrambled_virt_addr);
+	if (hop_pte_addr[0] == U64_MAX)
+		return -EFAULT;
+
+	curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[0];
+
+	for (i = 1 ; i < mmu_prop->num_hops ; i++) {
+		hop_addr[i] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
+		if (hop_addr[i] == ULLONG_MAX)
+			goto not_mapped;
+
+		hop_pte_addr[i] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
+					hop_addr[i], scrambled_virt_addr);
+		if (hop_pte_addr[i] == U64_MAX)
+			return -EFAULT;
+
+		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[i];
+
+		if ((i <= hop_last) && (curr_pte & mmu_prop->last_mask)) {
+			hop_last = i;
+			is_huge = true;
+			break;
+		}
+	}
+
+	if (is_dram_addr && !is_huge) {
+		dev_err(hdev->dev, "DRAM unmapping should use huge pages only\n");
+		return -EFAULT;
+	}
+
+	if (!(curr_pte & PAGE_PRESENT_MASK))
+		goto not_mapped;
+
+	for (i = hop_last ; i > 0 ; i--) {
+		hl_mmu_dr_clear_pte(ctx, hop_pte_addr[i]);
+		if (hl_mmu_dr_put_pte(ctx, hop_addr[i]))
+			goto mapped;
+	}
+	hl_mmu_dr_clear_pte(ctx, hop_pte_addr[0]);
+
+mapped:
+	return 0;
+
+not_mapped:
+	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
+		virt_addr);
+
+	return -EINVAL;
+}
+
+static int hl_mmu_v2_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+							u32 page_size, bool is_dram_addr)
+{
+	u64 hop_addr[MMU_ARCH_6_HOPS] = { 0 }, hop_pte_addr[MMU_ARCH_6_HOPS] = { 0 },
+			curr_pte = 0, scrambled_virt_addr, scrambled_phys_addr;
+	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
+	bool hop_new[MMU_ARCH_6_HOPS] = { false };
+	struct hl_device *hdev = ctx->hdev;
+	struct hl_mmu_properties *mmu_prop;
+	int rc, i, hop_last;
+
+	/* device resident in V2 are allowed only for HMMU */
+	if (!is_dram_addr)
+		return -EINVAL;
+
+	mmu_prop = &prop->dmmu;
+
+	hop_last = mmu_prop->num_hops - 1;
+
+	scrambled_virt_addr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);
+	scrambled_phys_addr = hdev->asic_funcs->scramble_addr(hdev, phys_addr);
+
+	/* First hop is preallocated therefore it is treated differently  */
+	hop_addr[0] = hl_mmu_dr_get_hop0_addr(ctx);
+	hop_pte_addr[0] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
+						hop_addr[0], scrambled_virt_addr);
+	curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[0];
+
+	/* Handle hop1 to hop_last */
+	for (i = 1 ; i <= hop_last ; i++) {
+		hop_addr[i] = hl_mmu_dr_get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[i]);
+		if (hop_addr[i] == ULLONG_MAX) {
+			rc = -ENOMEM;
+			goto err;
+		}
+
+		hop_pte_addr[i] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
+					hop_addr[i], scrambled_virt_addr);
+		if (hop_pte_addr[i] == U64_MAX) {
+			rc = -EINVAL;
+			goto err;
+		}
+
+		if (!hop_pte_addr[i]) {
+			rc = -EINVAL;
+			goto err;
+		}
+
+		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[i];
+	}
+
+	if (curr_pte & PAGE_PRESENT_MASK) {
+		dev_err(hdev->dev,
+			"mapping already exists for virt_addr 0x%llx\n",
+				virt_addr);
+
+		for (i = 0 ; i <= hop_last ; i++)
+			dev_dbg(hdev->dev, "hop%d pte: 0x%llx (0x%llx)\n",
+				i, *(u64 *) (uintptr_t) hop_pte_addr[i],
+				hop_pte_addr[i]);
+
+		rc = -EINVAL;
+		goto err;
+	}
+
+	curr_pte = (scrambled_phys_addr & HOP_PHYS_ADDR_MASK)
+					| mmu_prop->last_mask | PAGE_PRESENT_MASK;
+
+	/* Write the PTEs */
+	hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[hop_last], curr_pte);
+
+	/* for each new hop, add its address to the table of previous-hop */
+	for (i = 1 ; i <= hop_last ; i++) {
+		if (hop_new[i]) {
+			curr_pte = (hop_addr[i] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+			hl_mmu_dr_write_pte(ctx, hop_pte_addr[i - 1], curr_pte);
+
+			if (i - 1)
+				hl_mmu_dr_get_pte(ctx, hop_addr[i - 1]);
+		}
+	}
+	hl_mmu_dr_get_pte(ctx, hop_addr[hop_last]);
+
+	return 0;
+
+err:
+	for (i = 1 ; i <= hop_last ; i++)
+		if (hop_new[i] && (hop_addr[i] != U64_MAX))
+			hl_mmu_dr_free_hop(ctx, hop_addr[i]);
+
+	return rc;
+}
+
+/*
+ * hl_mmu_v2_swap_out - marks all mapping of the given ctx as swapped out
+ *
+ * @ctx: pointer to the context structure
+ *
+ */
+static void hl_mmu_v2_swap_out(struct hl_ctx *ctx)
+{
+
+}
+
+/*
+ * hl_mmu_v2_swap_in - marks all mapping of the given ctx as swapped in
+ *
+ * @ctx: pointer to the context structure
+ *
+ */
+static void hl_mmu_v2_swap_in(struct hl_ctx *ctx)
+{
+
+}
+
+static int hl_mmu_v2_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops)
+{
+	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
+	struct hl_device *hdev = ctx->hdev;
+	struct hl_mmu_properties *mmu_prop;
+	bool is_dram_addr;
+	int i;
+
+	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+						prop->dmmu.start_addr,
+						prop->dmmu.end_addr);
+
+	/* device resident in V2 are allowed only for HMMU */
+	if (!is_dram_addr)
+		return -EINVAL;
+
+	mmu_prop = &prop->dmmu;
+	hops->range_type = HL_VA_RANGE_TYPE_DRAM;
+
+	hops->scrambled_vaddr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);
+
+	hops->hop_info[0].hop_addr = hl_mmu_dr_get_phys_hop0_addr(ctx);
+	hops->hop_info[0].hop_pte_addr = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
+						hops->hop_info[0].hop_addr,
+							hops->scrambled_vaddr);
+	if (hops->hop_info[0].hop_pte_addr == U64_MAX)
+		return -EFAULT;
+
+	hops->hop_info[0].hop_pte_val = hdev->asic_funcs->read_pte(hdev,
+						hops->hop_info[0].hop_pte_addr);
+	if (hops->hop_info[0].hop_pte_val == U64_MAX)
+		return -EFAULT;
+
+	for (i = 1 ; i < mmu_prop->num_hops ; i++) {
+		hops->hop_info[i].hop_addr =
+			hl_mmu_get_next_hop_addr(ctx, hops->hop_info[i - 1].hop_pte_val);
+		if (hops->hop_info[i].hop_addr == ULLONG_MAX)
+			return -EFAULT;
+
+		hops->hop_info[i].hop_pte_addr =
+				hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
+						hops->hop_info[i].hop_addr,
+						hops->scrambled_vaddr);
+		if (hops->hop_info[i].hop_pte_addr == U64_MAX)
+			return -EFAULT;
+
+		hops->hop_info[i].hop_pte_val =
+				hdev->asic_funcs->read_pte(hdev,
+					hops->hop_info[i].hop_pte_addr);
+
+		if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
+			return -EFAULT;
+
+		if (hops->hop_info[i].hop_pte_val & mmu_prop->last_mask)
+			break;
+	}
+
+	/* if passed over all hops then no last hop was found */
+	if (i == mmu_prop->num_hops)
+		return -EFAULT;
+
+	if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
+		return -EFAULT;
+
+	if (hops->scrambled_vaddr != virt_addr)
+		hops->unscrambled_paddr = hdev->asic_funcs->descramble_addr
+				(hdev, hops->hop_info[i].hop_pte_val);
+	else
+		hops->unscrambled_paddr = hops->hop_info[i].hop_pte_val;
+
+	hops->used_hops = i + 1;
+
+	return 0;
+}
+
+/*
+ * hl_mmu_v2_prepare - prepare mmu_if for working with mmu v2
+ *
+ * @hdev: pointer to the device structure
+ * @mmu_if: pointer to the mmu interface structure
+ */
+void hl_mmu_v2_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
+{
+	mmu->init = hl_mmu_dr_init;
+	mmu->fini = hl_mmu_dr_fini;
+	mmu->ctx_init = hl_mmu_v2_ctx_init;
+	mmu->ctx_fini = hl_mmu_v2_ctx_fini;
+	mmu->map = hl_mmu_v2_map;
+	mmu->unmap = hl_mmu_v2_unmap;
+	mmu->flush = hl_mmu_dr_flush;
+	mmu->swap_out = hl_mmu_v2_swap_out;
+	mmu->swap_in = hl_mmu_v2_swap_in;
+	mmu->get_tlb_info = hl_mmu_v2_get_tlb_info;
+}
diff --git a/drivers/accel/habanalabs/common/mmu/mmu_v2_hr.c b/drivers/accel/habanalabs/common/mmu/mmu_v2_hr.c
index afe7ef964f..31507b2a43 100644
--- a/drivers/accel/habanalabs/common/mmu/mmu_v2_hr.c
+++ b/drivers/accel/habanalabs/common/mmu/mmu_v2_hr.c
@@ -47,7 +47,7 @@ static inline int hl_mmu_v2_hr_init(struct hl_device *hdev)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 
-	return hl_mmu_hr_init(hdev, &hdev->mmu_priv.hr, prop->mmu_hop_table_size,
+	return hl_mmu_hr_init(hdev, &hdev->mmu_priv.hr, prop->pmmu.hop_table_size,
 				prop->mmu_pgt_size);
 }
 
@@ -65,7 +65,7 @@ static inline void hl_mmu_v2_hr_fini(struct hl_device *hdev)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 
-	hl_mmu_hr_fini(hdev, &hdev->mmu_priv.hr, prop->mmu_hop_table_size);
+	hl_mmu_hr_fini(hdev, &hdev->mmu_priv.hr, prop->pmmu.hop_table_size);
 }
 
 /**
@@ -108,7 +108,7 @@ static void hl_mmu_v2_hr_ctx_fini(struct hl_ctx *ctx)
 			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
 			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
 		hl_mmu_hr_free_hop_remove_pgt(pgt_info, &ctx->hdev->mmu_priv.hr,
-							ctx->hdev->asic_prop.mmu_hop_table_size);
+							ctx->hdev->asic_prop.pmmu.hop_table_size);
 	}
 }
 
@@ -150,7 +150,7 @@ static int _hl_mmu_v2_hr_unmap(struct hl_ctx *ctx,
 
 		curr_pte = *(u64 *) (uintptr_t) hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i],
 							hop_pte_phys_addr[i],
-							ctx->hdev->asic_prop.mmu_hop_table_size);
+							ctx->hdev->asic_prop.pmmu.hop_table_size);
 
 		if ((i < hop_last) && (curr_pte & mmu_prop->last_mask)) {
 			hop_last = i;
@@ -169,14 +169,14 @@ static int _hl_mmu_v2_hr_unmap(struct hl_ctx *ctx,
 
 	for (i = hop_last ; i > 0 ; i--) {
 		hl_mmu_hr_clear_pte(ctx, hops_pgt_info[i], hop_pte_phys_addr[i],
-						ctx->hdev->asic_prop.mmu_hop_table_size);
+						ctx->hdev->asic_prop.pmmu.hop_table_size);
 
 		if (hl_mmu_hr_put_pte(ctx, hops_pgt_info[i], &ctx->hdev->mmu_priv.hr,
-						ctx->hdev->asic_prop.mmu_hop_table_size))
+						ctx->hdev->asic_prop.pmmu.hop_table_size))
 			goto mapped;
 	}
 	hl_mmu_hr_clear_pte(ctx, hops_pgt_info[0], hop_pte_phys_addr[0],
-						ctx->hdev->asic_prop.mmu_hop_table_size);
+						ctx->hdev->asic_prop.pmmu.hop_table_size);
 
 mapped:
 	return 0;
@@ -255,7 +255,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx,
 									scrambled_virt_addr);
 		curr_pte = *(u64 *) (uintptr_t) hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i],
 							hop_pte_phys_addr[i],
-							ctx->hdev->asic_prop.mmu_hop_table_size);
+							ctx->hdev->asic_prop.pmmu.hop_table_size);
 	}
 
 	if (curr_pte & PAGE_PRESENT_MASK) {
@@ -268,7 +268,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx,
 					*(u64 *) (uintptr_t)
 					hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i],
 							hop_pte_phys_addr[i],
-							ctx->hdev->asic_prop.mmu_hop_table_size),
+							ctx->hdev->asic_prop.pmmu.hop_table_size),
 					hop_pte_phys_addr[i]);
 		rc = -EINVAL;
 		goto err;
@@ -279,7 +279,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx,
 
 	/* Write the PTEs */
 	hl_mmu_hr_write_pte(ctx, hops_pgt_info[hop_last], hop_pte_phys_addr[hop_last], curr_pte,
-							ctx->hdev->asic_prop.mmu_hop_table_size);
+							ctx->hdev->asic_prop.pmmu.hop_table_size);
 
 	/* for each new hop, add its address to the table of previous-hop */
 	for (i = 1 ; i <= hop_last ; i++) {
@@ -287,7 +287,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx,
 			curr_pte = (hops_pgt_info[i]->phys_addr & HOP_PHYS_ADDR_MASK) |
 							PAGE_PRESENT_MASK;
 			hl_mmu_hr_write_pte(ctx, hops_pgt_info[i - 1], hop_pte_phys_addr[i - 1],
-						curr_pte, ctx->hdev->asic_prop.mmu_hop_table_size);
+						curr_pte, ctx->hdev->asic_prop.pmmu.hop_table_size);
 			if (i - 1)
 				hl_mmu_hr_get_pte(ctx, &ctx->hdev->mmu_func[MMU_HR_PGT].hr_funcs,
 								hops_pgt_info[i - 1]->phys_addr);
@@ -303,7 +303,7 @@ err:
 	for (i = 1 ; i <= hop_last ; i++)
 		if (hop_new[i] && hops_pgt_info[i])
 			hl_mmu_hr_free_hop_remove_pgt(hops_pgt_info[i], &ctx->hdev->mmu_priv.hr,
-							ctx->hdev->asic_prop.mmu_hop_table_size);
+							ctx->hdev->asic_prop.pmmu.hop_table_size);
 
 	return rc;
 }
diff --git a/drivers/accel/habanalabs/common/security.c b/drivers/accel/habanalabs/common/security.c
index fe913965db..5402a3cd04 100644
--- a/drivers/accel/habanalabs/common/security.c
+++ b/drivers/accel/habanalabs/common/security.c
@@ -7,15 +7,31 @@
 
 #include "habanalabs.h"
 
-static const char * const hl_glbl_error_cause[HL_MAX_NUM_OF_GLBL_ERR_CAUSE] = {
+static const char * const hl_glbl_error_cause[] = {
 	"Error due to un-priv read",
 	"Error due to un-secure read",
 	"Error due to read from unmapped reg",
 	"Error due to un-priv write",
 	"Error due to un-secure write",
 	"Error due to write to unmapped reg",
+	"N/A",
+	"N/A",
+	"N/A",
+	"N/A",
+	"N/A",
+	"N/A",
+	"N/A",
+	"N/A",
+	"N/A",
+	"N/A",
 	"External I/F write sec violation",
 	"External I/F write to un-mapped reg",
+	"N/A",
+	"N/A",
+	"N/A",
+	"N/A",
+	"N/A",
+	"N/A",
 	"Read to write only",
 	"Write to read only"
 };
@@ -671,10 +687,11 @@ static bool hl_check_block_range_exclusion(struct hl_device *hdev,
 static int hl_read_glbl_errors(struct hl_device *hdev,
 		u32 blk_idx, u32 major, u32 minor, u32 sub_minor, void *data)
 {
-	struct hl_special_block_info *special_blocks = hdev->asic_prop.special_blocks;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct hl_special_block_info *special_blocks = prop->special_blocks;
 	struct hl_special_block_info *current_block = &special_blocks[blk_idx];
 	u32 glbl_err_addr, glbl_err_cause, addr_val, cause_val, block_base,
-		base = current_block->base_addr - lower_32_bits(hdev->asic_prop.cfg_base_address);
+		base = current_block->base_addr - lower_32_bits(prop->cfg_base_address);
 	int i;
 
 	block_base = base + major * current_block->major_offset +
@@ -689,13 +706,13 @@ static int hl_read_glbl_errors(struct hl_device *hdev,
 	glbl_err_addr = block_base + HL_GLBL_ERR_ADDR_OFFSET;
 	addr_val = RREG32(glbl_err_addr);
 
-	for (i = 0 ; i < hdev->asic_prop.glbl_err_cause_num ; i++) {
+	for (i = 0 ; i <= prop->glbl_err_max_cause_num ; i++) {
 		if (cause_val & BIT(i))
 			dev_err_ratelimited(hdev->dev,
-				"%s, addr %#llx\n",
-				hl_glbl_error_cause[i],
-				hdev->asic_prop.cfg_base_address + block_base +
-				FIELD_GET(HL_GLBL_ERR_ADDRESS_MASK, addr_val));
+					"%s, addr %#llx\n",
+					hl_glbl_error_cause[i],
+					prop->cfg_base_address + block_base +
+						FIELD_GET(HL_GLBL_ERR_ADDRESS_MASK, addr_val));
 	}
 
 	WREG32(glbl_err_cause, cause_val);
diff --git a/drivers/accel/habanalabs/common/security.h b/drivers/accel/habanalabs/common/security.h
index d7a3b3e82e..476f70687c 100644
--- a/drivers/accel/habanalabs/common/security.h
+++ b/drivers/accel/habanalabs/common/security.h
@@ -13,8 +13,7 @@
 struct hl_device;
 
 /* special blocks */
-#define HL_MAX_NUM_OF_GLBL_ERR_CAUSE		10
-#define HL_GLBL_ERR_ADDRESS_MASK		GENMASK(11, 0)
+#define HL_GLBL_ERR_ADDRESS_MASK	GENMASK(11, 0)
 /* GLBL_ERR_ADDR register offset from the start of the block */
 #define HL_GLBL_ERR_ADDR_OFFSET		0xF44
 /* GLBL_ERR_CAUSE register offset from the start of the block */