summaryrefslogtreecommitdiffstats
path: root/drivers/accel
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-08-07 13:11:27 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-08-07 13:11:27 +0000
commit34996e42f82bfd60bc2c191e5cae3c6ab233ec6c (patch)
tree62db60558cbf089714b48daeabca82bf2b20b20e /drivers/accel
parentAdding debian version 6.8.12-1. (diff)
downloadlinux-34996e42f82bfd60bc2c191e5cae3c6ab233ec6c.tar.xz
linux-34996e42f82bfd60bc2c191e5cae3c6ab233ec6c.zip
Merging upstream version 6.9.7.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/accel')
-rw-r--r--drivers/accel/drm_accel.c2
-rw-r--r--drivers/accel/habanalabs/common/command_submission.c3
-rw-r--r--drivers/accel/habanalabs/common/debugfs.c18
-rw-r--r--drivers/accel/habanalabs/common/device.c55
-rw-r--r--drivers/accel/habanalabs/common/firmware_if.c25
-rw-r--r--drivers/accel/habanalabs/common/habanalabs.h41
-rw-r--r--drivers/accel/habanalabs/common/hw_queue.c17
-rw-r--r--drivers/accel/habanalabs/common/hwmon.c29
-rw-r--r--drivers/accel/habanalabs/common/mmu/Makefile2
-rw-r--r--drivers/accel/habanalabs/common/mmu/mmu.c223
-rw-r--r--drivers/accel/habanalabs/common/mmu/mmu_v1.c354
-rw-r--r--drivers/accel/habanalabs/common/mmu/mmu_v2.c338
-rw-r--r--drivers/accel/habanalabs/common/mmu/mmu_v2_hr.c24
-rw-r--r--drivers/accel/habanalabs/common/security.c33
-rw-r--r--drivers/accel/habanalabs/common/security.h3
-rw-r--r--drivers/accel/habanalabs/gaudi/gaudi.c9
-rw-r--r--drivers/accel/habanalabs/gaudi2/gaudi2.c308
-rw-r--r--drivers/accel/habanalabs/gaudi2/gaudi2P.h15
-rw-r--r--drivers/accel/habanalabs/goya/goya.c12
-rw-r--r--drivers/accel/habanalabs/goya/goya_coresight.c3
-rw-r--r--drivers/accel/habanalabs/include/hw_ip/mmu/mmu_general.h2
-rw-r--r--drivers/accel/ivpu/ivpu_debugfs.c32
-rw-r--r--drivers/accel/ivpu/ivpu_drv.c12
-rw-r--r--drivers/accel/ivpu/ivpu_drv.h7
-rw-r--r--drivers/accel/ivpu/ivpu_fw.c49
-rw-r--r--drivers/accel/ivpu/ivpu_fw_log.c6
-rw-r--r--drivers/accel/ivpu/ivpu_gem.c70
-rw-r--r--drivers/accel/ivpu/ivpu_gem.h6
-rw-r--r--drivers/accel/ivpu/ivpu_hw_37xx.c10
-rw-r--r--drivers/accel/ivpu/ivpu_hw_40xx.c10
-rw-r--r--drivers/accel/ivpu/ivpu_ipc.c12
-rw-r--r--drivers/accel/ivpu/ivpu_job.c20
-rw-r--r--drivers/accel/ivpu/ivpu_pm.c12
-rw-r--r--drivers/accel/ivpu/vpu_boot_api.h46
-rw-r--r--drivers/accel/ivpu/vpu_jsm_api.h32
-rw-r--r--drivers/accel/qaic/mhi_controller.c6
-rw-r--r--drivers/accel/qaic/qaic.h3
-rw-r--r--drivers/accel/qaic/qaic_data.c59
-rw-r--r--drivers/accel/qaic/qaic_drv.c140
39 files changed, 1321 insertions, 727 deletions
diff --git a/drivers/accel/drm_accel.c b/drivers/accel/drm_accel.c
index 24cac4c027..16c3edb8c4 100644
--- a/drivers/accel/drm_accel.c
+++ b/drivers/accel/drm_accel.c
@@ -23,7 +23,7 @@ static struct idr accel_minors_idr;
static struct dentry *accel_debugfs_root;
-static struct device_type accel_sysfs_device_minor = {
+static const struct device_type accel_sysfs_device_minor = {
.name = "accel_minor"
};
diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index 3aa6eeef44..39e23d625a 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -1360,9 +1360,8 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
return -EINVAL;
}
- if (!hl_device_operational(hdev, &status)) {
+ if (!hl_device_operational(hdev, &status))
return -EBUSY;
- }
if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
!hdev->supports_staged_submission) {
diff --git a/drivers/accel/habanalabs/common/debugfs.c b/drivers/accel/habanalabs/common/debugfs.c
index 01f071d525..b1c88d1837 100644
--- a/drivers/accel/habanalabs/common/debugfs.c
+++ b/drivers/accel/habanalabs/common/debugfs.c
@@ -484,7 +484,7 @@ static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf,
struct hl_debugfs_entry *entry = s->private;
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
struct hl_device *hdev = dev_entry->hdev;
- char kbuf[MMU_KBUF_SIZE];
+ char kbuf[MMU_KBUF_SIZE] = {0};
char *c;
ssize_t rc;
@@ -546,7 +546,7 @@ static ssize_t mmu_ack_error_value_write(struct file *file,
struct hl_debugfs_entry *entry = s->private;
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
struct hl_device *hdev = dev_entry->hdev;
- char kbuf[MMU_KBUF_SIZE];
+ char kbuf[MMU_KBUF_SIZE] = {0};
ssize_t rc;
if (count > sizeof(kbuf) - 1)
@@ -1643,19 +1643,19 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent
&hl_data64b_fops);
debugfs_create_file("set_power_state",
- 0200,
+ 0644,
root,
dev_entry,
&hl_power_fops);
debugfs_create_file("device",
- 0200,
+ 0644,
root,
dev_entry,
&hl_device_fops);
debugfs_create_file("clk_gate",
- 0200,
+ 0644,
root,
dev_entry,
&hl_clk_gate_fops);
@@ -1667,13 +1667,13 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent
&hl_stop_on_err_fops);
debugfs_create_file("dump_security_violations",
- 0644,
+ 0400,
root,
dev_entry,
&hl_security_violations_fops);
debugfs_create_file("dump_razwi_events",
- 0644,
+ 0400,
root,
dev_entry,
&hl_razwi_check_fops);
@@ -1706,7 +1706,7 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent
&hdev->reset_info.skip_reset_on_timeout);
debugfs_create_file("state_dump",
- 0600,
+ 0644,
root,
dev_entry,
&hl_state_dump_fops);
@@ -1724,7 +1724,7 @@ static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_ent
for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
debugfs_create_file(hl_debugfs_list[i].name,
- 0444,
+ 0644,
root,
entry,
&hl_debugfs_fops);
diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index a73bd4be94..8f92445c5a 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -55,7 +55,8 @@ static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_regi
if (is_power_of_2(prop->dram_pci_bar_size))
bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull);
else
- bar_base_addr = DIV_ROUND_DOWN_ULL(addr, prop->dram_pci_bar_size) *
+ bar_base_addr = region->region_base +
+ div64_u64((addr - region->region_base), prop->dram_pci_bar_size) *
prop->dram_pci_bar_size;
old_base = hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr);
@@ -1034,14 +1035,14 @@ static void device_early_fini(struct hl_device *hdev)
static bool is_pci_link_healthy(struct hl_device *hdev)
{
- u16 vendor_id;
+ u16 device_id;
if (!hdev->pdev)
return false;
- pci_read_config_word(hdev->pdev, PCI_VENDOR_ID, &vendor_id);
+ pci_read_config_word(hdev->pdev, PCI_DEVICE_ID, &device_id);
- return (vendor_id == PCI_VENDOR_ID_HABANALABS);
+ return (device_id == hdev->pdev->device);
}
static int hl_device_eq_heartbeat_check(struct hl_device *hdev)
@@ -1768,14 +1769,16 @@ kill_processes:
hdev->device_cpu_disabled = false;
hdev->reset_info.hard_reset_pending = false;
+ /*
+ * Put the device in an unusable state if there are 2 back to back resets due to
+ * fatal errors.
+ */
if (hdev->reset_info.reset_trigger_repeated &&
- (hdev->reset_info.prev_reset_trigger ==
- HL_DRV_RESET_FW_FATAL_ERR)) {
- /* if there 2 back to back resets from FW,
- * ensure driver puts the driver in a unusable state
- */
+ (hdev->reset_info.prev_reset_trigger == HL_DRV_RESET_FW_FATAL_ERR ||
+ hdev->reset_info.prev_reset_trigger ==
+ HL_DRV_RESET_HEARTBEAT)) {
dev_crit(hdev->dev,
- "%s Consecutive FW fatal errors received, stopping hard reset\n",
+ "%s Consecutive fatal errors, stopping hard reset\n",
dev_name(&(hdev)->pdev->dev));
rc = -EIO;
goto out_err;
@@ -2801,3 +2804,35 @@ void hl_enable_err_info_capture(struct hl_error_info *captured_err_info)
atomic_set(&captured_err_info->cs_timeout.write_enable, 1);
captured_err_info->undef_opcode.write_enable = true;
}
+
+void hl_init_cpu_for_irq(struct hl_device *hdev)
+{
+#ifdef CONFIG_NUMA
+ struct cpumask *available_mask = &hdev->irq_affinity_mask;
+ int numa_node = hdev->pdev->dev.numa_node, i;
+ static struct cpumask cpu_mask;
+
+ if (numa_node < 0)
+ return;
+
+ if (!cpumask_and(&cpu_mask, cpumask_of_node(numa_node), cpu_online_mask)) {
+ dev_err(hdev->dev, "No available affinities in current numa node\n");
+ return;
+ }
+
+ /* Remove HT siblings */
+ for_each_cpu(i, &cpu_mask)
+ cpumask_set_cpu(cpumask_first(topology_sibling_cpumask(i)), available_mask);
+#endif
+}
+
+void hl_set_irq_affinity(struct hl_device *hdev, int irq)
+{
+ if (cpumask_empty(&hdev->irq_affinity_mask)) {
+ dev_dbg(hdev->dev, "affinity mask is empty\n");
+ return;
+ }
+
+ if (irq_set_affinity_and_hint(irq, &hdev->irq_affinity_mask))
+ dev_err(hdev->dev, "Failed setting irq %d affinity\n", irq);
+}
diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c
index 3558a6a8e1..4bd02778a9 100644
--- a/drivers/accel/habanalabs/common/firmware_if.c
+++ b/drivers/accel/habanalabs/common/firmware_if.c
@@ -501,7 +501,7 @@ int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
0, &result);
if (rc)
- dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
+ dev_err(hdev->dev, "failed to unmask event %d", event_type);
return rc;
}
@@ -540,7 +540,7 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
total_pkt_size, 0, &result);
if (rc)
- dev_err(hdev->dev, "failed to unmask IRQ array\n");
+ dev_err(hdev->dev, "failed to unmask event array\n");
kfree(pkt);
@@ -2718,18 +2718,20 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
}
+ rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, sizeof(struct lkd_msg_comms));
+ if (rc)
+ goto protocol_err;
+
+ if (hdev->asic_prop.support_dynamic_resereved_fw_size)
+ hdev->asic_prop.reserved_fw_mem_size =
+ le32_to_cpu(fw_loader->dynamic_loader.comm_desc.rsvd_mem_size_mb) * SZ_1M;
+
if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) {
struct lkd_fw_binning_info *binning_info;
- rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader,
- sizeof(struct lkd_msg_comms));
- if (rc)
- goto protocol_err;
-
/* read preboot version */
rc = hl_fw_dynamic_read_device_fw_version(hdev, FW_COMP_PREBOOT,
fw_loader->dynamic_loader.comm_desc.cur_fw_ver);
-
if (rc)
return rc;
@@ -2756,11 +2758,6 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
hdev->decoder_binning, hdev->rotator_binning);
}
- if (hdev->asic_prop.support_dynamic_resereved_fw_size) {
- hdev->asic_prop.reserved_fw_mem_size =
- le32_to_cpu(fw_loader->dynamic_loader.comm_desc.rsvd_mem_size_mb);
- }
-
return 0;
}
@@ -2795,7 +2792,7 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
hdev->asic_funcs->init_cpu_scrambler_dram(hdev);
if (!(hdev->fw_components & FW_TYPE_LINUX)) {
- dev_info(hdev->dev, "Skip loading Linux F/W\n");
+ dev_dbg(hdev->dev, "Skip loading Linux F/W\n");
return 0;
}
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 41c7aac2ff..48f0f3eea1 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -443,18 +443,22 @@ enum hl_collective_mode {
* a CB handle can be provided for jobs on this queue.
* Otherwise, a CB address must be provided.
* @collective_mode: collective mode of current queue
+ * @q_dram_bd_address: PQ dram address, used when PQ need to reside in DRAM.
* @driver_only: true if only the driver is allowed to send a job to this queue,
* false otherwise.
* @binned: True if the queue is binned out and should not be used
* @supports_sync_stream: True if queue supports sync stream
+ * @dram_bd: True if the bd should be copied to dram, needed for PQ which has been allocated on dram
*/
struct hw_queue_properties {
enum hl_queue_type type;
enum queue_cb_alloc_flags cb_alloc_flags;
enum hl_collective_mode collective_mode;
+ u64 q_dram_bd_address;
u8 driver_only;
u8 binned;
u8 supports_sync_stream;
+ u8 dram_bd;
};
/**
@@ -590,8 +594,6 @@ struct hl_hints_range {
* we display to the user
* @mmu_pgt_size: MMU page tables total size.
* @mmu_pte_size: PTE size in MMU page tables.
- * @mmu_hop_table_size: MMU hop table size.
- * @mmu_hop0_tables_total_size: total size of MMU hop0 tables.
* @dram_page_size: The DRAM physical page size.
* @cfg_size: configuration space size on SRAM.
* @sram_size: total size of SRAM.
@@ -645,10 +647,10 @@ struct hl_hints_range {
* @num_engine_cores: number of engine cpu cores.
* @max_num_of_engines: maximum number of all engines in the ASIC.
* @num_of_special_blocks: special_blocks array size.
- * @glbl_err_cause_num: global err cause number.
+ * @glbl_err_max_cause_num: global err max cause number.
* @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is
* not supported.
- * @reserved_fw_mem_size: size in MB of dram memory reserved for FW.
+ * @reserved_fw_mem_size: size of dram memory reserved for FW.
* @collective_first_sob: first sync object available for collective use
* @collective_first_mon: first monitor available for collective use
* @sync_stream_first_sob: first sync object available for sync stream use
@@ -743,8 +745,6 @@ struct asic_fixed_properties {
u32 clk_pll_index;
u32 mmu_pgt_size;
u32 mmu_pte_size;
- u32 mmu_hop_table_size;
- u32 mmu_hop0_tables_total_size;
u32 dram_page_size;
u32 cfg_size;
u32 sram_size;
@@ -779,7 +779,7 @@ struct asic_fixed_properties {
u32 num_engine_cores;
u32 max_num_of_engines;
u32 num_of_special_blocks;
- u32 glbl_err_cause_num;
+ u32 glbl_err_max_cause_num;
u32 hbw_flush_reg;
u32 reserved_fw_mem_size;
u16 collective_first_sob;
@@ -1052,6 +1052,8 @@ struct hl_encaps_signals_mgr {
* @collective_mode: collective mode of current queue
* @kernel_address: holds the queue's kernel virtual address.
* @bus_address: holds the queue's DMA address.
+ * @pq_dram_address: hold the dram address when the PQ is allocated, used when dram_bd is true in
+ * queue properites.
* @pi: holds the queue's pi value.
* @ci: holds the queue's ci value, AS CALCULATED BY THE DRIVER (not real ci).
* @hw_queue_id: the id of the H/W queue.
@@ -1061,6 +1063,7 @@ struct hl_encaps_signals_mgr {
* @valid: is the queue valid (we have array of 32 queues, not all of them
* exist).
* @supports_sync_stream: True if queue supports sync stream
+ * @dram_bd: True if the bd should be copied to dram, needed for PQ which has been allocated on dram
*/
struct hl_hw_queue {
struct hl_cs_job **shadow_queue;
@@ -1069,6 +1072,7 @@ struct hl_hw_queue {
enum hl_collective_mode collective_mode;
void *kernel_address;
dma_addr_t bus_address;
+ u64 pq_dram_address;
u32 pi;
atomic_t ci;
u32 hw_queue_id;
@@ -1077,6 +1081,7 @@ struct hl_hw_queue {
u16 int_queue_len;
u8 valid;
u8 supports_sync_stream;
+ u8 dram_bd;
};
/**
@@ -3257,6 +3262,7 @@ struct hl_reset_info {
* @clk_throttling: holds information about current/previous clock throttling events
* @captured_err_info: holds information about errors.
* @reset_info: holds current device reset information.
+ * @irq_affinity_mask: mask of available CPU cores for user and decoder interrupt handling.
* @stream_master_qid_arr: pointer to array with QIDs of master streams.
* @fw_inner_major_ver: the major of current loaded preboot inner version.
* @fw_inner_minor_ver: the minor of current loaded preboot inner version.
@@ -3446,6 +3452,8 @@ struct hl_device {
struct hl_reset_info reset_info;
+ cpumask_t irq_affinity_mask;
+
u32 *stream_master_qid_arr;
u32 fw_inner_major_ver;
u32 fw_inner_minor_ver;
@@ -3886,6 +3894,7 @@ int hl_mmu_hr_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_
struct hl_hr_mmu_funcs *hr_func);
int hl_mmu_if_set_funcs(struct hl_device *hdev);
void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
+void hl_mmu_v2_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
void hl_mmu_v2_hr_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr);
int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
@@ -3893,6 +3902,22 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
u64 hl_mmu_scramble_addr(struct hl_device *hdev, u64 addr);
u64 hl_mmu_descramble_addr(struct hl_device *hdev, u64 addr);
bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr);
+struct pgt_info *hl_mmu_dr_get_pgt_info(struct hl_ctx *ctx, u64 hop_addr);
+void hl_mmu_dr_free_hop(struct hl_ctx *ctx, u64 hop_addr);
+void hl_mmu_dr_free_pgt_node(struct hl_ctx *ctx, struct pgt_info *pgt_info);
+u64 hl_mmu_dr_get_phys_hop0_addr(struct hl_ctx *ctx);
+u64 hl_mmu_dr_get_hop0_addr(struct hl_ctx *ctx);
+void hl_mmu_dr_write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val);
+void hl_mmu_dr_write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val);
+void hl_mmu_dr_clear_pte(struct hl_ctx *ctx, u64 pte_addr);
+u64 hl_mmu_dr_get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
+void hl_mmu_dr_get_pte(struct hl_ctx *ctx, u64 hop_addr);
+int hl_mmu_dr_put_pte(struct hl_ctx *ctx, u64 hop_addr);
+u64 hl_mmu_dr_get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, bool *is_new_hop);
+u64 hl_mmu_dr_alloc_hop(struct hl_ctx *ctx);
+void hl_mmu_dr_flush(struct hl_ctx *ctx);
+int hl_mmu_dr_init(struct hl_device *hdev);
+void hl_mmu_dr_fini(struct hl_device *hdev);
int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
void __iomem *dst, u32 src_offset, u32 size);
@@ -4032,6 +4057,8 @@ void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_
void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info);
void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count);
void hl_enable_err_info_capture(struct hl_error_info *captured_err_info);
+void hl_init_cpu_for_irq(struct hl_device *hdev);
+void hl_set_irq_affinity(struct hl_device *hdev, int irq);
#ifdef CONFIG_DEBUG_FS
diff --git a/drivers/accel/habanalabs/common/hw_queue.c b/drivers/accel/habanalabs/common/hw_queue.c
index d0087c0ec4..3d04a7507c 100644
--- a/drivers/accel/habanalabs/common/hw_queue.c
+++ b/drivers/accel/habanalabs/common/hw_queue.c
@@ -84,6 +84,8 @@ void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
u32 ctl, u32 len, u64 ptr)
{
struct hl_bd *bd;
+ u64 addr;
+ int i;
bd = q->kernel_address;
bd += hl_pi_2_offset(q->pi);
@@ -91,7 +93,16 @@ void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
bd->len = cpu_to_le32(len);
bd->ptr = cpu_to_le64(ptr);
+ if (q->dram_bd)
+ for (i = 0 ; i < 2 ; i++) {
+ addr = q->pq_dram_address +
+ ((hl_pi_2_offset(q->pi) * sizeof(struct hl_bd)) + (i * sizeof(u64)));
+ hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM, addr,
+ (u64 *)(bd) + i, DEBUGFS_WRITE64);
+ }
+
q->pi = hl_queue_inc_ptr(q->pi);
+
hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
}
@@ -1087,12 +1098,18 @@ int hl_hw_queues_create(struct hl_device *hdev)
q->supports_sync_stream =
asic->hw_queues_props[i].supports_sync_stream;
q->collective_mode = asic->hw_queues_props[i].collective_mode;
+ q->dram_bd = asic->hw_queues_props[i].dram_bd;
+
rc = queue_init(hdev, q, i);
if (rc) {
dev_err(hdev->dev,
"failed to initialize queue %d\n", i);
goto release_queues;
}
+
+ /* Set DRAM PQ address for the queue if it should be at DRAM */
+ if (q->dram_bd)
+ q->pq_dram_address = asic->hw_queues_props[i].q_dram_bd_address;
}
return 0;
diff --git a/drivers/accel/habanalabs/common/hwmon.c b/drivers/accel/habanalabs/common/hwmon.c
index 1ee2ee07e9..36b951b5f5 100644
--- a/drivers/accel/habanalabs/common/hwmon.c
+++ b/drivers/accel/habanalabs/common/hwmon.c
@@ -46,7 +46,7 @@ static u32 fixup_flags_legacy_fw(struct hl_device *hdev, enum hwmon_sensor_types
break;
default:
- dev_err(hdev->dev, "unsupported h/w sensor type %d\n", type);
+ dev_err_ratelimited(hdev->dev, "unsupported h/w sensor type %d\n", type);
flags = cpucp_flags;
break;
}
@@ -134,7 +134,7 @@ static u32 adjust_hwmon_flags(struct hl_device *hdev, enum hwmon_sensor_types ty
break;
default:
- dev_err(hdev->dev, "unsupported h/w sensor type %d\n", type);
+ dev_err_ratelimited(hdev->dev, "unsupported h/w sensor type %d\n", type);
flags = cpucp_flags;
break;
}
@@ -162,7 +162,8 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev, struct cpucp_sensor *sen
break;
if (type >= HWMON_NR_SENSOR_TYPES) {
- dev_err(hdev->dev, "Got wrong sensor type %d from device\n", type);
+ dev_err_ratelimited(hdev->dev,
+ "Got wrong sensor type %d from device\n", type);
return -EINVAL;
}
@@ -584,7 +585,7 @@ int hl_get_temperature(struct hl_device *hdev,
*value = (long) result;
if (rc) {
- dev_err(hdev->dev,
+ dev_err_ratelimited(hdev->dev,
"Failed to get temperature from sensor %d, error %d\n",
sensor_index, rc);
*value = 0;
@@ -611,7 +612,7 @@ int hl_set_temperature(struct hl_device *hdev,
0, NULL);
if (rc)
- dev_err(hdev->dev,
+ dev_err_ratelimited(hdev->dev,
"Failed to set temperature of sensor %d, error %d\n",
sensor_index, rc);
@@ -638,7 +639,7 @@ int hl_get_voltage(struct hl_device *hdev,
*value = (long) result;
if (rc) {
- dev_err(hdev->dev,
+ dev_err_ratelimited(hdev->dev,
"Failed to get voltage from sensor %d, error %d\n",
sensor_index, rc);
*value = 0;
@@ -667,7 +668,7 @@ int hl_get_current(struct hl_device *hdev,
*value = (long) result;
if (rc) {
- dev_err(hdev->dev,
+ dev_err_ratelimited(hdev->dev,
"Failed to get current from sensor %d, error %d\n",
sensor_index, rc);
*value = 0;
@@ -696,7 +697,7 @@ int hl_get_fan_speed(struct hl_device *hdev,
*value = (long) result;
if (rc) {
- dev_err(hdev->dev,
+ dev_err_ratelimited(hdev->dev,
"Failed to get fan speed from sensor %d, error %d\n",
sensor_index, rc);
*value = 0;
@@ -725,7 +726,7 @@ int hl_get_pwm_info(struct hl_device *hdev,
*value = (long) result;
if (rc) {
- dev_err(hdev->dev,
+ dev_err_ratelimited(hdev->dev,
"Failed to get pwm info from sensor %d, error %d\n",
sensor_index, rc);
*value = 0;
@@ -752,7 +753,7 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
0, NULL);
if (rc)
- dev_err(hdev->dev,
+ dev_err_ratelimited(hdev->dev,
"Failed to set pwm info to sensor %d, error %d\n",
sensor_index, rc);
}
@@ -775,7 +776,7 @@ int hl_set_voltage(struct hl_device *hdev,
0, NULL);
if (rc)
- dev_err(hdev->dev,
+ dev_err_ratelimited(hdev->dev,
"Failed to set voltage of sensor %d, error %d\n",
sensor_index, rc);
@@ -800,7 +801,7 @@ int hl_set_current(struct hl_device *hdev,
0, NULL);
if (rc)
- dev_err(hdev->dev,
+ dev_err_ratelimited(hdev->dev,
"Failed to set current of sensor %d, error %d\n",
sensor_index, rc);
@@ -831,7 +832,7 @@ int hl_set_power(struct hl_device *hdev,
0, NULL);
if (rc)
- dev_err(hdev->dev,
+ dev_err_ratelimited(hdev->dev,
"Failed to set power of sensor %d, error %d\n",
sensor_index, rc);
@@ -858,7 +859,7 @@ int hl_get_power(struct hl_device *hdev,
*value = (long) result;
if (rc) {
- dev_err(hdev->dev,
+ dev_err_ratelimited(hdev->dev,
"Failed to get power of sensor %d, error %d\n",
sensor_index, rc);
*value = 0;
diff --git a/drivers/accel/habanalabs/common/mmu/Makefile b/drivers/accel/habanalabs/common/mmu/Makefile
index 1806c524e0..f4b815bf4f 100644
--- a/drivers/accel/habanalabs/common/mmu/Makefile
+++ b/drivers/accel/habanalabs/common/mmu/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
HL_COMMON_MMU_FILES := common/mmu/mmu.o common/mmu/mmu_v1.o \
- common/mmu/mmu_v2_hr.o
+ common/mmu/mmu_v2.o common/mmu/mmu_v2_hr.o
diff --git a/drivers/accel/habanalabs/common/mmu/mmu.c b/drivers/accel/habanalabs/common/mmu/mmu.c
index b654302a68..d3eaab9084 100644
--- a/drivers/accel/habanalabs/common/mmu/mmu.c
+++ b/drivers/accel/habanalabs/common/mmu/mmu.c
@@ -585,6 +585,8 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
int hl_mmu_if_set_funcs(struct hl_device *hdev)
{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+
if (hdev->mmu_disable)
return 0;
@@ -597,8 +599,9 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev)
case ASIC_GAUDI2:
case ASIC_GAUDI2B:
case ASIC_GAUDI2C:
- /* MMUs in Gaudi2 are always host resident */
- hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]);
+ hl_mmu_v2_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]);
+ if (prop->pmmu.host_resident)
+ hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]);
break;
default:
dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
@@ -1209,3 +1212,219 @@ int hl_mmu_hr_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_
return 0;
}
+struct pgt_info *hl_mmu_dr_get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
+{
+ struct pgt_info *pgt_info = NULL;
+
+ hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
+ (unsigned long) hop_addr)
+ if (hop_addr == pgt_info->shadow_addr)
+ break;
+
+ return pgt_info;
+}
+
+void hl_mmu_dr_free_hop(struct hl_ctx *ctx, u64 hop_addr)
+{
+ struct pgt_info *pgt_info = hl_mmu_dr_get_pgt_info(ctx, hop_addr);
+
+ hl_mmu_dr_free_pgt_node(ctx, pgt_info);
+}
+
+void hl_mmu_dr_free_pgt_node(struct hl_ctx *ctx, struct pgt_info *pgt_info)
+{
+ struct hl_device *hdev = ctx->hdev;
+
+ gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr,
+ hdev->asic_prop.dmmu.hop_table_size);
+ hash_del(&pgt_info->node);
+ kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
+ kfree(pgt_info);
+}
+
+u64 hl_mmu_dr_get_phys_hop0_addr(struct hl_ctx *ctx)
+{
+ return ctx->hdev->asic_prop.mmu_pgt_addr +
+ (ctx->asid * ctx->hdev->asic_prop.dmmu.hop_table_size);
+}
+
+u64 hl_mmu_dr_get_hop0_addr(struct hl_ctx *ctx)
+{
+ return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 +
+ (ctx->asid * ctx->hdev->asic_prop.dmmu.hop_table_size);
+}
+
+u64 hl_mmu_dr_get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
+{
+ u64 page_mask = ctx->hdev->asic_prop.dmmu.hop_table_size - 1;
+ u64 shadow_hop_addr = shadow_addr & (~page_mask);
+ u64 pte_offset = shadow_addr & page_mask;
+ u64 phys_hop_addr;
+
+ if (shadow_hop_addr != hl_mmu_dr_get_hop0_addr(ctx))
+ phys_hop_addr = hl_mmu_dr_get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
+ else
+ phys_hop_addr = hl_mmu_dr_get_phys_hop0_addr(ctx);
+
+ return phys_hop_addr + pte_offset;
+}
+
+void hl_mmu_dr_write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
+{
+ u64 phys_val = hl_mmu_dr_get_phys_addr(ctx, val);
+
+ ctx->hdev->asic_funcs->write_pte(ctx->hdev, hl_mmu_dr_get_phys_addr(ctx, shadow_pte_addr),
+ phys_val);
+
+ *(u64 *) (uintptr_t) shadow_pte_addr = val;
+}
+
+void hl_mmu_dr_write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
+{
+ ctx->hdev->asic_funcs->write_pte(ctx->hdev,
+ hl_mmu_dr_get_phys_addr(ctx, shadow_pte_addr), val);
+ *(u64 *) (uintptr_t) shadow_pte_addr = val;
+}
+
+void hl_mmu_dr_clear_pte(struct hl_ctx *ctx, u64 pte_addr)
+{
+ hl_mmu_dr_write_final_pte(ctx, pte_addr, 0);
+}
+
+void hl_mmu_dr_get_pte(struct hl_ctx *ctx, u64 hop_addr)
+{
+ hl_mmu_dr_get_pgt_info(ctx, hop_addr)->num_of_ptes++;
+}
+
+int hl_mmu_dr_put_pte(struct hl_ctx *ctx, u64 hop_addr)
+{
+ struct pgt_info *pgt_info = hl_mmu_dr_get_pgt_info(ctx, hop_addr);
+ int num_of_ptes_left;
+
+ pgt_info->num_of_ptes--;
+
+ /*
+ * Need to save the number of ptes left because hl_mmu_free_hop might free
+ * the pgt_info
+ */
+ num_of_ptes_left = pgt_info->num_of_ptes;
+ if (!num_of_ptes_left)
+ hl_mmu_dr_free_pgt_node(ctx, pgt_info);
+
+ return num_of_ptes_left;
+}
+
+u64 hl_mmu_dr_alloc_hop(struct hl_ctx *ctx)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct pgt_info *pgt_info;
+ u64 phys_addr, shadow_addr;
+
+ pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
+ if (!pgt_info)
+ return ULLONG_MAX;
+
+ phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool,
+ prop->dmmu.hop_table_size);
+ if (!phys_addr) {
+ dev_err(hdev->dev, "failed to allocate page\n");
+ goto pool_add_err;
+ }
+
+ shadow_addr = (u64) (uintptr_t) kzalloc(prop->dmmu.hop_table_size,
+ GFP_KERNEL);
+ if (!shadow_addr)
+ goto shadow_err;
+
+ pgt_info->phys_addr = phys_addr;
+ pgt_info->shadow_addr = shadow_addr;
+ pgt_info->ctx = ctx;
+ pgt_info->num_of_ptes = 0;
+ hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);
+
+ return shadow_addr;
+
+shadow_err:
+ gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool,
+ phys_addr, prop->dmmu.hop_table_size);
+pool_add_err:
+ kfree(pgt_info);
+
+ return ULLONG_MAX;
+}
+
+u64 hl_mmu_dr_get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, bool *is_new_hop)
+{
+ u64 hop_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
+
+ if (hop_addr == ULLONG_MAX) {
+ hop_addr = hl_mmu_dr_alloc_hop(ctx);
+ *is_new_hop = (hop_addr != ULLONG_MAX);
+ }
+
+ return hop_addr;
+}
+
+void hl_mmu_dr_flush(struct hl_ctx *ctx)
+{
+ /* flush all writes from all cores to reach PCI */
+ mb();
+ ctx->hdev->asic_funcs->read_pte(ctx->hdev, hl_mmu_dr_get_phys_hop0_addr(ctx));
+}
+
+int hl_mmu_dr_init(struct hl_device *hdev)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ int rc;
+
+ hdev->mmu_priv.dr.mmu_pgt_pool =
+ gen_pool_create(__ffs(prop->dmmu.hop_table_size), -1);
+
+ if (!hdev->mmu_priv.dr.mmu_pgt_pool) {
+ dev_err(hdev->dev, "Failed to create page gen pool\n");
+ return -ENOMEM;
+ }
+
+ rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr +
+ prop->dmmu.hop0_tables_total_size,
+ prop->dmmu.pgt_size - prop->dmmu.hop0_tables_total_size,
+ -1);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
+ goto err_pool_add;
+ }
+
+ hdev->mmu_priv.dr.mmu_shadow_hop0 = kvcalloc(prop->max_asid,
+ prop->dmmu.hop_table_size, GFP_KERNEL);
+ if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
+ rc = -ENOMEM;
+ goto err_pool_add;
+ }
+
+ /* MMU H/W init will be done in device hw_init() */
+
+ return 0;
+
+err_pool_add:
+ gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
+
+ return rc;
+}
+
+void hl_mmu_dr_fini(struct hl_device *hdev)
+{
+ /* MMU H/W fini was already done in device hw_fini() */
+
+ if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0))
+ return;
+
+ kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
+ gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
+
+ /* Make sure that if we arrive here again without init was
+ * called we won't cause kernel panic. This can happen for
+ * example if we fail during hard reset code at certain points
+ */
+ hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
+}
diff --git a/drivers/accel/habanalabs/common/mmu/mmu_v1.c b/drivers/accel/habanalabs/common/mmu/mmu_v1.c
index d925dc4dd0..845d16aaa6 100644
--- a/drivers/accel/habanalabs/common/mmu/mmu_v1.c
+++ b/drivers/accel/habanalabs/common/mmu/mmu_v1.c
@@ -12,166 +12,6 @@
#define MMU_V1_MAX_HOPS (MMU_HOP4 + 1)
-static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
-
-static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
-{
- struct pgt_info *pgt_info = NULL;
-
- hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
- (unsigned long) hop_addr)
- if (hop_addr == pgt_info->shadow_addr)
- break;
-
- return pgt_info;
-}
-
-static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
-{
- struct hl_device *hdev = ctx->hdev;
-
- gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr,
- hdev->asic_prop.mmu_hop_table_size);
- hash_del(&pgt_info->node);
- kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
- kfree(pgt_info);
-}
-
-static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
-{
- struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
-
- _free_hop(ctx, pgt_info);
-}
-
-static u64 alloc_hop(struct hl_ctx *ctx)
-{
- struct hl_device *hdev = ctx->hdev;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- struct pgt_info *pgt_info;
- u64 phys_addr, shadow_addr;
-
- pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
- if (!pgt_info)
- return ULLONG_MAX;
-
- phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool,
- prop->mmu_hop_table_size);
- if (!phys_addr) {
- dev_err(hdev->dev, "failed to allocate page\n");
- goto pool_add_err;
- }
-
- shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
- GFP_KERNEL);
- if (!shadow_addr)
- goto shadow_err;
-
- pgt_info->phys_addr = phys_addr;
- pgt_info->shadow_addr = shadow_addr;
- pgt_info->ctx = ctx;
- pgt_info->num_of_ptes = 0;
- hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);
-
- return shadow_addr;
-
-shadow_err:
- gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, phys_addr,
- prop->mmu_hop_table_size);
-pool_add_err:
- kfree(pgt_info);
-
- return ULLONG_MAX;
-}
-
-static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
-{
- return ctx->hdev->asic_prop.mmu_pgt_addr +
- (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
-}
-
-static inline u64 get_hop0_addr(struct hl_ctx *ctx)
-{
- return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 +
- (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
-}
-
-static void flush(struct hl_ctx *ctx)
-{
- /* flush all writes from all cores to reach PCI */
- mb();
- ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
-}
-
-/* transform the value to physical address when writing to H/W */
-static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
-{
- /*
- * The value to write is actually the address of the next shadow hop +
- * flags at the 12 LSBs.
- * Hence in order to get the value to write to the physical PTE, we
- * clear the 12 LSBs and translate the shadow hop to its associated
- * physical hop, and add back the original 12 LSBs.
- */
- u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) |
- (val & FLAGS_MASK);
-
- ctx->hdev->asic_funcs->write_pte(ctx->hdev,
- get_phys_addr(ctx, shadow_pte_addr),
- phys_val);
-
- *(u64 *) (uintptr_t) shadow_pte_addr = val;
-}
-
-/* do not transform the value to physical address when writing to H/W */
-static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
- u64 val)
-{
- ctx->hdev->asic_funcs->write_pte(ctx->hdev,
- get_phys_addr(ctx, shadow_pte_addr),
- val);
- *(u64 *) (uintptr_t) shadow_pte_addr = val;
-}
-
-/* clear the last and present bits */
-static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
-{
- /* no need to transform the value to physical address */
- write_final_pte(ctx, pte_addr, 0);
-}
-
-static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
-{
- get_pgt_info(ctx, hop_addr)->num_of_ptes++;
-}
-
-/*
- * put_pte - decrement the num of ptes and free the hop if possible
- *
- * @ctx: pointer to the context structure
- * @hop_addr: addr of the hop
- *
- * This function returns the number of ptes left on this hop. If the number is
- * 0, it means the pte was freed.
- */
-static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
-{
- struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
- int num_of_ptes_left;
-
- pgt_info->num_of_ptes--;
-
- /*
- * Need to save the number of ptes left because free_hop might free
- * the pgt_info
- */
- num_of_ptes_left = pgt_info->num_of_ptes;
- if (!num_of_ptes_left)
- _free_hop(ctx, pgt_info);
-
- return num_of_ptes_left;
-}
-
static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
u64 *hop_addr_arr, u64 virt_addr, enum mmu_hop_num hop_idx)
{
@@ -183,35 +23,6 @@ static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, struct hl_mmu_properties
ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift);
}
-static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
- bool *is_new_hop)
-{
- u64 hop_addr = hl_mmu_get_next_hop_addr(ctx, curr_pte);
-
- if (hop_addr == ULLONG_MAX) {
- hop_addr = alloc_hop(ctx);
- *is_new_hop = (hop_addr != ULLONG_MAX);
- }
-
- return hop_addr;
-}
-
-/* translates shadow address inside hop to a physical address */
-static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
-{
- u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
- u64 shadow_hop_addr = shadow_addr & ~page_mask;
- u64 pte_offset = shadow_addr & page_mask;
- u64 phys_hop_addr;
-
- if (shadow_hop_addr != get_hop0_addr(ctx))
- phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
- else
- phys_hop_addr = get_phys_hop0_addr(ctx);
-
- return phys_hop_addr + pte_offset;
-}
-
static int dram_default_mapping_init(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
@@ -232,13 +43,13 @@ static int dram_default_mapping_init(struct hl_ctx *ctx)
/* add hop1 and hop2 */
total_hops = num_of_hop3 + 2;
- ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL);
+ ctx->dram_default_hops = kcalloc(total_hops, HL_PTE_SIZE, GFP_KERNEL);
if (!ctx->dram_default_hops)
return -ENOMEM;
- hop0_addr = get_hop0_addr(ctx);
+ hop0_addr = hl_mmu_dr_get_hop0_addr(ctx);
- hop1_addr = alloc_hop(ctx);
+ hop1_addr = hl_mmu_dr_alloc_hop(ctx);
if (hop1_addr == ULLONG_MAX) {
dev_err(hdev->dev, "failed to alloc hop 1\n");
rc = -ENOMEM;
@@ -247,7 +58,7 @@ static int dram_default_mapping_init(struct hl_ctx *ctx)
ctx->dram_default_hops[total_hops - 1] = hop1_addr;
- hop2_addr = alloc_hop(ctx);
+ hop2_addr = hl_mmu_dr_alloc_hop(ctx);
if (hop2_addr == ULLONG_MAX) {
dev_err(hdev->dev, "failed to alloc hop 2\n");
rc = -ENOMEM;
@@ -257,7 +68,7 @@ static int dram_default_mapping_init(struct hl_ctx *ctx)
ctx->dram_default_hops[total_hops - 2] = hop2_addr;
for (i = 0 ; i < num_of_hop3 ; i++) {
- ctx->dram_default_hops[i] = alloc_hop(ctx);
+ ctx->dram_default_hops[i] = hl_mmu_dr_alloc_hop(ctx);
if (ctx->dram_default_hops[i] == ULLONG_MAX) {
dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
rc = -ENOMEM;
@@ -268,18 +79,18 @@ static int dram_default_mapping_init(struct hl_ctx *ctx)
/* need only pte 0 in hops 0 and 1 */
pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
- write_pte(ctx, hop0_addr, pte_val);
+ hl_mmu_dr_write_pte(ctx, hop0_addr, pte_val);
pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
- write_pte(ctx, hop1_addr, pte_val);
- get_pte(ctx, hop1_addr);
+ hl_mmu_dr_write_pte(ctx, hop1_addr, pte_val);
+ hl_mmu_dr_get_pte(ctx, hop1_addr);
hop2_pte_addr = hop2_addr;
for (i = 0 ; i < num_of_hop3 ; i++) {
pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
PAGE_PRESENT_MASK;
- write_pte(ctx, hop2_pte_addr, pte_val);
- get_pte(ctx, hop2_addr);
+ hl_mmu_dr_write_pte(ctx, hop2_pte_addr, pte_val);
+ hl_mmu_dr_get_pte(ctx, hop2_addr);
hop2_pte_addr += HL_PTE_SIZE;
}
@@ -289,23 +100,23 @@ static int dram_default_mapping_init(struct hl_ctx *ctx)
for (i = 0 ; i < num_of_hop3 ; i++) {
hop3_pte_addr = ctx->dram_default_hops[i];
for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) {
- write_final_pte(ctx, hop3_pte_addr, pte_val);
- get_pte(ctx, ctx->dram_default_hops[i]);
+ hl_mmu_dr_write_final_pte(ctx, hop3_pte_addr, pte_val);
+ hl_mmu_dr_get_pte(ctx, ctx->dram_default_hops[i]);
hop3_pte_addr += HL_PTE_SIZE;
}
}
- flush(ctx);
+ hl_mmu_dr_flush(ctx);
return 0;
hop3_err:
for (i = 0 ; i < hop3_allocated ; i++)
- free_hop(ctx, ctx->dram_default_hops[i]);
+ hl_mmu_dr_free_hop(ctx, ctx->dram_default_hops[i]);
- free_hop(ctx, hop2_addr);
+ hl_mmu_dr_free_hop(ctx, hop2_addr);
hop2_err:
- free_hop(ctx, hop1_addr);
+ hl_mmu_dr_free_hop(ctx, hop1_addr);
hop1_err:
kfree(ctx->dram_default_hops);
@@ -329,7 +140,7 @@ static void dram_default_mapping_fini(struct hl_ctx *ctx)
do_div(num_of_hop3, prop->dram_page_size);
do_div(num_of_hop3, HOP_PTE_ENTRIES_512);
- hop0_addr = get_hop0_addr(ctx);
+ hop0_addr = hl_mmu_dr_get_hop0_addr(ctx);
/* add hop1 and hop2 */
total_hops = num_of_hop3 + 2;
hop1_addr = ctx->dram_default_hops[total_hops - 1];
@@ -338,101 +149,26 @@ static void dram_default_mapping_fini(struct hl_ctx *ctx)
for (i = 0 ; i < num_of_hop3 ; i++) {
hop3_pte_addr = ctx->dram_default_hops[i];
for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) {
- clear_pte(ctx, hop3_pte_addr);
- put_pte(ctx, ctx->dram_default_hops[i]);
+ hl_mmu_dr_clear_pte(ctx, hop3_pte_addr);
+ hl_mmu_dr_put_pte(ctx, ctx->dram_default_hops[i]);
hop3_pte_addr += HL_PTE_SIZE;
}
}
hop2_pte_addr = hop2_addr;
for (i = 0 ; i < num_of_hop3 ; i++) {
- clear_pte(ctx, hop2_pte_addr);
- put_pte(ctx, hop2_addr);
+ hl_mmu_dr_clear_pte(ctx, hop2_pte_addr);
+ hl_mmu_dr_put_pte(ctx, hop2_addr);
hop2_pte_addr += HL_PTE_SIZE;
}
- clear_pte(ctx, hop1_addr);
- put_pte(ctx, hop1_addr);
- clear_pte(ctx, hop0_addr);
+ hl_mmu_dr_clear_pte(ctx, hop1_addr);
+ hl_mmu_dr_put_pte(ctx, hop1_addr);
+ hl_mmu_dr_clear_pte(ctx, hop0_addr);
kfree(ctx->dram_default_hops);
- flush(ctx);
-}
-
-/**
- * hl_mmu_v1_init() - initialize the MMU module.
- * @hdev: habanalabs device structure.
- *
- * This function does the following:
- * - Create a pool of pages for pgt_infos.
- * - Create a shadow table for pgt
- *
- * Return: 0 for success, non-zero for failure.
- */
-static int hl_mmu_v1_init(struct hl_device *hdev)
-{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- int rc;
-
- hdev->mmu_priv.dr.mmu_pgt_pool =
- gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
-
- if (!hdev->mmu_priv.dr.mmu_pgt_pool) {
- dev_err(hdev->dev, "Failed to create page gen pool\n");
- return -ENOMEM;
- }
-
- rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr +
- prop->mmu_hop0_tables_total_size,
- prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
- -1);
- if (rc) {
- dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
- goto err_pool_add;
- }
-
- hdev->mmu_priv.dr.mmu_shadow_hop0 = kvcalloc(prop->max_asid, prop->mmu_hop_table_size,
- GFP_KERNEL);
- if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
- rc = -ENOMEM;
- goto err_pool_add;
- }
-
- /* MMU H/W init will be done in device hw_init() */
-
- return 0;
-
-err_pool_add:
- gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
-
- return rc;
-}
-
-/**
- * hl_mmu_v1_fini() - release the MMU module.
- * @hdev: habanalabs device structure.
- *
- * This function does the following:
- * - Disable MMU in H/W.
- * - Free the pgt_infos pool.
- *
- * All contexts should be freed before calling this function.
- */
-static void hl_mmu_v1_fini(struct hl_device *hdev)
-{
- /* MMU H/W fini was already done in device hw_fini() */
-
- if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
- kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
- gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
-
- /* Make sure that if we arrive here again without init was
- * called we won't cause kernel panic. This can happen for
- * example if we fail during hard reset code at certain points
- */
- hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
- }
+ hl_mmu_dr_flush(ctx);
}
/**
@@ -476,7 +212,7 @@ static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx)
dev_err_ratelimited(hdev->dev,
"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
- _free_hop(ctx, pgt_info);
+ hl_mmu_dr_free_pgt_node(ctx, pgt_info);
}
}
@@ -495,7 +231,7 @@ static int hl_mmu_v1_unmap(struct hl_ctx *ctx,
for (hop_idx = MMU_HOP0; hop_idx < MMU_HOP4; hop_idx++) {
if (hop_idx == MMU_HOP0) {
- hop_addr[hop_idx] = get_hop0_addr(ctx);
+ hop_addr[hop_idx] = hl_mmu_dr_get_hop0_addr(ctx);
} else {
hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
if (hop_addr[hop_idx] == ULLONG_MAX)
@@ -546,30 +282,30 @@ static int hl_mmu_v1_unmap(struct hl_ctx *ctx,
}
hop_idx = MMU_HOP3;
- write_final_pte(ctx, hop_pte_addr[hop_idx], default_pte);
- put_pte(ctx, hop_addr[hop_idx]);
+ hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[hop_idx], default_pte);
+ hl_mmu_dr_put_pte(ctx, hop_addr[hop_idx]);
} else {
if (!(curr_pte & PAGE_PRESENT_MASK))
goto not_mapped;
if (hop_addr[MMU_HOP4])
- clear_pte(ctx, hop_pte_addr[MMU_HOP4]);
+ hl_mmu_dr_clear_pte(ctx, hop_pte_addr[MMU_HOP4]);
else
- clear_pte(ctx, hop_pte_addr[MMU_HOP3]);
+ hl_mmu_dr_clear_pte(ctx, hop_pte_addr[MMU_HOP3]);
- if (hop_addr[MMU_HOP4] && !put_pte(ctx, hop_addr[MMU_HOP4]))
+ if (hop_addr[MMU_HOP4] && !hl_mmu_dr_put_pte(ctx, hop_addr[MMU_HOP4]))
clear_hop3 = true;
if (!clear_hop3)
goto mapped;
for (hop_idx = MMU_HOP3; hop_idx >= 0; hop_idx--) {
- clear_pte(ctx, hop_pte_addr[hop_idx]);
+ hl_mmu_dr_clear_pte(ctx, hop_pte_addr[hop_idx]);
if (hop_idx == MMU_HOP0)
break;
- if (put_pte(ctx, hop_addr[hop_idx]))
+ if (hl_mmu_dr_put_pte(ctx, hop_addr[hop_idx]))
goto mapped;
}
}
@@ -616,10 +352,10 @@ static int hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++) {
if (hop_idx == MMU_HOP0) {
- hop_addr[hop_idx] = get_hop0_addr(ctx);
+ hop_addr[hop_idx] = hl_mmu_dr_get_hop0_addr(ctx);
} else {
hop_addr[hop_idx] =
- get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[hop_idx]);
+ hl_mmu_dr_get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[hop_idx]);
if (hop_addr[hop_idx] == ULLONG_MAX)
goto err;
}
@@ -666,27 +402,27 @@ static int hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask
| PAGE_PRESENT_MASK;
- write_final_pte(ctx, hop_pte_addr[num_hops - 1], curr_pte);
+ hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[num_hops - 1], curr_pte);
for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) {
prev_hop = hop_idx - 1;
if (hop_new[hop_idx]) {
curr_pte = (hop_addr[hop_idx] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
- write_pte(ctx, hop_pte_addr[prev_hop], curr_pte);
+ hl_mmu_dr_write_pte(ctx, hop_pte_addr[prev_hop], curr_pte);
if (hop_idx != MMU_HOP1)
- get_pte(ctx, hop_addr[prev_hop]);
+ hl_mmu_dr_get_pte(ctx, hop_addr[prev_hop]);
}
}
- get_pte(ctx, hop_addr[num_hops - 1]);
+ hl_mmu_dr_get_pte(ctx, hop_addr[num_hops - 1]);
return 0;
err:
for (hop_idx = num_hops; hop_idx > MMU_HOP0; hop_idx--) {
if (hop_new[hop_idx])
- free_hop(ctx, hop_addr[hop_idx]);
+ hl_mmu_dr_free_hop(ctx, hop_addr[hop_idx]);
}
return rc;
@@ -752,7 +488,7 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
if (is_huge)
used_hops--;
- hops->hop_info[0].hop_addr = get_phys_hop0_addr(ctx);
+ hops->hop_info[0].hop_addr = hl_mmu_dr_get_phys_hop0_addr(ctx);
hops->hop_info[0].hop_pte_addr =
hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
hops->hop_info[0].hop_addr, virt_addr);
@@ -801,13 +537,13 @@ static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
*/
void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
{
- mmu->init = hl_mmu_v1_init;
- mmu->fini = hl_mmu_v1_fini;
+ mmu->init = hl_mmu_dr_init;
+ mmu->fini = hl_mmu_dr_fini;
mmu->ctx_init = hl_mmu_v1_ctx_init;
mmu->ctx_fini = hl_mmu_v1_ctx_fini;
mmu->map = hl_mmu_v1_map;
mmu->unmap = hl_mmu_v1_unmap;
- mmu->flush = flush;
+ mmu->flush = hl_mmu_dr_flush;
mmu->swap_out = hl_mmu_v1_swap_out;
mmu->swap_in = hl_mmu_v1_swap_in;
mmu->get_tlb_info = hl_mmu_v1_get_tlb_info;
diff --git a/drivers/accel/habanalabs/common/mmu/mmu_v2.c b/drivers/accel/habanalabs/common/mmu/mmu_v2.c
new file mode 100644
index 0000000000..4bc0268fff
--- /dev/null
+++ b/drivers/accel/habanalabs/common/mmu/mmu_v2.c
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2020 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "../habanalabs.h"
+#include "../../include/hw_ip/mmu/mmu_general.h"
+#include "../../include/hw_ip/mmu/mmu_v2_0.h"
+
+#include <linux/slab.h>
+
+/**
+ * hl_mmu_v2_ctx_init() - initialize a context for using the MMU module.
+ * @ctx: pointer to the context structure to initialize.
+ *
+ * Initialize a mutex to protect the concurrent mapping flow, a hash to hold all
+ * page tables hops related to this context.
+ * Return: 0 on success, non-zero otherwise.
+ */
+static int hl_mmu_v2_ctx_init(struct hl_ctx *ctx)
+{
+ hash_init(ctx->mmu_shadow_hash);
+
+ return 0;
+}
+
+/*
+ * hl_mmu_v2_ctx_fini - disable a ctx from using the mmu module
+ *
+ * @ctx: pointer to the context structure
+ *
+ * This function does the following:
+ * - Free any pgts which were not freed yet
+ * - Free the mutex
+ * - Free DRAM default page mapping hops
+ */
+static void hl_mmu_v2_ctx_fini(struct hl_ctx *ctx)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct pgt_info *pgt_info;
+ struct hlist_node *tmp;
+ int i;
+
+ if (!hash_empty(ctx->mmu_shadow_hash))
+ dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
+ ctx->asid);
+
+ hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
+ dev_err_ratelimited(hdev->dev,
+ "pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
+ pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
+ hl_mmu_dr_free_pgt_node(ctx, pgt_info);
+ }
+}
+
+static int hl_mmu_v2_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
+{
+ u64 hop_addr[MMU_ARCH_6_HOPS] = { 0 }, hop_pte_addr[MMU_ARCH_6_HOPS] = { 0 }, curr_pte,
+ scrambled_virt_addr;
+ struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
+ struct hl_device *hdev = ctx->hdev;
+ struct hl_mmu_properties *mmu_prop;
+ bool is_huge = false;
+ int i, hop_last;
+
+ /* device resident in V2 are allowed only for HMMU */
+ if (!is_dram_addr)
+ return -EINVAL;
+
+ mmu_prop = &prop->dmmu;
+
+ hop_last = mmu_prop->num_hops - 1;
+
+ scrambled_virt_addr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);
+
+ hop_addr[0] = hl_mmu_dr_get_hop0_addr(ctx);
+ hop_pte_addr[0] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
+ hop_addr[0], scrambled_virt_addr);
+ if (hop_pte_addr[0] == U64_MAX)
+ return -EFAULT;
+
+ curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[0];
+
+ for (i = 1 ; i < mmu_prop->num_hops ; i++) {
+ hop_addr[i] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
+ if (hop_addr[i] == ULLONG_MAX)
+ goto not_mapped;
+
+ hop_pte_addr[i] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
+ hop_addr[i], scrambled_virt_addr);
+ if (hop_pte_addr[i] == U64_MAX)
+ return -EFAULT;
+
+ curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[i];
+
+ if ((i <= hop_last) && (curr_pte & mmu_prop->last_mask)) {
+ hop_last = i;
+ is_huge = true;
+ break;
+ }
+ }
+
+ if (is_dram_addr && !is_huge) {
+ dev_err(hdev->dev, "DRAM unmapping should use huge pages only\n");
+ return -EFAULT;
+ }
+
+ if (!(curr_pte & PAGE_PRESENT_MASK))
+ goto not_mapped;
+
+ for (i = hop_last ; i > 0 ; i--) {
+ hl_mmu_dr_clear_pte(ctx, hop_pte_addr[i]);
+ if (hl_mmu_dr_put_pte(ctx, hop_addr[i]))
+ goto mapped;
+ }
+ hl_mmu_dr_clear_pte(ctx, hop_pte_addr[0]);
+
+mapped:
+ return 0;
+
+not_mapped:
+ dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
+ virt_addr);
+
+ return -EINVAL;
+}
+
+static int hl_mmu_v2_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+ u32 page_size, bool is_dram_addr)
+{
+ u64 hop_addr[MMU_ARCH_6_HOPS] = { 0 }, hop_pte_addr[MMU_ARCH_6_HOPS] = { 0 },
+ curr_pte = 0, scrambled_virt_addr, scrambled_phys_addr;
+ struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
+ bool hop_new[MMU_ARCH_6_HOPS] = { false };
+ struct hl_device *hdev = ctx->hdev;
+ struct hl_mmu_properties *mmu_prop;
+ int rc, i, hop_last;
+
+ /* device resident in V2 are allowed only for HMMU */
+ if (!is_dram_addr)
+ return -EINVAL;
+
+ mmu_prop = &prop->dmmu;
+
+ hop_last = mmu_prop->num_hops - 1;
+
+ scrambled_virt_addr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);
+ scrambled_phys_addr = hdev->asic_funcs->scramble_addr(hdev, phys_addr);
+
+ /* First hop is preallocated therefore it is treated differently */
+ hop_addr[0] = hl_mmu_dr_get_hop0_addr(ctx);
+ hop_pte_addr[0] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
+ hop_addr[0], scrambled_virt_addr);
+ curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[0];
+
+ /* Handle hop1 to hop_last */
+ for (i = 1 ; i <= hop_last ; i++) {
+ hop_addr[i] = hl_mmu_dr_get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[i]);
+ if (hop_addr[i] == ULLONG_MAX) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ hop_pte_addr[i] = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
+ hop_addr[i], scrambled_virt_addr);
+ if (hop_pte_addr[i] == U64_MAX) {
+ rc = -EINVAL;
+ goto err;
+ }
+
+ if (!hop_pte_addr[i]) {
+ rc = -EINVAL;
+ goto err;
+ }
+
+ curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[i];
+ }
+
+ if (curr_pte & PAGE_PRESENT_MASK) {
+ dev_err(hdev->dev,
+ "mapping already exists for virt_addr 0x%llx\n",
+ virt_addr);
+
+ for (i = 0 ; i <= hop_last ; i++)
+ dev_dbg(hdev->dev, "hop%d pte: 0x%llx (0x%llx)\n",
+ i, *(u64 *) (uintptr_t) hop_pte_addr[i],
+ hop_pte_addr[i]);
+
+ rc = -EINVAL;
+ goto err;
+ }
+
+ curr_pte = (scrambled_phys_addr & HOP_PHYS_ADDR_MASK)
+ | mmu_prop->last_mask | PAGE_PRESENT_MASK;
+
+ /* Write the PTEs */
+ hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[hop_last], curr_pte);
+
+ /* for each new hop, add its address to the table of previous-hop */
+ for (i = 1 ; i <= hop_last ; i++) {
+ if (hop_new[i]) {
+ curr_pte = (hop_addr[i] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+ hl_mmu_dr_write_pte(ctx, hop_pte_addr[i - 1], curr_pte);
+
+ if (i - 1)
+ hl_mmu_dr_get_pte(ctx, hop_addr[i - 1]);
+ }
+ }
+ hl_mmu_dr_get_pte(ctx, hop_addr[hop_last]);
+
+ return 0;
+
+err:
+ for (i = 1 ; i <= hop_last ; i++)
+ if (hop_new[i] && (hop_addr[i] != U64_MAX))
+ hl_mmu_dr_free_hop(ctx, hop_addr[i]);
+
+ return rc;
+}
+
+/*
+ * hl_mmu_v2_swap_out - marks all mapping of the given ctx as swapped out
+ *
+ * @ctx: pointer to the context structure
+ *
+ */
+static void hl_mmu_v2_swap_out(struct hl_ctx *ctx)
+{
+
+}
+
+/*
+ * hl_mmu_v2_swap_in - marks all mapping of the given ctx as swapped in
+ *
+ * @ctx: pointer to the context structure
+ *
+ */
+static void hl_mmu_v2_swap_in(struct hl_ctx *ctx)
+{
+
+}
+
+static int hl_mmu_v2_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops)
+{
+ struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
+ struct hl_device *hdev = ctx->hdev;
+ struct hl_mmu_properties *mmu_prop;
+ bool is_dram_addr;
+ int i;
+
+ is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+ prop->dmmu.start_addr,
+ prop->dmmu.end_addr);
+
+ /* device resident in V2 are allowed only for HMMU */
+ if (!is_dram_addr)
+ return -EINVAL;
+
+ mmu_prop = &prop->dmmu;
+ hops->range_type = HL_VA_RANGE_TYPE_DRAM;
+
+ hops->scrambled_vaddr = hdev->asic_funcs->scramble_addr(hdev, virt_addr);
+
+ hops->hop_info[0].hop_addr = hl_mmu_dr_get_phys_hop0_addr(ctx);
+ hops->hop_info[0].hop_pte_addr = hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
+ hops->hop_info[0].hop_addr,
+ hops->scrambled_vaddr);
+ if (hops->hop_info[0].hop_pte_addr == U64_MAX)
+ return -EFAULT;
+
+ hops->hop_info[0].hop_pte_val = hdev->asic_funcs->read_pte(hdev,
+ hops->hop_info[0].hop_pte_addr);
+ if (hops->hop_info[0].hop_pte_val == U64_MAX)
+ return -EFAULT;
+
+ for (i = 1 ; i < mmu_prop->num_hops ; i++) {
+ hops->hop_info[i].hop_addr =
+ hl_mmu_get_next_hop_addr(ctx, hops->hop_info[i - 1].hop_pte_val);
+ if (hops->hop_info[i].hop_addr == ULLONG_MAX)
+ return -EFAULT;
+
+ hops->hop_info[i].hop_pte_addr =
+ hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
+ hops->hop_info[i].hop_addr,
+ hops->scrambled_vaddr);
+ if (hops->hop_info[i].hop_pte_addr == U64_MAX)
+ return -EFAULT;
+
+ hops->hop_info[i].hop_pte_val =
+ hdev->asic_funcs->read_pte(hdev,
+ hops->hop_info[i].hop_pte_addr);
+
+ if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
+ return -EFAULT;
+
+ if (hops->hop_info[i].hop_pte_val & mmu_prop->last_mask)
+ break;
+ }
+
+ /* if passed over all hops then no last hop was found */
+ if (i == mmu_prop->num_hops)
+ return -EFAULT;
+
+ if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
+ return -EFAULT;
+
+ if (hops->scrambled_vaddr != virt_addr)
+ hops->unscrambled_paddr = hdev->asic_funcs->descramble_addr
+ (hdev, hops->hop_info[i].hop_pte_val);
+ else
+ hops->unscrambled_paddr = hops->hop_info[i].hop_pte_val;
+
+ hops->used_hops = i + 1;
+
+ return 0;
+}
+
+/*
+ * hl_mmu_v2_prepare - prepare mmu_if for working with mmu v2
+ *
+ * @hdev: pointer to the device structure
+ * @mmu_if: pointer to the mmu interface structure
+ */
+void hl_mmu_v2_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
+{
+ mmu->init = hl_mmu_dr_init;
+ mmu->fini = hl_mmu_dr_fini;
+ mmu->ctx_init = hl_mmu_v2_ctx_init;
+ mmu->ctx_fini = hl_mmu_v2_ctx_fini;
+ mmu->map = hl_mmu_v2_map;
+ mmu->unmap = hl_mmu_v2_unmap;
+ mmu->flush = hl_mmu_dr_flush;
+ mmu->swap_out = hl_mmu_v2_swap_out;
+ mmu->swap_in = hl_mmu_v2_swap_in;
+ mmu->get_tlb_info = hl_mmu_v2_get_tlb_info;
+}
diff --git a/drivers/accel/habanalabs/common/mmu/mmu_v2_hr.c b/drivers/accel/habanalabs/common/mmu/mmu_v2_hr.c
index afe7ef964f..31507b2a43 100644
--- a/drivers/accel/habanalabs/common/mmu/mmu_v2_hr.c
+++ b/drivers/accel/habanalabs/common/mmu/mmu_v2_hr.c
@@ -47,7 +47,7 @@ static inline int hl_mmu_v2_hr_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
- return hl_mmu_hr_init(hdev, &hdev->mmu_priv.hr, prop->mmu_hop_table_size,
+ return hl_mmu_hr_init(hdev, &hdev->mmu_priv.hr, prop->pmmu.hop_table_size,
prop->mmu_pgt_size);
}
@@ -65,7 +65,7 @@ static inline void hl_mmu_v2_hr_fini(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
- hl_mmu_hr_fini(hdev, &hdev->mmu_priv.hr, prop->mmu_hop_table_size);
+ hl_mmu_hr_fini(hdev, &hdev->mmu_priv.hr, prop->pmmu.hop_table_size);
}
/**
@@ -108,7 +108,7 @@ static void hl_mmu_v2_hr_ctx_fini(struct hl_ctx *ctx)
"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
hl_mmu_hr_free_hop_remove_pgt(pgt_info, &ctx->hdev->mmu_priv.hr,
- ctx->hdev->asic_prop.mmu_hop_table_size);
+ ctx->hdev->asic_prop.pmmu.hop_table_size);
}
}
@@ -150,7 +150,7 @@ static int _hl_mmu_v2_hr_unmap(struct hl_ctx *ctx,
curr_pte = *(u64 *) (uintptr_t) hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i],
hop_pte_phys_addr[i],
- ctx->hdev->asic_prop.mmu_hop_table_size);
+ ctx->hdev->asic_prop.pmmu.hop_table_size);
if ((i < hop_last) && (curr_pte & mmu_prop->last_mask)) {
hop_last = i;
@@ -169,14 +169,14 @@ static int _hl_mmu_v2_hr_unmap(struct hl_ctx *ctx,
for (i = hop_last ; i > 0 ; i--) {
hl_mmu_hr_clear_pte(ctx, hops_pgt_info[i], hop_pte_phys_addr[i],
- ctx->hdev->asic_prop.mmu_hop_table_size);
+ ctx->hdev->asic_prop.pmmu.hop_table_size);
if (hl_mmu_hr_put_pte(ctx, hops_pgt_info[i], &ctx->hdev->mmu_priv.hr,
- ctx->hdev->asic_prop.mmu_hop_table_size))
+ ctx->hdev->asic_prop.pmmu.hop_table_size))
goto mapped;
}
hl_mmu_hr_clear_pte(ctx, hops_pgt_info[0], hop_pte_phys_addr[0],
- ctx->hdev->asic_prop.mmu_hop_table_size);
+ ctx->hdev->asic_prop.pmmu.hop_table_size);
mapped:
return 0;
@@ -255,7 +255,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx,
scrambled_virt_addr);
curr_pte = *(u64 *) (uintptr_t) hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i],
hop_pte_phys_addr[i],
- ctx->hdev->asic_prop.mmu_hop_table_size);
+ ctx->hdev->asic_prop.pmmu.hop_table_size);
}
if (curr_pte & PAGE_PRESENT_MASK) {
@@ -268,7 +268,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx,
*(u64 *) (uintptr_t)
hl_mmu_hr_pte_phys_to_virt(ctx, hops_pgt_info[i],
hop_pte_phys_addr[i],
- ctx->hdev->asic_prop.mmu_hop_table_size),
+ ctx->hdev->asic_prop.pmmu.hop_table_size),
hop_pte_phys_addr[i]);
rc = -EINVAL;
goto err;
@@ -279,7 +279,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx,
/* Write the PTEs */
hl_mmu_hr_write_pte(ctx, hops_pgt_info[hop_last], hop_pte_phys_addr[hop_last], curr_pte,
- ctx->hdev->asic_prop.mmu_hop_table_size);
+ ctx->hdev->asic_prop.pmmu.hop_table_size);
/* for each new hop, add its address to the table of previous-hop */
for (i = 1 ; i <= hop_last ; i++) {
@@ -287,7 +287,7 @@ static int _hl_mmu_v2_hr_map(struct hl_ctx *ctx,
curr_pte = (hops_pgt_info[i]->phys_addr & HOP_PHYS_ADDR_MASK) |
PAGE_PRESENT_MASK;
hl_mmu_hr_write_pte(ctx, hops_pgt_info[i - 1], hop_pte_phys_addr[i - 1],
- curr_pte, ctx->hdev->asic_prop.mmu_hop_table_size);
+ curr_pte, ctx->hdev->asic_prop.pmmu.hop_table_size);
if (i - 1)
hl_mmu_hr_get_pte(ctx, &ctx->hdev->mmu_func[MMU_HR_PGT].hr_funcs,
hops_pgt_info[i - 1]->phys_addr);
@@ -303,7 +303,7 @@ err:
for (i = 1 ; i <= hop_last ; i++)
if (hop_new[i] && hops_pgt_info[i])
hl_mmu_hr_free_hop_remove_pgt(hops_pgt_info[i], &ctx->hdev->mmu_priv.hr,
- ctx->hdev->asic_prop.mmu_hop_table_size);
+ ctx->hdev->asic_prop.pmmu.hop_table_size);
return rc;
}
diff --git a/drivers/accel/habanalabs/common/security.c b/drivers/accel/habanalabs/common/security.c
index fe913965db..5402a3cd04 100644
--- a/drivers/accel/habanalabs/common/security.c
+++ b/drivers/accel/habanalabs/common/security.c
@@ -7,15 +7,31 @@
#include "habanalabs.h"
-static const char * const hl_glbl_error_cause[HL_MAX_NUM_OF_GLBL_ERR_CAUSE] = {
+static const char * const hl_glbl_error_cause[] = {
"Error due to un-priv read",
"Error due to un-secure read",
"Error due to read from unmapped reg",
"Error due to un-priv write",
"Error due to un-secure write",
"Error due to write to unmapped reg",
+ "N/A",
+ "N/A",
+ "N/A",
+ "N/A",
+ "N/A",
+ "N/A",
+ "N/A",
+ "N/A",
+ "N/A",
+ "N/A",
"External I/F write sec violation",
"External I/F write to un-mapped reg",
+ "N/A",
+ "N/A",
+ "N/A",
+ "N/A",
+ "N/A",
+ "N/A",
"Read to write only",
"Write to read only"
};
@@ -671,10 +687,11 @@ static bool hl_check_block_range_exclusion(struct hl_device *hdev,
static int hl_read_glbl_errors(struct hl_device *hdev,
u32 blk_idx, u32 major, u32 minor, u32 sub_minor, void *data)
{
- struct hl_special_block_info *special_blocks = hdev->asic_prop.special_blocks;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct hl_special_block_info *special_blocks = prop->special_blocks;
struct hl_special_block_info *current_block = &special_blocks[blk_idx];
u32 glbl_err_addr, glbl_err_cause, addr_val, cause_val, block_base,
- base = current_block->base_addr - lower_32_bits(hdev->asic_prop.cfg_base_address);
+ base = current_block->base_addr - lower_32_bits(prop->cfg_base_address);
int i;
block_base = base + major * current_block->major_offset +
@@ -689,13 +706,13 @@ static int hl_read_glbl_errors(struct hl_device *hdev,
glbl_err_addr = block_base + HL_GLBL_ERR_ADDR_OFFSET;
addr_val = RREG32(glbl_err_addr);
- for (i = 0 ; i < hdev->asic_prop.glbl_err_cause_num ; i++) {
+ for (i = 0 ; i <= prop->glbl_err_max_cause_num ; i++) {
if (cause_val & BIT(i))
dev_err_ratelimited(hdev->dev,
- "%s, addr %#llx\n",
- hl_glbl_error_cause[i],
- hdev->asic_prop.cfg_base_address + block_base +
- FIELD_GET(HL_GLBL_ERR_ADDRESS_MASK, addr_val));
+ "%s, addr %#llx\n",
+ hl_glbl_error_cause[i],
+ prop->cfg_base_address + block_base +
+ FIELD_GET(HL_GLBL_ERR_ADDRESS_MASK, addr_val));
}
WREG32(glbl_err_cause, cause_val);
diff --git a/drivers/accel/habanalabs/common/security.h b/drivers/accel/habanalabs/common/security.h
index d7a3b3e82e..476f70687c 100644
--- a/drivers/accel/habanalabs/common/security.h
+++ b/drivers/accel/habanalabs/common/security.h
@@ -13,8 +13,7 @@
struct hl_device;
/* special blocks */
-#define HL_MAX_NUM_OF_GLBL_ERR_CAUSE 10
-#define HL_GLBL_ERR_ADDRESS_MASK GENMASK(11, 0)
+#define HL_GLBL_ERR_ADDRESS_MASK GENMASK(11, 0)
/* GLBL_ERR_ADDR register offset from the start of the block */
#define HL_GLBL_ERR_ADDR_OFFSET 0xF44
/* GLBL_ERR_CAUSE register offset from the start of the block */
diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c
index 53292d4c15..f2b04ffb0e 100644
--- a/drivers/accel/habanalabs/gaudi/gaudi.c
+++ b/drivers/accel/habanalabs/gaudi/gaudi.c
@@ -614,8 +614,6 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
else
prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
prop->mmu_pte_size = HL_PTE_SIZE;
- prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
- prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
prop->dram_page_size = PAGE_SIZE_2MB;
prop->device_mem_alloc_default_page_size = prop->dram_page_size;
prop->dram_supports_virtual_memory = false;
@@ -637,8 +635,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
prop->pmmu.last_mask = LAST_MASK;
/* TODO: will be duplicated until implementing per-MMU props */
- prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
- prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
+ prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
+ prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
/* PMMU and HPMMU are the same except of page size */
memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
@@ -649,6 +647,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
prop->dmmu.end_addr = VA_HOST_SPACE_END;
prop->dmmu.page_size = PAGE_SIZE_2MB;
+ prop->dmmu.pgt_size = prop->mmu_pgt_size;
prop->cfg_size = CFG_SIZE;
prop->max_asid = MAX_ASID;
@@ -3652,7 +3651,7 @@ static int gaudi_mmu_init(struct hl_device *hdev)
for (i = 0 ; i < prop->max_asid ; i++) {
hop0_addr = prop->mmu_pgt_addr +
- (i * prop->mmu_hop_table_size);
+ (i * prop->dmmu.hop_table_size);
rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
if (rc) {
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index e0e5615ef9..fa1c4feb9f 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -158,11 +158,13 @@
#define RAZWI_INITIATOR_ID_X_Y(xl, yl, xh) \
(RAZWI_INITIATOR_ID_X_Y_LOW(xl, yl) | RAZWI_INITIATOR_ID_X_HIGH(xh))
-#define PSOC_RAZWI_ENG_STR_SIZE 128
-#define PSOC_RAZWI_MAX_ENG_PER_RTR 5
+#define PSOC_RAZWI_ENG_STR_SIZE 128
+#define PSOC_RAZWI_MAX_ENG_PER_RTR 5
/* HW scrambles only bits 0-25 */
-#define HW_UNSCRAMBLED_BITS_MASK GENMASK_ULL(63, 26)
+#define HW_UNSCRAMBLED_BITS_MASK GENMASK_ULL(63, 26)
+
+#define GAUDI2_GLBL_ERR_MAX_CAUSE_NUM 17
struct gaudi2_razwi_info {
u32 axuser_xy;
@@ -2308,11 +2310,26 @@ static int set_number_of_functional_hbms(struct hl_device *hdev)
return 0;
}
+static bool gaudi2_is_edma_queue_id(u32 queue_id)
+{
+
+ switch (queue_id) {
+ case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
+ case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
+ case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
+ case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
+ return true;
+ default:
+ return false;
+ }
+}
+
static int gaudi2_set_dram_properties(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
- u32 basic_hbm_page_size;
- int rc;
+ u64 hbm_drv_base_offset = 0, edma_pq_base_addr;
+ u32 basic_hbm_page_size, edma_idx = 0;
+ int rc, i;
rc = set_number_of_functional_hbms(hdev);
if (rc)
@@ -2356,9 +2373,35 @@ static int gaudi2_set_dram_properties(struct hl_device *hdev)
prop->dmmu.start_addr = prop->dram_base_address +
(prop->dram_page_size *
DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
-
prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);
+ /*
+ * Driver can't share an (48MB) HBM page with the F/W in order to prevent FW to block
+ * the driver part by range register, so it must start at the next (48MB) page
+ */
+ hbm_drv_base_offset = roundup(CPU_FW_IMAGE_SIZE, prop->num_functional_hbms * SZ_8M);
+
+ /*
+ * The NIC driver section size and the HMMU page tables section in the HBM needs
+ * to be the remaining size in the first dram page after taking into
+ * account the F/W image size
+ */
+
+ /* Reserve region in HBM for HMMU page tables */
+ prop->mmu_pgt_addr = DRAM_PHYS_BASE + hbm_drv_base_offset +
+ ((prop->dram_page_size - hbm_drv_base_offset) -
+ (HMMU_PAGE_TABLES_SIZE + EDMA_PQS_SIZE + EDMA_SCRATCHPAD_SIZE));
+
+ /* Set EDMA PQs HBM addresses */
+ edma_pq_base_addr = prop->mmu_pgt_addr + HMMU_PAGE_TABLES_SIZE;
+
+ for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
+ if (gaudi2_is_edma_queue_id(i)) {
+ prop->hw_queues_props[i].q_dram_bd_address = edma_pq_base_addr +
+ (edma_idx * HL_QUEUE_SIZE_IN_BYTES);
+ edma_idx++;
+ }
+ }
return 0;
}
@@ -2368,7 +2411,7 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hw_queue_properties *q_props;
u32 num_sync_stream_queues = 0;
- int i;
+ int i, rc;
prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
@@ -2391,6 +2434,9 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
}
q_props[i].cb_alloc_flags = CB_ALLOC_USER;
+
+ if (gaudi2_is_edma_queue_id(i))
+ q_props[i].dram_bd = 1;
}
q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
@@ -2419,46 +2465,43 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
prop->rotator_enabled_mask = BIT(NUM_OF_ROT) - 1;
- if (hdev->pldm)
- prop->mmu_pgt_size = 0x800000; /* 8MB */
- else
- prop->mmu_pgt_size = MMU_PAGE_TABLES_INITIAL_SIZE;
+ prop->max_asid = 2;
+ prop->dmmu.pgt_size = HMMU_PAGE_TABLES_SIZE;
prop->mmu_pte_size = HL_PTE_SIZE;
- prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
- prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
- prop->dmmu.hop_shifts[MMU_HOP4] = DHOP4_SHIFT;
prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
- prop->dmmu.hop_masks[MMU_HOP4] = DHOP4_MASK;
prop->dmmu.page_size = PAGE_SIZE_1GB;
- prop->dmmu.num_hops = MMU_ARCH_6_HOPS;
+ prop->dmmu.num_hops = MMU_ARCH_4_HOPS;
prop->dmmu.last_mask = LAST_MASK;
- prop->dmmu.host_resident = 1;
- prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
- prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
+ prop->dmmu.host_resident = 0;
+ prop->dmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
+ prop->dmmu.hop0_tables_total_size = HOP_TABLE_SIZE_512_PTE * prop->max_asid;
- /*
- * this is done in order to be able to validate FW descriptor (i.e. validating that
- * the addresses and allocated space for FW image does not cross memory bounds).
- * for this reason we set the DRAM size to the minimum possible and later it will
- * be modified according to what reported in the cpucp info packet
+ /* As we need to set the pgt address in dram for HMMU init so we cannot
+ * wait to the fw cpucp info to set the dram props as mmu init comes before
+ * hw init
*/
- prop->dram_size = (GAUDI2_HBM_NUM - 1) * SZ_16G;
+ rc = hdev->asic_funcs->set_dram_properties(hdev);
+ if (rc)
+ goto free_qprops;
+ prop->mmu_pgt_size = PMMU_PAGE_TABLES_SIZE;
+
+ prop->pmmu.pgt_size = prop->mmu_pgt_size;
hdev->pmmu_huge_range = true;
prop->pmmu.host_resident = 1;
prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
prop->pmmu.last_mask = LAST_MASK;
- prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
- prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
+ prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
+ prop->pmmu.hop0_tables_total_size = HOP_TABLE_SIZE_512_PTE * prop->max_asid;
prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
@@ -2516,7 +2559,6 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
prop->max_num_of_engines = GAUDI2_ENGINE_ID_SIZE;
prop->num_engine_cores = CPU_ID_MAX;
prop->cfg_size = CFG_SIZE;
- prop->max_asid = MAX_ASID;
prop->num_of_events = GAUDI2_EVENT_SIZE;
prop->supports_engine_modes = true;
@@ -2560,6 +2602,10 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev)
prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0;
return 0;
+
+free_qprops:
+ kfree(prop->hw_queues_props);
+ return rc;
}
static int gaudi2_pci_bars_map(struct hl_device *hdev)
@@ -3033,6 +3079,25 @@ static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
return 0;
}
+static int gaudi2_mmu_clear_pgt_range(struct hl_device *hdev)
+{
+ struct gaudi2_device *gaudi2 = hdev->asic_specific;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ int rc;
+
+ if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
+ return 0;
+
+ if (prop->dmmu.host_resident)
+ return 0;
+
+ rc = gaudi2_memset_device_memory(hdev, prop->mmu_pgt_addr, prop->dmmu.pgt_size, 0);
+ if (rc)
+ dev_err(hdev->dev, "Failed to clear mmu pgt");
+
+ return rc;
+}
+
static int gaudi2_early_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -3258,6 +3323,12 @@ static int gaudi2_late_init(struct hl_device *hdev)
goto disable_pci_access;
}
+ rc = gaudi2_mmu_clear_pgt_range(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
+ goto disable_pci_access;
+ }
+
gaudi2_init_arcs(hdev);
rc = gaudi2_scrub_arcs_dccm(hdev);
@@ -3518,7 +3589,7 @@ static int gaudi2_special_blocks_config(struct hl_device *hdev)
int i, rc;
/* Configure Special blocks */
- prop->glbl_err_cause_num = GAUDI2_NUM_OF_GLBL_ERR_CAUSE;
+ prop->glbl_err_max_cause_num = GAUDI2_GLBL_ERR_MAX_CAUSE_NUM;
prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks);
prop->special_blocks = kmalloc_array(prop->num_of_special_blocks,
sizeof(*prop->special_blocks), GFP_KERNEL);
@@ -3697,13 +3768,7 @@ static int gaudi2_sw_init(struct hl_device *hdev)
spin_lock_init(&gaudi2->hw_queues_lock);
- gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE,
- &gaudi2->scratchpad_bus_address,
- GFP_KERNEL | __GFP_ZERO);
- if (!gaudi2->scratchpad_kernel_address) {
- rc = -ENOMEM;
- goto free_virt_msix_db_mem;
- }
+ gaudi2->scratchpad_bus_address = prop->mmu_pgt_addr + HMMU_PAGE_TABLES_SIZE + EDMA_PQS_SIZE;
gaudi2_user_mapped_blocks_init(hdev);
@@ -3727,7 +3792,7 @@ static int gaudi2_sw_init(struct hl_device *hdev)
rc = gaudi2_special_blocks_iterator_config(hdev);
if (rc)
- goto free_scratchpad_mem;
+ goto free_virt_msix_db_mem;
rc = gaudi2_test_queues_msgs_alloc(hdev);
if (rc)
@@ -3737,9 +3802,6 @@ static int gaudi2_sw_init(struct hl_device *hdev)
special_blocks_free:
gaudi2_special_blocks_iterator_free(hdev);
-free_scratchpad_mem:
- hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
- gaudi2->scratchpad_bus_address);
free_virt_msix_db_mem:
hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
free_cpu_accessible_dma_pool:
@@ -3770,9 +3832,6 @@ static int gaudi2_sw_fini(struct hl_device *hdev)
hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
hdev->cpu_accessible_dma_address);
- hl_asic_dma_free_coherent(hdev, PAGE_SIZE, gaudi2->scratchpad_kernel_address,
- gaudi2->scratchpad_bus_address);
-
dma_pool_destroy(hdev->dma_pool);
kfree(gaudi2);
@@ -4254,6 +4313,8 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
return 0;
+ hl_init_cpu_for_irq(hdev);
+
rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
PCI_IRQ_MSIX);
if (rc < 0) {
@@ -4307,6 +4368,7 @@ static int gaudi2_enable_msix(struct hl_device *hdev)
i++, j++, user_irq_init_cnt++) {
irq = pci_irq_vector(hdev->pdev, i);
+ hl_set_irq_affinity(hdev, irq);
rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
&hdev->user_interrupt[j]);
if (rc) {
@@ -4333,6 +4395,7 @@ free_user_irq:
i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
irq = pci_irq_vector(hdev->pdev, i);
+ irq_set_affinity_and_hint(irq, NULL);
free_irq(irq, &hdev->user_interrupt[j]);
}
irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
@@ -4413,6 +4476,7 @@ static void gaudi2_disable_msix(struct hl_device *hdev)
k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
irq = pci_irq_vector(hdev->pdev, i);
+ irq_set_affinity_and_hint(irq, NULL);
free_irq(irq, &hdev->user_interrupt[j]);
}
@@ -4957,10 +5021,17 @@ static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
q = &hdev->kernel_queues[queue_id_base + pq_id];
pq_offset = pq_id * 4;
- WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
- lower_32_bits(q->bus_address));
- WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
- upper_32_bits(q->bus_address));
+ if (q->dram_bd) {
+ WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
+ lower_32_bits(q->pq_dram_address));
+ WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
+ upper_32_bits(q->pq_dram_address));
+ } else {
+ WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
+ lower_32_bits(q->bus_address));
+ WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
+ upper_32_bits(q->bus_address));
+ }
WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
@@ -5847,7 +5918,8 @@ static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_har
return rc;
}
-static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
+static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base,
+ bool host_resident_pgt)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 hop0_addr;
@@ -5859,7 +5931,11 @@ static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
max_asid = min((u32) 8, max_asid);
for (asid = 0 ; asid < max_asid ; asid++) {
- hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
+ if (host_resident_pgt)
+ hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
+ else
+ hop0_addr = prop->mmu_pgt_addr + (asid * prop->dmmu.hop_table_size);
+
rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
if (rc) {
dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
@@ -5870,7 +5946,8 @@ static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base)
return 0;
}
-static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base)
+static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base,
+ bool host_resident_pgt)
{
u32 status, timeout_usec;
int rc;
@@ -5893,7 +5970,7 @@ static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb
if (rc)
dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");
- rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base);
+ rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base, host_resident_pgt);
if (rc)
return rc;
@@ -5917,6 +5994,7 @@ static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb
static int gaudi2_pci_mmu_init(struct hl_device *hdev)
{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
struct gaudi2_device *gaudi2 = hdev->asic_specific;
u32 mmu_base, stlb_base;
int rc;
@@ -5956,7 +6034,7 @@ static int gaudi2_pci_mmu_init(struct hl_device *hdev)
WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);
- rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
+ rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base, prop->pmmu.host_resident);
if (rc)
return rc;
@@ -6008,7 +6086,7 @@ static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);
- rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base);
+ rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base, prop->dmmu.host_resident);
if (rc)
return rc;
@@ -7046,7 +7124,7 @@ static int gaudi2_test_queues(struct hl_device *hdev)
/* send test message on all enabled Qs */
for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
- if (!gaudi2_is_queue_enabled(hdev, i))
+ if (!gaudi2_is_queue_enabled(hdev, i) || gaudi2_is_edma_queue_id(i))
continue;
msg_info = &gaudi2->queues_test_info[i - GAUDI2_QUEUE_ID_PDMA_0_0];
@@ -7063,7 +7141,7 @@ static int gaudi2_test_queues(struct hl_device *hdev)
/* verify that all messages were processed */
for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
- if (!gaudi2_is_queue_enabled(hdev, i))
+ if (!gaudi2_is_queue_enabled(hdev, i) || gaudi2_is_edma_queue_id(i))
continue;
rc = gaudi2_test_queue_wait_completion(hdev, i, sob_val);
@@ -8907,9 +8985,6 @@ static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_typ
u32 error_count = 0;
int i;
- gaudi2_print_event(hdev, event_type, true,
- "intr_cause_data: %#llx", intr_cause_data);
-
for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
if (!(intr_cause_data & BIT_ULL(i)))
continue;
@@ -8918,15 +8993,16 @@ static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_typ
"err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]);
error_count++;
- /*
- * Always check for LBW and HBW additional info as the indication itself is
- * sometimes missing
- */
+ switch (intr_cause_data & BIT_ULL(i)) {
+ case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK:
+ hl_check_for_glbl_errors(hdev);
+ break;
+ case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK:
+ gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
+ break;
+ }
}
- hl_check_for_glbl_errors(hdev);
- gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
-
return error_count;
}
@@ -8983,7 +9059,6 @@ static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool
if (is_pmmu) {
dev_err_ratelimited(hdev->dev, "PMMU page fault on va 0x%llx\n", addr);
} else {
-
addr = gaudi2_mmu_descramble_addr(hdev, addr);
addr &= HW_UNSCRAMBLED_BITS_MASK;
dev_err_ratelimited(hdev->dev, "HMMU page fault on va range 0x%llx - 0x%llx\n",
@@ -9514,25 +9589,17 @@ static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type)
static int gaudi2_handle_pcie_drain(struct hl_device *hdev,
struct hl_eq_pcie_drain_ind_data *drain_data)
{
- u64 lbw_rd, lbw_wr, hbw_rd, hbw_wr, cause, error_count = 0;
+ u64 cause, error_count = 0;
cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);
- lbw_rd = le64_to_cpu(drain_data->drain_rd_addr_lbw);
- lbw_wr = le64_to_cpu(drain_data->drain_wr_addr_lbw);
- hbw_rd = le64_to_cpu(drain_data->drain_rd_addr_hbw);
- hbw_wr = le64_to_cpu(drain_data->drain_wr_addr_hbw);
if (cause & BIT_ULL(0)) {
- dev_err_ratelimited(hdev->dev,
- "PCIE AXI drain LBW completed, read_err %u, write_err %u\n",
- !!lbw_rd, !!lbw_wr);
+ dev_err_ratelimited(hdev->dev, "PCIE AXI drain LBW completed\n");
error_count++;
}
if (cause & BIT_ULL(1)) {
- dev_err_ratelimited(hdev->dev,
- "PCIE AXI drain HBW completed, raddr %#llx, waddr %#llx\n",
- hbw_rd, hbw_wr);
+ dev_err_ratelimited(hdev->dev, "PCIE AXI drain HBW completed\n");
error_count++;
}
@@ -10250,11 +10317,11 @@ reset_device:
}
static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
- struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr,
- u32 hw_queue_id, u32 size, u64 addr, u32 val)
+ struct packet_lin_dma *lin_dma_pkt,
+ u64 phys_addr, u32 hw_queue_id, u32 size, u64 addr, u32 val)
{
u32 ctl, pkt_size;
- int rc = 0;
+ int rc = 0, i;
ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
@@ -10268,9 +10335,20 @@ static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
pkt_size = sizeof(struct packet_lin_dma);
- rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
+ for (i = 0; i < 3; i++) {
+ rc = hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM,
+ phys_addr + (i * sizeof(u64)),
+ ((u64 *)(lin_dma_pkt)) + i, DEBUGFS_WRITE64);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to copy lin_dma packet to HBM (%#llx)\n",
+ phys_addr);
+ return rc;
+ }
+ }
+
+ rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, phys_addr);
if (rc)
- dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
+ dev_err(hdev->dev, "Failed to send lin_dma packet to H/W queue %d\n",
hw_queue_id);
return rc;
@@ -10283,12 +10361,11 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz
GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
- old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
+ old_mmubp, mmubp, num_of_pkts, busy, pkt_size, cb_len;
u64 comp_addr, cur_addr = addr, end_addr = addr + size;
struct asic_fixed_properties *prop = &hdev->asic_prop;
+ int rc = 0, dma_num = 0, i;
void *lin_dma_pkts_arr;
- dma_addr_t pkt_dma_addr;
- int rc = 0, dma_num = 0;
if (prop->edma_enabled_mask == 0) {
dev_info(hdev->dev, "non of the EDMA engines is enabled - skip dram scrubbing\n");
@@ -10306,9 +10383,19 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz
/* Calculate how many lin dma pkts we'll need */
num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
pkt_size = sizeof(struct packet_lin_dma);
+ cb_len = pkt_size * num_of_pkts;
+
+ /*
+ * if we're not scrubing HMMU or NIC reserved sections in hbm,
+ * then it the scrubing of the user section, as we use the start of the user section
+ * to store the CB of the EDMA QM, so shift the start address of the scrubbing accordingly
+ * and scrub the CB section before leaving this function.
+ */
+ if ((addr >= prop->dram_user_base_address) &&
+ (addr < prop->dram_user_base_address + cb_len))
+ cur_addr += (prop->dram_user_base_address + cb_len) - addr;
- lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts,
- &pkt_dma_addr, GFP_KERNEL);
+ lin_dma_pkts_arr = kvcalloc(num_of_pkts, pkt_size, GFP_KERNEL);
if (!lin_dma_pkts_arr)
return -ENOMEM;
@@ -10354,7 +10441,7 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz
rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev,
(struct packet_lin_dma *)lin_dma_pkts_arr + dma_num,
- pkt_dma_addr + dma_num * pkt_size,
+ prop->dram_user_base_address + (dma_num * pkt_size),
edma_queues_id[dcore] + edma_idx * 4,
chunk_size, cur_addr, val);
if (rc)
@@ -10363,14 +10450,16 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz
dma_num++;
cur_addr += chunk_size;
if (cur_addr == end_addr)
- break;
+ goto edma_wait;
}
}
}
+edma_wait:
rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
if (rc) {
- dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n");
+ dev_err(hdev->dev, "DMA Timeout during HBM scrubbing(sob: 0x%x, dma_num: 0x%x)\n",
+ busy, dma_num);
goto end;
}
end:
@@ -10391,8 +10480,16 @@ end:
}
}
+ memset(lin_dma_pkts_arr, 0, sizeof(u64));
+
+ /* Zero the HBM area where we copied the CB */
+ for (i = 0; i < cb_len / sizeof(u64); i += sizeof(u64))
+ rc = hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM,
+ prop->dram_user_base_address + i,
+ (u64 *)(lin_dma_pkts_arr), DEBUGFS_WRITE64);
WREG32(sob_addr, 0);
- hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr);
+
+ kfree(lin_dma_pkts_arr);
return rc;
}
@@ -11450,7 +11547,7 @@ static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_p
return 0;
page_size_err:
- dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
+ dev_err(hdev->dev, "page size of 0x%X is not 0x%X aligned, can't map\n",
page_size, mmu_prop->page_size >> 10);
return -EFAULT;
}
@@ -11470,6 +11567,29 @@ int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
return hl_fw_send_device_activity(hdev, open);
}
+static u64 gaudi2_read_pte(struct hl_device *hdev, u64 addr)
+{
+ struct gaudi2_device *gaudi2 = hdev->asic_specific;
+ u64 val;
+
+ if (hdev->reset_info.hard_reset_pending)
+ return U64_MAX;
+
+ val = readq(hdev->pcie_bar[DRAM_BAR_ID] + (addr - gaudi2->dram_bar_cur_addr));
+
+ return val;
+}
+
+static void gaudi2_write_pte(struct hl_device *hdev, u64 addr, u64 val)
+{
+ struct gaudi2_device *gaudi2 = hdev->asic_specific;
+
+ if (hdev->reset_info.hard_reset_pending)
+ return;
+
+ writeq(val, hdev->pcie_bar[DRAM_BAR_ID] + (addr - gaudi2->dram_bar_cur_addr));
+}
+
static const struct hl_asic_funcs gaudi2_funcs = {
.early_init = gaudi2_early_init,
.early_fini = gaudi2_early_fini,
@@ -11506,8 +11626,8 @@ static const struct hl_asic_funcs gaudi2_funcs = {
.add_device_attr = gaudi2_add_device_attr,
.handle_eqe = gaudi2_handle_eqe,
.get_events_stat = gaudi2_get_events_stat,
- .read_pte = NULL,
- .write_pte = NULL,
+ .read_pte = gaudi2_read_pte,
+ .write_pte = gaudi2_write_pte,
.mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
.mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
.mmu_prefetch_cache_range = NULL,
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2P.h b/drivers/accel/habanalabs/gaudi2/gaudi2P.h
index 9b9eef0d97..eee41387b2 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2P.h
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2P.h
@@ -19,8 +19,6 @@
#define GAUDI2_LINUX_FW_FILE "habanalabs/gaudi2/gaudi2-fit.itb"
#define GAUDI2_BOOT_FIT_FILE "habanalabs/gaudi2/gaudi2-boot-fit.itb"
-#define MMU_PAGE_TABLES_INITIAL_SIZE 0x10000000 /* 256MB */
-
#define GAUDI2_CPU_TIMEOUT_USEC 30000000 /* 30s */
#define NUMBER_OF_PDMA_QUEUES 2
@@ -109,13 +107,11 @@
/* DRAM Memory Map */
#define CPU_FW_IMAGE_SIZE 0x10000000 /* 256MB */
-
-/* This define should be used only when working in a debug mode without dram.
- * When working with dram, the driver size will be calculated dynamically.
- */
-#define NIC_DEFAULT_DRV_SIZE 0x20000000 /* 512MB */
-
#define CPU_FW_IMAGE_ADDR DRAM_PHYS_BASE
+#define PMMU_PAGE_TABLES_SIZE 0x10000000 /* 256MB */
+#define EDMA_PQS_SIZE SZ_2M
+#define EDMA_SCRATCHPAD_SIZE SZ_1M
+#define HMMU_PAGE_TABLES_SIZE SZ_1M
#define NIC_NUMBER_OF_PORTS NIC_NUMBER_OF_ENGINES
@@ -241,9 +237,8 @@
#define GAUDI2_SOB_INCREMENT_BY_ONE (FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1) | \
FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1))
-#define GAUDI2_NUM_TESTED_QS (GAUDI2_QUEUE_ID_CPU_PQ - GAUDI2_QUEUE_ID_PDMA_0_0)
+#define GAUDI2_NUM_TESTED_QS (GAUDI2_QUEUE_ID_CPU_PQ - GAUDI2_QUEUE_ID_PDMA_0_0)
-#define GAUDI2_NUM_OF_GLBL_ERR_CAUSE 8
enum gaudi2_reserved_sob_id {
GAUDI2_RESERVED_SOB_CS_COMPLETION_FIRST,
diff --git a/drivers/accel/habanalabs/goya/goya.c b/drivers/accel/habanalabs/goya/goya.c
index 1322cb330c..5a359c3bdc 100644
--- a/drivers/accel/habanalabs/goya/goya.c
+++ b/drivers/accel/habanalabs/goya/goya.c
@@ -413,8 +413,6 @@ int goya_set_fixed_properties(struct hl_device *hdev)
else
prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
prop->mmu_pte_size = HL_PTE_SIZE;
- prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
- prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
prop->dram_page_size = PAGE_SIZE_2MB;
prop->device_mem_alloc_default_page_size = prop->dram_page_size;
prop->dram_supports_virtual_memory = true;
@@ -435,8 +433,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->dmmu.num_hops = MMU_ARCH_5_HOPS;
prop->dmmu.last_mask = LAST_MASK;
/* TODO: will be duplicated until implementing per-MMU props */
- prop->dmmu.hop_table_size = prop->mmu_hop_table_size;
- prop->dmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
+ prop->dmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
+ prop->dmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
/* shifts and masks are the same in PMMU and DMMU */
memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
@@ -446,8 +444,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
prop->pmmu.last_mask = LAST_MASK;
/* TODO: will be duplicated until implementing per-MMU props */
- prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
- prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
+ prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
+ prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
/* PMMU and HPMMU are the same except of page size */
memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
@@ -2678,7 +2676,7 @@ int goya_mmu_init(struct hl_device *hdev)
for (i = 0 ; i < prop->max_asid ; i++) {
hop0_addr = prop->mmu_pgt_addr +
- (i * prop->mmu_hop_table_size);
+ (i * prop->dmmu.hop_table_size);
rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
if (rc) {
diff --git a/drivers/accel/habanalabs/goya/goya_coresight.c b/drivers/accel/habanalabs/goya/goya_coresight.c
index 41cae5fd84..3827ea4c02 100644
--- a/drivers/accel/habanalabs/goya/goya_coresight.c
+++ b/drivers/accel/habanalabs/goya/goya_coresight.c
@@ -576,7 +576,6 @@ static int goya_config_spmu(struct hl_device *hdev,
struct hl_debug_params *params)
{
u64 base_reg;
- struct hl_debug_params_spmu *input = params->input;
u64 *output;
u32 output_arr_len;
u32 events_num;
@@ -592,7 +591,7 @@ static int goya_config_spmu(struct hl_device *hdev,
base_reg = debug_spmu_regs[params->reg_idx] - CFG_BASE;
if (params->enable) {
- input = params->input;
+ struct hl_debug_params_spmu *input = params->input;
if (!input)
return -EINVAL;
diff --git a/drivers/accel/habanalabs/include/hw_ip/mmu/mmu_general.h b/drivers/accel/habanalabs/include/hw_ip/mmu/mmu_general.h
index d408feecd4..b4a5e95be3 100644
--- a/drivers/accel/habanalabs/include/hw_ip/mmu/mmu_general.h
+++ b/drivers/accel/habanalabs/include/hw_ip/mmu/mmu_general.h
@@ -26,6 +26,8 @@
#define LAST_MASK 0x0000000000800ull
#define FLAGS_MASK 0x0000000000FFFull
+#define MMU_ARCH_3_HOPS 3
+#define MMU_ARCH_4_HOPS 4
#define MMU_ARCH_5_HOPS 5
#define MMU_ARCH_6_HOPS 6
diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
index 7cb962e214..d09d29775b 100644
--- a/drivers/accel/ivpu/ivpu_debugfs.c
+++ b/drivers/accel/ivpu/ivpu_debugfs.c
@@ -287,22 +287,6 @@ static const struct file_operations fw_trace_level_fops = {
};
static ssize_t
-ivpu_reset_engine_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
-{
- struct ivpu_device *vdev = file->private_data;
-
- if (!size)
- return -EINVAL;
-
- if (ivpu_jsm_reset_engine(vdev, DRM_IVPU_ENGINE_COMPUTE))
- return -ENODEV;
- if (ivpu_jsm_reset_engine(vdev, DRM_IVPU_ENGINE_COPY))
- return -ENODEV;
-
- return size;
-}
-
-static ssize_t
ivpu_force_recovery_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
{
struct ivpu_device *vdev = file->private_data;
@@ -327,6 +311,22 @@ static const struct file_operations ivpu_force_recovery_fops = {
.write = ivpu_force_recovery_fn,
};
+static ssize_t
+ivpu_reset_engine_fn(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
+{
+ struct ivpu_device *vdev = file->private_data;
+
+ if (!size)
+ return -EINVAL;
+
+ if (ivpu_jsm_reset_engine(vdev, DRM_IVPU_ENGINE_COMPUTE))
+ return -ENODEV;
+ if (ivpu_jsm_reset_engine(vdev, DRM_IVPU_ENGINE_COPY))
+ return -ENODEV;
+
+ return size;
+}
+
static const struct file_operations ivpu_reset_engine_fops = {
.owner = THIS_MODULE,
.open = simple_open,
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index bad1ccc81a..51d3f1a55d 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -45,11 +45,11 @@ MODULE_PARM_DESC(test_mode, "Test mode mask. See IVPU_TEST_MODE_* macros.");
u8 ivpu_pll_min_ratio;
module_param_named(pll_min_ratio, ivpu_pll_min_ratio, byte, 0644);
-MODULE_PARM_DESC(pll_min_ratio, "Minimum PLL ratio used to set VPU frequency");
+MODULE_PARM_DESC(pll_min_ratio, "Minimum PLL ratio used to set NPU frequency");
u8 ivpu_pll_max_ratio = U8_MAX;
module_param_named(pll_max_ratio, ivpu_pll_max_ratio, byte, 0644);
-MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set VPU frequency");
+MODULE_PARM_DESC(pll_max_ratio, "Maximum PLL ratio used to set NPU frequency");
bool ivpu_disable_mmu_cont_pages;
module_param_named(disable_mmu_cont_pages, ivpu_disable_mmu_cont_pages, bool, 0644);
@@ -312,13 +312,13 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev)
ivpu_ipc_consumer_del(vdev, &cons);
if (!ret && ipc_hdr.data_addr != IVPU_IPC_BOOT_MSG_DATA_ADDR) {
- ivpu_err(vdev, "Invalid VPU ready message: 0x%x\n",
+ ivpu_err(vdev, "Invalid NPU ready message: 0x%x\n",
ipc_hdr.data_addr);
return -EIO;
}
if (!ret)
- ivpu_dbg(vdev, PM, "VPU ready message received successfully\n");
+ ivpu_dbg(vdev, PM, "NPU ready message received successfully\n");
return ret;
}
@@ -519,6 +519,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
atomic64_set(&vdev->unique_id_counter, 0);
xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
+ xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1);
lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
INIT_LIST_HEAD(&vdev->bo_list);
@@ -590,6 +591,7 @@ err_mmu_gctx_fini:
err_shutdown:
ivpu_shutdown(vdev);
err_xa_destroy:
+ xa_destroy(&vdev->db_xa);
xa_destroy(&vdev->submitted_jobs_xa);
xa_destroy(&vdev->context_xa);
return ret;
@@ -624,6 +626,8 @@ static void ivpu_dev_fini(struct ivpu_device *vdev)
ivpu_mmu_reserved_context_fini(vdev);
ivpu_mmu_global_context_fini(vdev);
+ drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->db_xa));
+ xa_destroy(&vdev->db_xa);
drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
xa_destroy(&vdev->submitted_jobs_xa);
drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->context_xa));
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index e7a9e84994..bb4374d0ea 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -36,6 +36,9 @@
#define IVPU_USER_CONTEXT_MIN_SSID 2
#define IVPU_USER_CONTEXT_MAX_SSID (IVPU_USER_CONTEXT_MIN_SSID + 63)
+#define IVPU_MIN_DB 1
+#define IVPU_MAX_DB 255
+
#define IVPU_NUM_ENGINES 2
#define IVPU_PLATFORM_SILICON 0
@@ -118,6 +121,8 @@ struct ivpu_device {
struct xarray context_xa;
struct xa_limit context_xa_limit;
+ struct xarray db_xa;
+
struct mutex bo_list_lock; /* Protects bo_list */
struct list_head bo_list;
@@ -188,7 +193,7 @@ static inline int ivpu_hw_gen(struct ivpu_device *vdev)
case PCI_DEVICE_ID_LNL:
return IVPU_HW_40XX;
default:
- ivpu_err(vdev, "Unknown VPU device\n");
+ ivpu_err(vdev, "Unknown NPU device\n");
return 0;
}
}
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index 5fa8bd4603..1457300828 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -46,15 +46,13 @@
static char *ivpu_firmware;
module_param_named_unsafe(firmware, ivpu_firmware, charp, 0644);
-MODULE_PARM_DESC(firmware, "VPU firmware binary in /lib/firmware/..");
+MODULE_PARM_DESC(firmware, "NPU firmware binary in /lib/firmware/..");
-/* TODO: Remove mtl_vpu.bin from names after transition to generation based FW names */
static struct {
int gen;
const char *name;
} fw_names[] = {
{ IVPU_HW_37XX, "vpu_37xx.bin" },
- { IVPU_HW_37XX, "mtl_vpu.bin" },
{ IVPU_HW_37XX, "intel/vpu/vpu_37xx_v0.0.bin" },
{ IVPU_HW_40XX, "vpu_40xx.bin" },
{ IVPU_HW_40XX, "intel/vpu/vpu_40xx_v0.0.bin" },
@@ -250,6 +248,7 @@ static int ivpu_fw_update_global_range(struct ivpu_device *vdev)
static int ivpu_fw_mem_init(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
+ struct ivpu_addr_range fw_range;
int log_verb_size;
int ret;
@@ -257,16 +256,19 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev)
if (ret)
return ret;
- fw->mem = ivpu_bo_alloc_internal(vdev, fw->runtime_addr, fw->runtime_size, DRM_IVPU_BO_WC);
+ fw_range.start = fw->runtime_addr;
+ fw_range.end = fw->runtime_addr + fw->runtime_size;
+ fw->mem = ivpu_bo_create(vdev, &vdev->gctx, &fw_range, fw->runtime_size,
+ DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
if (!fw->mem) {
- ivpu_err(vdev, "Failed to allocate firmware runtime memory\n");
+ ivpu_err(vdev, "Failed to create firmware runtime memory buffer\n");
return -ENOMEM;
}
- fw->mem_log_crit = ivpu_bo_alloc_internal(vdev, 0, IVPU_FW_CRITICAL_BUFFER_SIZE,
- DRM_IVPU_BO_CACHED);
+ fw->mem_log_crit = ivpu_bo_create_global(vdev, IVPU_FW_CRITICAL_BUFFER_SIZE,
+ DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE);
if (!fw->mem_log_crit) {
- ivpu_err(vdev, "Failed to allocate critical log buffer\n");
+ ivpu_err(vdev, "Failed to create critical log buffer\n");
ret = -ENOMEM;
goto err_free_fw_mem;
}
@@ -276,18 +278,19 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev)
else
log_verb_size = IVPU_FW_VERBOSE_BUFFER_SMALL_SIZE;
- fw->mem_log_verb = ivpu_bo_alloc_internal(vdev, 0, log_verb_size, DRM_IVPU_BO_CACHED);
+ fw->mem_log_verb = ivpu_bo_create_global(vdev, log_verb_size,
+ DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE);
if (!fw->mem_log_verb) {
- ivpu_err(vdev, "Failed to allocate verbose log buffer\n");
+ ivpu_err(vdev, "Failed to create verbose log buffer\n");
ret = -ENOMEM;
goto err_free_log_crit;
}
if (fw->shave_nn_size) {
- fw->mem_shave_nn = ivpu_bo_alloc_internal(vdev, vdev->hw->ranges.shave.start,
- fw->shave_nn_size, DRM_IVPU_BO_WC);
+ fw->mem_shave_nn = ivpu_bo_create(vdev, &vdev->gctx, &vdev->hw->ranges.shave,
+ fw->shave_nn_size, DRM_IVPU_BO_WC);
if (!fw->mem_shave_nn) {
- ivpu_err(vdev, "Failed to allocate shavenn buffer\n");
+ ivpu_err(vdev, "Failed to create shavenn buffer\n");
ret = -ENOMEM;
goto err_free_log_verb;
}
@@ -296,11 +299,11 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev)
return 0;
err_free_log_verb:
- ivpu_bo_free_internal(fw->mem_log_verb);
+ ivpu_bo_free(fw->mem_log_verb);
err_free_log_crit:
- ivpu_bo_free_internal(fw->mem_log_crit);
+ ivpu_bo_free(fw->mem_log_crit);
err_free_fw_mem:
- ivpu_bo_free_internal(fw->mem);
+ ivpu_bo_free(fw->mem);
return ret;
}
@@ -309,13 +312,13 @@ static void ivpu_fw_mem_fini(struct ivpu_device *vdev)
struct ivpu_fw_info *fw = vdev->fw;
if (fw->mem_shave_nn) {
- ivpu_bo_free_internal(fw->mem_shave_nn);
+ ivpu_bo_free(fw->mem_shave_nn);
fw->mem_shave_nn = NULL;
}
- ivpu_bo_free_internal(fw->mem_log_verb);
- ivpu_bo_free_internal(fw->mem_log_crit);
- ivpu_bo_free_internal(fw->mem);
+ ivpu_bo_free(fw->mem_log_verb);
+ ivpu_bo_free(fw->mem_log_crit);
+ ivpu_bo_free(fw->mem);
fw->mem_log_verb = NULL;
fw->mem_log_crit = NULL;
@@ -469,6 +472,8 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_
boot_params->d0i3_residency_time_us);
ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n",
boot_params->d0i3_entry_vpu_ts);
+ ivpu_dbg(vdev, FW_BOOT, "boot_params.system_time_us = %llu\n",
+ boot_params->system_time_us);
}
void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params)
@@ -480,11 +485,14 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->d0i3_residency_time_us =
ktime_us_delta(ktime_get_boottime(), vdev->hw->d0i3_entry_host_ts);
boot_params->d0i3_entry_vpu_ts = vdev->hw->d0i3_entry_vpu_ts;
+ boot_params->system_time_us = ktime_to_us(ktime_get_real());
ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_residency_time_us = %lld\n",
boot_params->d0i3_residency_time_us);
ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n",
boot_params->d0i3_entry_vpu_ts);
+ ivpu_dbg(vdev, FW_BOOT, "boot_params.system_time_us = %llu\n",
+ boot_params->system_time_us);
boot_params->save_restore_ret_address = 0;
vdev->pm->is_warmboot = true;
@@ -562,6 +570,7 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->d0i3_residency_time_us = 0;
boot_params->d0i3_entry_vpu_ts = 0;
+ boot_params->system_time_us = ktime_to_us(ktime_get_real());
wmb(); /* Flush WC buffers after writing bootparams */
ivpu_fw_boot_params_print(vdev, boot_params);
diff --git a/drivers/accel/ivpu/ivpu_fw_log.c b/drivers/accel/ivpu/ivpu_fw_log.c
index f6770f5e82..ef0adb5e0f 100644
--- a/drivers/accel/ivpu/ivpu_fw_log.c
+++ b/drivers/accel/ivpu/ivpu_fw_log.c
@@ -20,7 +20,7 @@
unsigned int ivpu_log_level = IVPU_FW_LOG_ERROR;
module_param(ivpu_log_level, uint, 0444);
MODULE_PARM_DESC(ivpu_log_level,
- "VPU firmware default trace level: debug=" __stringify(IVPU_FW_LOG_DEBUG)
+ "NPU firmware default trace level: debug=" __stringify(IVPU_FW_LOG_DEBUG)
" info=" __stringify(IVPU_FW_LOG_INFO)
" warn=" __stringify(IVPU_FW_LOG_WARN)
" error=" __stringify(IVPU_FW_LOG_ERROR)
@@ -121,11 +121,11 @@ void ivpu_fw_log_print(struct ivpu_device *vdev, bool only_new_msgs, struct drm_
u32 next = 0;
while (fw_log_ptr(vdev, vdev->fw->mem_log_crit, &next, &log_header) == 0)
- fw_log_print_buffer(vdev, log_header, "VPU critical", only_new_msgs, p);
+ fw_log_print_buffer(vdev, log_header, "NPU critical", only_new_msgs, p);
next = 0;
while (fw_log_ptr(vdev, vdev->fw->mem_log_verb, &next, &log_header) == 0)
- fw_log_print_buffer(vdev, log_header, "VPU verbose", only_new_msgs, p);
+ fw_log_print_buffer(vdev, log_header, "NPU verbose", only_new_msgs, p);
}
void ivpu_fw_log_clear(struct ivpu_device *vdev)
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index e9ddbe9f50..1b409dbd33 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -172,8 +172,7 @@ struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t siz
return &bo->base.base;
}
-static struct ivpu_bo *
-ivpu_bo_create(struct ivpu_device *vdev, u64 size, u32 flags)
+static struct ivpu_bo *ivpu_bo_alloc(struct ivpu_device *vdev, u64 size, u32 flags)
{
struct drm_gem_shmem_object *shmem;
struct ivpu_bo *bo;
@@ -201,7 +200,7 @@ ivpu_bo_create(struct ivpu_device *vdev, u64 size, u32 flags)
return bo;
}
-static int ivpu_bo_open(struct drm_gem_object *obj, struct drm_file *file)
+static int ivpu_gem_bo_open(struct drm_gem_object *obj, struct drm_file *file)
{
struct ivpu_file_priv *file_priv = file->driver_priv;
struct ivpu_device *vdev = file_priv->vdev;
@@ -224,7 +223,7 @@ static int ivpu_bo_open(struct drm_gem_object *obj, struct drm_file *file)
return ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, range);
}
-static void ivpu_bo_free(struct drm_gem_object *obj)
+static void ivpu_gem_bo_free(struct drm_gem_object *obj)
{
struct ivpu_device *vdev = to_ivpu_device(obj->dev);
struct ivpu_bo *bo = to_ivpu_bo(obj);
@@ -245,8 +244,8 @@ static void ivpu_bo_free(struct drm_gem_object *obj)
}
static const struct drm_gem_object_funcs ivpu_gem_funcs = {
- .free = ivpu_bo_free,
- .open = ivpu_bo_open,
+ .free = ivpu_gem_bo_free,
+ .open = ivpu_gem_bo_open,
.print_info = drm_gem_shmem_object_print_info,
.pin = drm_gem_shmem_object_pin,
.unpin = drm_gem_shmem_object_unpin,
@@ -272,9 +271,9 @@ int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
if (size == 0)
return -EINVAL;
- bo = ivpu_bo_create(vdev, size, args->flags);
+ bo = ivpu_bo_alloc(vdev, size, args->flags);
if (IS_ERR(bo)) {
- ivpu_err(vdev, "Failed to create BO: %pe (ctx %u size %llu flags 0x%x)",
+ ivpu_err(vdev, "Failed to allocate BO: %pe (ctx %u size %llu flags 0x%x)",
bo, file_priv->ctx.id, args->size, args->flags);
return PTR_ERR(bo);
}
@@ -289,33 +288,28 @@ int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
}
struct ivpu_bo *
-ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags)
+ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
+ struct ivpu_addr_range *range, u64 size, u32 flags)
{
- const struct ivpu_addr_range *range;
- struct ivpu_addr_range fixed_range;
struct iosys_map map;
struct ivpu_bo *bo;
int ret;
- drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(vpu_addr));
- drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(size));
+ if (drm_WARN_ON(&vdev->drm, !range))
+ return NULL;
- if (vpu_addr) {
- fixed_range.start = vpu_addr;
- fixed_range.end = vpu_addr + size;
- range = &fixed_range;
- } else {
- range = &vdev->hw->ranges.global;
- }
+ drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(range->start));
+ drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(range->end));
+ drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(size));
- bo = ivpu_bo_create(vdev, size, flags);
+ bo = ivpu_bo_alloc(vdev, size, flags);
if (IS_ERR(bo)) {
- ivpu_err(vdev, "Failed to create BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)",
- bo, vpu_addr, size, flags);
+ ivpu_err(vdev, "Failed to allocate BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)",
+ bo, range->start, size, flags);
return NULL;
}
- ret = ivpu_bo_alloc_vpu_addr(bo, &vdev->gctx, range);
+ ret = ivpu_bo_alloc_vpu_addr(bo, ctx, range);
if (ret)
goto err_put;
@@ -323,11 +317,14 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
if (ret)
goto err_put;
- dma_resv_lock(bo->base.base.resv, NULL);
- ret = drm_gem_shmem_vmap(&bo->base, &map);
- dma_resv_unlock(bo->base.base.resv);
- if (ret)
- goto err_put;
+ if (flags & DRM_IVPU_BO_MAPPABLE) {
+ dma_resv_lock(bo->base.base.resv, NULL);
+ ret = drm_gem_shmem_vmap(&bo->base, &map);
+ dma_resv_unlock(bo->base.base.resv);
+
+ if (ret)
+ goto err_put;
+ }
return bo;
@@ -336,13 +333,20 @@ err_put:
return NULL;
}
-void ivpu_bo_free_internal(struct ivpu_bo *bo)
+struct ivpu_bo *ivpu_bo_create_global(struct ivpu_device *vdev, u64 size, u32 flags)
+{
+ return ivpu_bo_create(vdev, &vdev->gctx, &vdev->hw->ranges.global, size, flags);
+}
+
+void ivpu_bo_free(struct ivpu_bo *bo)
{
struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->base.vaddr);
- dma_resv_lock(bo->base.base.resv, NULL);
- drm_gem_shmem_vunmap(&bo->base, &map);
- dma_resv_unlock(bo->base.base.resv);
+ if (bo->flags & DRM_IVPU_BO_MAPPABLE) {
+ dma_resv_lock(bo->base.base.resv, NULL);
+ drm_gem_shmem_vunmap(&bo->base, &map);
+ dma_resv_unlock(bo->base.base.resv);
+ }
drm_gem_object_put(&bo->base.base);
}
diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h
index a8559211c7..fb7117c13e 100644
--- a/drivers/accel/ivpu/ivpu_gem.h
+++ b/drivers/accel/ivpu/ivpu_gem.h
@@ -28,8 +28,10 @@ int ivpu_bo_pin(struct ivpu_bo *bo);
void ivpu_bo_unbind_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size);
-struct ivpu_bo *ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags);
-void ivpu_bo_free_internal(struct ivpu_bo *bo);
+struct ivpu_bo *ivpu_bo_create(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
+ struct ivpu_addr_range *range, u64 size, u32 flags);
+struct ivpu_bo *ivpu_bo_create_global(struct ivpu_device *vdev, u64 size, u32 flags);
+void ivpu_bo_free(struct ivpu_bo *bo);
int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c
index 5e392b6823..bd25e2d9fb 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_37xx.c
@@ -13,7 +13,7 @@
#include "ivpu_pm.h"
#define TILE_FUSE_ENABLE_BOTH 0x0
-#define TILE_SKU_BOTH_MTL 0x3630
+#define TILE_SKU_BOTH 0x3630
/* Work point configuration values */
#define CONFIG_1_TILE 0x01
@@ -226,7 +226,7 @@ static int ivpu_pll_drive(struct ivpu_device *vdev, bool enable)
ret = ivpu_hw_37xx_wait_for_vpuip_bar(vdev);
if (ret) {
- ivpu_err(vdev, "Timed out waiting for VPUIP bar\n");
+ ivpu_err(vdev, "Timed out waiting for NPU IP bar\n");
return ret;
}
}
@@ -587,7 +587,7 @@ static int ivpu_hw_37xx_info_init(struct ivpu_device *vdev)
struct ivpu_hw_info *hw = vdev->hw;
hw->tile_fuse = TILE_FUSE_ENABLE_BOTH;
- hw->sku = TILE_SKU_BOTH_MTL;
+ hw->sku = TILE_SKU_BOTH;
hw->config = WP_CONFIG_2_TILE_4_3_RATIO;
ivpu_pll_init_frequency_ratios(vdev);
@@ -760,10 +760,10 @@ static int ivpu_hw_37xx_power_down(struct ivpu_device *vdev)
ivpu_hw_37xx_save_d0i3_entry_timestamp(vdev);
if (!ivpu_hw_37xx_is_idle(vdev))
- ivpu_warn(vdev, "VPU not idle during power down\n");
+ ivpu_warn(vdev, "NPU not idle during power down\n");
if (ivpu_hw_37xx_reset(vdev)) {
- ivpu_err(vdev, "Failed to reset VPU\n");
+ ivpu_err(vdev, "Failed to reset NPU\n");
ret = -EIO;
}
diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c
index 0be353ad87..b0b88d4c89 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_40xx.c
@@ -80,11 +80,11 @@ static char *ivpu_platform_to_str(u32 platform)
{
switch (platform) {
case IVPU_PLATFORM_SILICON:
- return "IVPU_PLATFORM_SILICON";
+ return "SILICON";
case IVPU_PLATFORM_SIMICS:
- return "IVPU_PLATFORM_SIMICS";
+ return "SIMICS";
case IVPU_PLATFORM_FPGA:
- return "IVPU_PLATFORM_FPGA";
+ return "FPGA";
default:
return "Invalid platform";
}
@@ -768,7 +768,7 @@ static int ivpu_hw_40xx_reset(struct ivpu_device *vdev)
int ret = 0;
if (ivpu_hw_40xx_ip_reset(vdev)) {
- ivpu_err(vdev, "Failed to reset VPU IP\n");
+ ivpu_err(vdev, "Failed to reset NPU IP\n");
ret = -EIO;
}
@@ -926,7 +926,7 @@ static int ivpu_hw_40xx_power_down(struct ivpu_device *vdev)
ivpu_hw_40xx_save_d0i3_entry_timestamp(vdev);
if (!ivpu_hw_40xx_is_idle(vdev) && ivpu_hw_40xx_ip_reset(vdev))
- ivpu_warn(vdev, "Failed to reset the VPU\n");
+ ivpu_warn(vdev, "Failed to reset the NPU\n");
if (ivpu_pll_disable(vdev)) {
ivpu_err(vdev, "Failed to disable PLL\n");
diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index f4c5392483..56ff067f63 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -58,8 +58,8 @@ static void ivpu_ipc_mem_fini(struct ivpu_device *vdev)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
- ivpu_bo_free_internal(ipc->mem_rx);
- ivpu_bo_free_internal(ipc->mem_tx);
+ ivpu_bo_free(ipc->mem_rx);
+ ivpu_bo_free(ipc->mem_tx);
}
static int
@@ -471,13 +471,13 @@ int ivpu_ipc_init(struct ivpu_device *vdev)
struct ivpu_ipc_info *ipc = vdev->ipc;
int ret;
- ipc->mem_tx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC);
+ ipc->mem_tx = ivpu_bo_create_global(vdev, SZ_16K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
if (!ipc->mem_tx) {
ivpu_err(vdev, "Failed to allocate mem_tx\n");
return -ENOMEM;
}
- ipc->mem_rx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC);
+ ipc->mem_rx = ivpu_bo_create_global(vdev, SZ_16K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
if (!ipc->mem_rx) {
ivpu_err(vdev, "Failed to allocate mem_rx\n");
ret = -ENOMEM;
@@ -510,9 +510,9 @@ int ivpu_ipc_init(struct ivpu_device *vdev)
return 0;
err_free_rx:
- ivpu_bo_free_internal(ipc->mem_rx);
+ ivpu_bo_free(ipc->mem_rx);
err_free_tx:
- ivpu_bo_free_internal(ipc->mem_tx);
+ ivpu_bo_free(ipc->mem_tx);
return ret;
}
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index e70cfb8593..a49bc9105e 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -30,19 +30,26 @@ static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq)
static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv, u16 engine)
{
+ struct xa_limit db_xa_limit = {.max = IVPU_MAX_DB, .min = IVPU_MIN_DB};
struct ivpu_device *vdev = file_priv->vdev;
struct vpu_job_queue_header *jobq_header;
struct ivpu_cmdq *cmdq;
+ int ret;
cmdq = kzalloc(sizeof(*cmdq), GFP_KERNEL);
if (!cmdq)
return NULL;
- cmdq->mem = ivpu_bo_alloc_internal(vdev, 0, SZ_4K, DRM_IVPU_BO_WC);
+ ret = xa_alloc(&vdev->db_xa, &cmdq->db_id, NULL, db_xa_limit, GFP_KERNEL);
+ if (ret) {
+ ivpu_err(vdev, "Failed to allocate doorbell id: %d\n", ret);
+ goto err_free_cmdq;
+ }
+
+ cmdq->mem = ivpu_bo_create_global(vdev, SZ_4K, DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE);
if (!cmdq->mem)
- goto cmdq_free;
+ goto err_erase_xa;
- cmdq->db_id = file_priv->ctx.id + engine * ivpu_get_context_count(vdev);
cmdq->entry_count = (u32)((ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header)) /
sizeof(struct vpu_job_queue_entry));
@@ -55,7 +62,9 @@ static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv, u16 e
return cmdq;
-cmdq_free:
+err_erase_xa:
+ xa_erase(&vdev->db_xa, cmdq->db_id);
+err_free_cmdq:
kfree(cmdq);
return NULL;
}
@@ -65,7 +74,8 @@ static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *c
if (!cmdq)
return;
- ivpu_bo_free_internal(cmdq->mem);
+ ivpu_bo_free(cmdq->mem);
+ xa_erase(&file_priv->vdev->db_xa, cmdq->db_id);
kfree(cmdq);
}
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index 64618fc2ce..4f5ea46673 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -22,7 +22,7 @@
static bool ivpu_disable_recovery;
module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644);
-MODULE_PARM_DESC(disable_recovery, "Disables recovery when VPU hang is detected");
+MODULE_PARM_DESC(disable_recovery, "Disables recovery when NPU hang is detected");
static unsigned long ivpu_tdr_timeout_ms;
module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644);
@@ -116,11 +116,11 @@ static void ivpu_pm_recovery_work(struct work_struct *work)
char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL};
int ret;
- ivpu_err(vdev, "Recovering the VPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter));
+ ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter));
ret = pm_runtime_resume_and_get(vdev->drm.dev);
if (ret)
- ivpu_err(vdev, "Failed to resume VPU: %d\n", ret);
+ ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);
ivpu_fw_log_dump(vdev);
@@ -258,10 +258,10 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev)
ret = ivpu_suspend(vdev);
if (ret)
- ivpu_err(vdev, "Failed to set suspend VPU: %d\n", ret);
+ ivpu_err(vdev, "Failed to suspend NPU: %d\n", ret);
if (!hw_is_idle) {
- ivpu_err(vdev, "VPU failed to enter idle, force suspended.\n");
+ ivpu_err(vdev, "NPU failed to enter idle, force suspended.\n");
ivpu_fw_log_dump(vdev);
ivpu_pm_prepare_cold_boot(vdev);
} else {
@@ -307,7 +307,7 @@ int ivpu_rpm_get_if_active(struct ivpu_device *vdev)
{
int ret;
- ret = pm_runtime_get_if_active(vdev->drm.dev, false);
+ ret = pm_runtime_get_if_in_use(vdev->drm.dev);
drm_WARN_ON(&vdev->drm, ret < 0);
return ret;
diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h
index 04c9542585..87cac7bc73 100644
--- a/drivers/accel/ivpu/vpu_boot_api.h
+++ b/drivers/accel/ivpu/vpu_boot_api.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: MIT */
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (c) 2020-2023, Intel Corporation.
*/
#ifndef VPU_BOOT_API_H
@@ -27,12 +27,12 @@
* Minor version changes when API backward compatibility is preserved.
* Resets to 0 if Major version is incremented.
*/
-#define VPU_BOOT_API_VER_MINOR 20
+#define VPU_BOOT_API_VER_MINOR 22
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
*/
-#define VPU_BOOT_API_VER_PATCH 4
+#define VPU_BOOT_API_VER_PATCH 0
/*
* Index in the API version table
@@ -41,7 +41,7 @@
#define VPU_BOOT_API_VER_INDEX 0
/* ------------ FW API version information end ---------------------*/
-#pragma pack(push, 1)
+#pragma pack(push, 4)
/*
* Firmware image header format
@@ -66,9 +66,17 @@ struct vpu_firmware_header {
/* Size of memory require for firmware execution */
u32 runtime_size;
u32 shave_nn_fw_size;
- /* Size of primary preemption buffer. */
+ /*
+ * Size of primary preemption buffer, assuming a 2-job submission queue.
+ * NOTE: host driver is expected to adapt size accordingly to actual
+ * submission queue size and device capabilities.
+ */
u32 preemption_buffer_1_size;
- /* Size of secondary preemption buffer. */
+ /*
+ * Size of secondary preemption buffer, assuming a 2-job submission queue.
+ * NOTE: host driver is expected to adapt size accordingly to actual
+ * submission queue size and device capabilities.
+ */
u32 preemption_buffer_2_size;
/* Space reserved for future preemption-related fields. */
u32 preemption_reserved[6];
@@ -181,10 +189,10 @@ struct vpu_warm_boot_section {
#define VPU_PRESENT_CALL_PERIOD_MS_MAX 10000
/**
- * Macros to enable various operation modes within the VPU.
+ * Macros to enable various power profiles within the NPU.
* To be defined as part of 32 bit mask.
*/
-#define VPU_OP_MODE_SURVIVABILITY 0x1
+#define POWER_PROFILE_SURVIVABILITY 0x1
struct vpu_boot_params {
u32 magic;
@@ -317,7 +325,15 @@ struct vpu_boot_params {
u64 d0i3_residency_time_us;
/* Value of VPU perf counter at the time of entering D0i3 state . */
u64 d0i3_entry_vpu_ts;
- u32 pad4[20];
+ /*
+ * The system time of the host operating system in microseconds.
+ * E.g the number of microseconds since 1st of January 1970, or whatever date the
+ * host operating system uses to maintain system time.
+ * This value will be used to track system time on the VPU.
+ * The KMD is required to update this value on every VPU reset.
+ */
+ u64 system_time_us;
+ u32 pad4[18];
/* Warm boot information: 0x400 - 0x43F */
u32 warm_boot_sections_count;
u32 warm_boot_start_address_reference;
@@ -344,10 +360,14 @@ struct vpu_boot_params {
u32 vpu_focus_present_timer_ms;
/* VPU ECC Signaling */
u32 vpu_uses_ecc_mca_signal;
- /* Values defined by VPU_OP_MODE* macros */
- u32 vpu_operation_mode;
- /* Unused/reserved: 0x480 - 0xFFF */
- u32 pad6[736];
+ /* Values defined by POWER_PROFILE* macros */
+ u32 power_profile;
+ /* Microsecond value for DCT active cycle */
+ u32 dct_active_us;
+ /* Microsecond value for DCT inactive cycle */
+ u32 dct_inactive_us;
+ /* Unused/reserved: 0x488 - 0xFFF */
+ u32 pad6[734];
};
/*
diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h
index 7da7622742..e46f353121 100644
--- a/drivers/accel/ivpu/vpu_jsm_api.h
+++ b/drivers/accel/ivpu/vpu_jsm_api.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: MIT */
/*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (c) 2020-2023, Intel Corporation.
*/
/**
@@ -27,7 +27,7 @@
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
*/
-#define VPU_JSM_API_VER_PATCH 0
+#define VPU_JSM_API_VER_PATCH 6
/*
* Index in the API version table
@@ -43,8 +43,11 @@
/* Max number of impacted contexts that can be dealt with the engine reset command */
#define VPU_MAX_ENGINE_RESET_IMPACTED_CONTEXTS 3
-/** Pack the API structures for now, once alignment issues are fixed this can be removed */
-#pragma pack(push, 1)
+/*
+ * Pack the API structures to enforce binary compatibility
+ * Align to 8 bytes for optimal performance
+ */
+#pragma pack(push, 8)
/*
* Engine indexes.
@@ -125,6 +128,19 @@
#define VPU_HWS_MAX_REALTIME_PRIORITY_LEVEL 31U
/*
+ * vpu_jsm_engine_reset_context flag definitions
+ */
+#define VPU_ENGINE_RESET_CONTEXT_FLAG_COLLATERAL_DAMAGE_MASK BIT(0)
+#define VPU_ENGINE_RESET_CONTEXT_HANG_PRIMARY_CAUSE 0
+#define VPU_ENGINE_RESET_CONTEXT_COLLATERAL_DAMAGE 1
+
+/*
+ * Invalid command queue handle identifier. Applies to cmdq_id and cmdq_group
+ * in this API.
+ */
+#define VPU_HWS_INVALID_CMDQ_HANDLE 0ULL
+
+/*
* Job format.
*/
struct vpu_job_queue_entry {
@@ -613,7 +629,7 @@ struct vpu_jsm_engine_reset_context {
u32 reserved_0;
/* Command queue id */
u64 cmdq_id;
- /* Flags: 0: cause of hang; 1: collateral damage of reset */
+ /* See VPU_ENGINE_RESET_CONTEXT_* defines */
u64 flags;
};
@@ -730,11 +746,7 @@ struct vpu_ipc_msg_payload_hws_create_cmdq {
u32 host_ssid;
/* Engine for which queue is being created */
u32 engine_idx;
- /*
- * Cmdq group may be set to 0 or equal to
- * cmdq_id while each priority band contains
- * only single engine instances.
- */
+ /* Cmdq group: only used for HWS logging of state changes */
u64 cmdq_group;
/* Command queue id */
u64 cmdq_id;
diff --git a/drivers/accel/qaic/mhi_controller.c b/drivers/accel/qaic/mhi_controller.c
index cb77d048ed..ada9b1eb07 100644
--- a/drivers/accel/qaic/mhi_controller.c
+++ b/drivers/accel/qaic/mhi_controller.c
@@ -20,7 +20,7 @@ static unsigned int mhi_timeout_ms = 2000; /* 2 sec default */
module_param(mhi_timeout_ms, uint, 0600);
MODULE_PARM_DESC(mhi_timeout_ms, "MHI controller timeout value");
-static struct mhi_channel_config aic100_channels[] = {
+static const struct mhi_channel_config aic100_channels[] = {
{
.name = "QAIC_LOOPBACK",
.num = 0,
@@ -358,8 +358,8 @@ static struct mhi_channel_config aic100_channels[] = {
.wake_capable = false,
},
{
- .num = 21,
.name = "QAIC_TIMESYNC",
+ .num = 21,
.num_elements = 32,
.local_elements = 0,
.event_ring = 0,
@@ -390,8 +390,8 @@ static struct mhi_channel_config aic100_channels[] = {
.wake_capable = false,
},
{
- .num = 23,
.name = "QAIC_TIMESYNC_PERIODIC",
+ .num = 23,
.num_elements = 32,
.local_elements = 0,
.event_ring = 0,
diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h
index 582836f953..9256653b30 100644
--- a/drivers/accel/qaic/qaic.h
+++ b/drivers/accel/qaic/qaic.h
@@ -30,6 +30,7 @@
#define to_qaic_drm_device(dev) container_of(dev, struct qaic_drm_device, drm)
#define to_drm(qddev) (&(qddev)->drm)
#define to_accel_kdev(qddev) (to_drm(qddev)->accel->kdev) /* Return Linux device of accel node */
+#define to_qaic_device(dev) (to_qaic_drm_device((dev))->qdev)
enum __packed dev_states {
/* Device is offline or will be very soon */
@@ -191,8 +192,6 @@ struct qaic_bo {
u32 nr_slice;
/* Number of slice that have been transferred by DMA engine */
u32 nr_slice_xfer_done;
- /* true = BO is queued for execution, true = BO is not queued */
- bool queued;
/*
* If true then user has attached slicing information to this BO by
* calling DRM_IOCTL_QAIC_ATTACH_SLICE_BO ioctl.
diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
index 03c9a793da..2459fe4a3f 100644
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@@ -141,6 +141,11 @@ struct dbc_rsp {
__le16 status;
} __packed;
+static inline bool bo_queued(struct qaic_bo *bo)
+{
+ return !list_empty(&bo->xfer_list);
+}
+
inline int get_dbc_req_elem_size(void)
{
return sizeof(struct dbc_req);
@@ -569,6 +574,9 @@ static void qaic_free_sgt(struct sg_table *sgt)
{
struct scatterlist *sg;
+ if (!sgt)
+ return;
+
for (sg = sgt->sgl; sg; sg = sg_next(sg))
if (sg_page(sg))
__free_pages(sg_page(sg), get_order(sg->length));
@@ -648,6 +656,7 @@ static void qaic_init_bo(struct qaic_bo *bo, bool reinit)
}
complete_all(&bo->xfer_done);
INIT_LIST_HEAD(&bo->slices);
+ INIT_LIST_HEAD(&bo->xfer_list);
}
static struct qaic_bo *qaic_alloc_init_bo(void)
@@ -709,9 +718,13 @@ int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
if (ret)
goto free_bo;
+ ret = drm_gem_create_mmap_offset(obj);
+ if (ret)
+ goto free_bo;
+
ret = drm_gem_handle_create(file_priv, obj, &args->handle);
if (ret)
- goto free_sgt;
+ goto free_bo;
bo->handle = args->handle;
drm_gem_object_put(obj);
@@ -720,10 +733,8 @@ int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
return 0;
-free_sgt:
- qaic_free_sgt(bo->sgt);
free_bo:
- kfree(bo);
+ drm_gem_object_put(obj);
unlock_dev_srcu:
srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
unlock_usr_srcu:
@@ -738,7 +749,7 @@ int qaic_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file
struct drm_gem_object *obj;
struct qaic_device *qdev;
struct qaic_user *usr;
- int ret;
+ int ret = 0;
usr = file_priv->driver_priv;
usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
@@ -760,9 +771,7 @@ int qaic_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file
goto unlock_dev_srcu;
}
- ret = drm_gem_create_mmap_offset(obj);
- if (ret == 0)
- args->offset = drm_vma_node_offset_addr(&obj->vma_node);
+ args->offset = drm_vma_node_offset_addr(&obj->vma_node);
drm_gem_object_put(obj);
@@ -828,9 +837,6 @@ static int qaic_prepare_import_bo(struct qaic_bo *bo, struct qaic_attach_slice_h
struct sg_table *sgt;
int ret;
- if (obj->import_attach->dmabuf->size < hdr->size)
- return -EINVAL;
-
sgt = dma_buf_map_attachment(obj->import_attach, hdr->dir);
if (IS_ERR(sgt)) {
ret = PTR_ERR(sgt);
@@ -847,9 +853,6 @@ static int qaic_prepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo,
{
int ret;
- if (bo->base.size < hdr->size)
- return -EINVAL;
-
ret = dma_map_sgtable(&qdev->pdev->dev, bo->sgt, hdr->dir, 0);
if (ret)
return -EFAULT;
@@ -950,9 +953,6 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
if (arg_size / args->hdr.count != sizeof(*slice_ent))
return -EINVAL;
- if (args->hdr.size == 0)
- return -EINVAL;
-
if (!(args->hdr.dir == DMA_TO_DEVICE || args->hdr.dir == DMA_FROM_DEVICE))
return -EINVAL;
@@ -992,16 +992,16 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
goto free_slice_ent;
}
- ret = qaic_validate_req(qdev, slice_ent, args->hdr.count, args->hdr.size);
- if (ret)
- goto free_slice_ent;
-
obj = drm_gem_object_lookup(file_priv, args->hdr.handle);
if (!obj) {
ret = -ENOENT;
goto free_slice_ent;
}
+ ret = qaic_validate_req(qdev, slice_ent, args->hdr.count, obj->size);
+ if (ret)
+ goto put_bo;
+
bo = to_qaic_bo(obj);
ret = mutex_lock_interruptible(&bo->lock);
if (ret)
@@ -1173,7 +1173,6 @@ static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *fil
struct bo_slice *slice;
unsigned long flags;
struct qaic_bo *bo;
- bool queued;
int i, j;
int ret;
@@ -1205,9 +1204,7 @@ static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *fil
}
spin_lock_irqsave(&dbc->xfer_lock, flags);
- queued = bo->queued;
- bo->queued = true;
- if (queued) {
+ if (bo_queued(bo)) {
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
ret = -EINVAL;
goto unlock_bo;
@@ -1230,7 +1227,6 @@ static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *fil
else
ret = copy_exec_reqs(qdev, slice, dbc->id, head, tail);
if (ret) {
- bo->queued = false;
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
goto unlock_bo;
}
@@ -1253,8 +1249,7 @@ failed_to_send_bo:
spin_lock_irqsave(&dbc->xfer_lock, flags);
bo = list_last_entry(&dbc->xfer_list, struct qaic_bo, xfer_list);
obj = &bo->base;
- bo->queued = false;
- list_del(&bo->xfer_list);
+ list_del_init(&bo->xfer_list);
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir);
drm_gem_object_put(obj);
@@ -1615,8 +1610,7 @@ read_fifo:
*/
dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir);
bo->nr_slice_xfer_done = 0;
- bo->queued = false;
- list_del(&bo->xfer_list);
+ list_del_init(&bo->xfer_list);
bo->perf_stats.req_processed_ts = ktime_get_ns();
complete_all(&bo->xfer_done);
drm_gem_object_put(&bo->base);
@@ -1875,7 +1869,7 @@ int qaic_detach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
/* Check if BO is committed to H/W for DMA */
spin_lock_irqsave(&dbc->xfer_lock, flags);
- if (bo->queued) {
+ if (bo_queued(bo)) {
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
ret = -EBUSY;
goto unlock_ch_srcu;
@@ -1905,8 +1899,7 @@ static void empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *db
spin_lock_irqsave(&dbc->xfer_lock, flags);
while (!list_empty(&dbc->xfer_list)) {
bo = list_first_entry(&dbc->xfer_list, typeof(*bo), xfer_list);
- bo->queued = false;
- list_del(&bo->xfer_list);
+ list_del_init(&bo->xfer_list);
spin_unlock_irqrestore(&dbc->xfer_lock, flags);
bo->nr_slice_xfer_done = 0;
bo->req_id = 0;
diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
index 2a313eb69b..d1a632dbae 100644
--- a/drivers/accel/qaic/qaic_drv.c
+++ b/drivers/accel/qaic/qaic_drv.c
@@ -44,6 +44,53 @@ MODULE_PARM_DESC(datapath_polling, "Operate the datapath in polling mode");
static bool link_up;
static DEFINE_IDA(qaic_usrs);
+static void qaicm_wq_release(struct drm_device *dev, void *res)
+{
+ struct workqueue_struct *wq = res;
+
+ destroy_workqueue(wq);
+}
+
+static struct workqueue_struct *qaicm_wq_init(struct drm_device *dev, const char *fmt)
+{
+ struct workqueue_struct *wq;
+ int ret;
+
+ wq = alloc_workqueue(fmt, WQ_UNBOUND, 0);
+ if (!wq)
+ return ERR_PTR(-ENOMEM);
+ ret = drmm_add_action_or_reset(dev, qaicm_wq_release, wq);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return wq;
+}
+
+static void qaicm_srcu_release(struct drm_device *dev, void *res)
+{
+ struct srcu_struct *lock = res;
+
+ cleanup_srcu_struct(lock);
+}
+
+static int qaicm_srcu_init(struct drm_device *dev, struct srcu_struct *lock)
+{
+ int ret;
+
+ ret = init_srcu_struct(lock);
+ if (ret)
+ return ret;
+
+ return drmm_add_action_or_reset(dev, qaicm_srcu_release, lock);
+}
+
+static void qaicm_pci_release(struct drm_device *dev, void *res)
+{
+ struct qaic_device *qdev = to_qaic_device(dev);
+
+ pci_set_drvdata(qdev->pdev, NULL);
+}
+
static void free_usr(struct kref *kref)
{
struct qaic_user *usr = container_of(kref, struct qaic_user, ref_count);
@@ -299,74 +346,73 @@ void qaic_dev_reset_clean_local_state(struct qaic_device *qdev)
release_dbc(qdev, i);
}
-static void cleanup_qdev(struct qaic_device *qdev)
-{
- int i;
-
- for (i = 0; i < qdev->num_dbc; ++i)
- cleanup_srcu_struct(&qdev->dbc[i].ch_lock);
- cleanup_srcu_struct(&qdev->dev_lock);
- pci_set_drvdata(qdev->pdev, NULL);
- destroy_workqueue(qdev->cntl_wq);
- destroy_workqueue(qdev->qts_wq);
-}
-
static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_device_id *id)
{
+ struct device *dev = &pdev->dev;
struct qaic_drm_device *qddev;
struct qaic_device *qdev;
- int i;
+ struct drm_device *drm;
+ int i, ret;
- qdev = devm_kzalloc(&pdev->dev, sizeof(*qdev), GFP_KERNEL);
+ qdev = devm_kzalloc(dev, sizeof(*qdev), GFP_KERNEL);
if (!qdev)
return NULL;
qdev->dev_state = QAIC_OFFLINE;
if (id->device == PCI_DEV_AIC100) {
qdev->num_dbc = 16;
- qdev->dbc = devm_kcalloc(&pdev->dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL);
+ qdev->dbc = devm_kcalloc(dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL);
if (!qdev->dbc)
return NULL;
}
- qdev->cntl_wq = alloc_workqueue("qaic_cntl", WQ_UNBOUND, 0);
- if (!qdev->cntl_wq)
+ qddev = devm_drm_dev_alloc(&pdev->dev, &qaic_accel_driver, struct qaic_drm_device, drm);
+ if (IS_ERR(qddev))
+ return NULL;
+
+ drm = to_drm(qddev);
+ pci_set_drvdata(pdev, qdev);
+
+ ret = drmm_mutex_init(drm, &qddev->users_mutex);
+ if (ret)
+ return NULL;
+ ret = drmm_add_action_or_reset(drm, qaicm_pci_release, NULL);
+ if (ret)
+ return NULL;
+ ret = drmm_mutex_init(drm, &qdev->cntl_mutex);
+ if (ret)
return NULL;
- qdev->qts_wq = alloc_workqueue("qaic_ts", WQ_UNBOUND, 0);
- if (!qdev->qts_wq) {
- destroy_workqueue(qdev->cntl_wq);
+ qdev->cntl_wq = qaicm_wq_init(drm, "qaic_cntl");
+ if (IS_ERR(qdev->cntl_wq))
+ return NULL;
+ qdev->qts_wq = qaicm_wq_init(drm, "qaic_ts");
+ if (IS_ERR(qdev->qts_wq))
return NULL;
- }
- pci_set_drvdata(pdev, qdev);
+ ret = qaicm_srcu_init(drm, &qdev->dev_lock);
+ if (ret)
+ return NULL;
+
+ qdev->qddev = qddev;
qdev->pdev = pdev;
+ qddev->qdev = qdev;
- mutex_init(&qdev->cntl_mutex);
INIT_LIST_HEAD(&qdev->cntl_xfer_list);
- init_srcu_struct(&qdev->dev_lock);
+ INIT_LIST_HEAD(&qddev->users);
for (i = 0; i < qdev->num_dbc; ++i) {
spin_lock_init(&qdev->dbc[i].xfer_lock);
qdev->dbc[i].qdev = qdev;
qdev->dbc[i].id = i;
INIT_LIST_HEAD(&qdev->dbc[i].xfer_list);
- init_srcu_struct(&qdev->dbc[i].ch_lock);
+ ret = qaicm_srcu_init(drm, &qdev->dbc[i].ch_lock);
+ if (ret)
+ return NULL;
init_waitqueue_head(&qdev->dbc[i].dbc_release);
INIT_LIST_HEAD(&qdev->dbc[i].bo_lists);
}
- qddev = devm_drm_dev_alloc(&pdev->dev, &qaic_accel_driver, struct qaic_drm_device, drm);
- if (IS_ERR(qddev)) {
- cleanup_qdev(qdev);
- return NULL;
- }
-
- drmm_mutex_init(to_drm(qddev), &qddev->users_mutex);
- INIT_LIST_HEAD(&qddev->users);
- qddev->qdev = qdev;
- qdev->qddev = qddev;
-
return qdev;
}
@@ -472,35 +518,28 @@ static int qaic_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
ret = init_pci(qdev, pdev);
if (ret)
- goto cleanup_qdev;
+ return ret;
for (i = 0; i < qdev->num_dbc; ++i)
qdev->dbc[i].dbc_base = qdev->bar_2 + QAIC_DBC_OFF(i);
mhi_irq = init_msi(qdev, pdev);
- if (mhi_irq < 0) {
- ret = mhi_irq;
- goto cleanup_qdev;
- }
+ if (mhi_irq < 0)
+ return mhi_irq;
ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION);
if (ret)
- goto cleanup_qdev;
+ return ret;
qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_0, mhi_irq,
qdev->single_msi);
if (IS_ERR(qdev->mhi_cntrl)) {
ret = PTR_ERR(qdev->mhi_cntrl);
- goto cleanup_drm_dev;
+ qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
+ return ret;
}
return 0;
-
-cleanup_drm_dev:
- qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
-cleanup_qdev:
- cleanup_qdev(qdev);
- return ret;
}
static void qaic_pci_remove(struct pci_dev *pdev)
@@ -511,9 +550,8 @@ static void qaic_pci_remove(struct pci_dev *pdev)
return;
qaic_dev_reset_clean_local_state(qdev);
- qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
qaic_mhi_free_controller(qdev->mhi_cntrl, link_up);
- cleanup_qdev(qdev);
+ qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
}
static void qaic_pci_shutdown(struct pci_dev *pdev)