diff options
Diffstat (limited to '')
58 files changed, 2508 insertions, 1546 deletions
diff --git a/drivers/accel/drm_accel.c b/drivers/accel/drm_accel.c index 4a9baf0243..294b572a9c 100644 --- a/drivers/accel/drm_accel.c +++ b/drivers/accel/drm_accel.c @@ -21,7 +21,6 @@ static DEFINE_SPINLOCK(accel_minor_lock); static struct idr accel_minors_idr; static struct dentry *accel_debugfs_root; -static struct class *accel_class; static struct device_type accel_sysfs_device_minor = { .name = "accel_minor" @@ -32,23 +31,19 @@ static char *accel_devnode(const struct device *dev, umode_t *mode) return kasprintf(GFP_KERNEL, "accel/%s", dev_name(dev)); } +static const struct class accel_class = { + .name = "accel", + .devnode = accel_devnode, +}; + static int accel_sysfs_init(void) { - accel_class = class_create("accel"); - if (IS_ERR(accel_class)) - return PTR_ERR(accel_class); - - accel_class->devnode = accel_devnode; - - return 0; + return class_register(&accel_class); } static void accel_sysfs_destroy(void) { - if (IS_ERR_OR_NULL(accel_class)) - return; - class_destroy(accel_class); - accel_class = NULL; + class_unregister(&accel_class); } static int accel_name_info(struct seq_file *m, void *data) @@ -79,29 +74,30 @@ static const struct drm_info_list accel_debugfs_list[] = { #define ACCEL_DEBUGFS_ENTRIES ARRAY_SIZE(accel_debugfs_list) /** - * accel_debugfs_init() - Initialize debugfs for accel minor - * @minor: Pointer to the drm_minor instance. - * @minor_id: The minor's id + * accel_debugfs_init() - Initialize debugfs for device + * @dev: Pointer to the device instance. * - * This function initializes the drm minor's debugfs members and creates - * a root directory for the minor in debugfs. It also creates common files - * for accelerators and calls the driver's debugfs init callback. + * This function creates a root directory for the device in debugfs. */ -void accel_debugfs_init(struct drm_minor *minor, int minor_id) +void accel_debugfs_init(struct drm_device *dev) { - struct drm_device *dev = minor->dev; - char name[64]; + drm_debugfs_dev_init(dev, accel_debugfs_root); +} - INIT_LIST_HEAD(&minor->debugfs_list); - mutex_init(&minor->debugfs_lock); - sprintf(name, "%d", minor_id); - minor->debugfs_root = debugfs_create_dir(name, accel_debugfs_root); +/** + * accel_debugfs_register() - Register debugfs for device + * @dev: Pointer to the device instance. + * + * Creates common files for accelerators. + */ +void accel_debugfs_register(struct drm_device *dev) +{ + struct drm_minor *minor = dev->accel; - drm_debugfs_create_files(accel_debugfs_list, ACCEL_DEBUGFS_ENTRIES, - minor->debugfs_root, minor); + minor->debugfs_root = dev->debugfs_root; - if (dev->driver->debugfs_init) - dev->driver->debugfs_init(minor); + drm_debugfs_create_files(accel_debugfs_list, ACCEL_DEBUGFS_ENTRIES, + dev->debugfs_root, minor); } /** @@ -116,7 +112,7 @@ void accel_debugfs_init(struct drm_minor *minor, int minor_id) void accel_set_device_instance_params(struct device *kdev, int index) { kdev->devt = MKDEV(ACCEL_MAJOR, index); - kdev->class = accel_class; + kdev->class = &accel_class; kdev->type = &accel_sysfs_device_minor; } diff --git a/drivers/accel/habanalabs/common/command_buffer.c b/drivers/accel/habanalabs/common/command_buffer.c index 08f7aee426..0f0d295116 100644 --- a/drivers/accel/habanalabs/common/command_buffer.c +++ b/drivers/accel/habanalabs/common/command_buffer.c @@ -361,10 +361,11 @@ out: return rc; } -int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) +int hl_cb_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv) { - union hl_cb_args *args = data; + struct hl_fpriv *hpriv = file_priv->driver_priv; struct hl_device *hdev = hpriv->hdev; + union hl_cb_args *args = data; u64 handle = 0, device_va = 0; enum hl_device_status status; u32 usage_cnt = 0; diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c index c23829dab9..3aa6eeef44 100644 --- a/drivers/accel/habanalabs/common/command_submission.c +++ b/drivers/accel/habanalabs/common/command_submission.c @@ -31,6 +31,24 @@ enum hl_cs_wait_status { CS_WAIT_STATUS_GONE }; +/* + * Data used while handling wait/timestamp nodes. + * The purpose of this struct is to store the needed data for both operations + * in one variable instead of passing large number of arguments to functions. + */ +struct wait_interrupt_data { + struct hl_user_interrupt *interrupt; + struct hl_mmap_mem_buf *buf; + struct hl_mem_mgr *mmg; + struct hl_cb *cq_cb; + u64 ts_handle; + u64 ts_offset; + u64 cq_handle; + u64 cq_offset; + u64 target_value; + u64 intr_timeout_us; +}; + static void job_wq_completion(struct work_struct *work); static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq, enum hl_cs_wait_status *status, s64 *timestamp); @@ -1079,19 +1097,22 @@ static void wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt) { struct hl_user_pending_interrupt *pend, *temp; + unsigned long flags; - spin_lock(&interrupt->wait_list_lock); - list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) { - if (pend->ts_reg_info.buf) { - list_del(&pend->wait_list_node); - hl_mmap_mem_buf_put(pend->ts_reg_info.buf); - hl_cb_put(pend->ts_reg_info.cq_cb); - } else { - pend->fence.error = -EIO; - complete_all(&pend->fence.completion); - } + spin_lock_irqsave(&interrupt->wait_list_lock, flags); + list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, list_node) { + pend->fence.error = -EIO; + complete_all(&pend->fence.completion); } - spin_unlock(&interrupt->wait_list_lock); + spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); + + spin_lock_irqsave(&interrupt->ts_list_lock, flags); + list_for_each_entry_safe(pend, temp, &interrupt->ts_list_head, list_node) { + list_del(&pend->list_node); + hl_mmap_mem_buf_put(pend->ts_reg_info.buf); + hl_cb_put(pend->ts_reg_info.cq_cb); + } + spin_unlock_irqrestore(&interrupt->ts_list_lock, flags); } void hl_release_pending_user_interrupts(struct hl_device *hdev) @@ -1730,16 +1751,11 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args, /* Need to wait for restore completion before execution phase */ if (num_chunks) { enum hl_cs_wait_status status; -wait_again: + ret = _hl_cs_wait_ioctl(hdev, ctx, jiffies_to_usecs(hdev->timeout_jiffies), *cs_seq, &status, NULL); if (ret) { - if (ret == -ERESTARTSYS) { - usleep_range(100, 200); - goto wait_again; - } - dev_err(hdev->dev, "Restore CS for context %d failed to complete %d\n", ctx->asid, ret); @@ -2539,8 +2555,9 @@ static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv) return 0; } -int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) +int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv) { + struct hl_fpriv *hpriv = file_priv->driver_priv; union hl_cs_args *args = data; enum hl_cs_type cs_type = 0; u64 cs_seq = ULONG_MAX; @@ -3197,166 +3214,241 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) return 0; } -static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf, - struct hl_cb *cq_cb, - u64 ts_offset, u64 cq_offset, u64 target_value, - spinlock_t *wait_list_lock, - struct hl_user_pending_interrupt **pend) +static inline void set_record_cq_info(struct hl_user_pending_interrupt *record, + struct hl_cb *cq_cb, u32 cq_offset, u32 target_value) { - struct hl_ts_buff *ts_buff = buf->private; - struct hl_user_pending_interrupt *requested_offset_record = - (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + - ts_offset; - struct hl_user_pending_interrupt *cb_last = - (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + + record->ts_reg_info.cq_cb = cq_cb; + record->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_offset; + record->cq_target_value = target_value; +} + +static int validate_and_get_ts_record(struct device *dev, + struct hl_ts_buff *ts_buff, u64 ts_offset, + struct hl_user_pending_interrupt **req_event_record) +{ + struct hl_user_pending_interrupt *ts_cb_last; + + *req_event_record = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + + ts_offset; + ts_cb_last = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + (ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt)); - unsigned long iter_counter = 0; - u64 current_cq_counter; - ktime_t timestamp; /* Validate ts_offset not exceeding last max */ - if (requested_offset_record >= cb_last) { - dev_err(buf->mmg->dev, "Ts offset exceeds max CB offset(0x%llx)\n", - (u64)(uintptr_t)cb_last); + if (*req_event_record >= ts_cb_last) { + dev_err(dev, "Ts offset(%llu) exceeds max CB offset(0x%llx)\n", + ts_offset, (u64)(uintptr_t)ts_cb_last); return -EINVAL; } - timestamp = ktime_get(); + return 0; +} -start_over: - spin_lock(wait_list_lock); +static void unregister_timestamp_node(struct hl_device *hdev, + struct hl_user_pending_interrupt *record, bool need_lock) +{ + struct hl_user_interrupt *interrupt = record->ts_reg_info.interrupt; + bool ts_rec_found = false; + unsigned long flags; - /* Unregister only if we didn't reach the target value - * since in this case there will be no handling in irq context - * and then it's safe to delete the node out of the interrupt list - * then re-use it on other interrupt - */ - if (requested_offset_record->ts_reg_info.in_use) { - current_cq_counter = *requested_offset_record->cq_kernel_addr; - if (current_cq_counter < requested_offset_record->cq_target_value) { - list_del(&requested_offset_record->wait_list_node); - spin_unlock(wait_list_lock); + if (need_lock) + spin_lock_irqsave(&interrupt->ts_list_lock, flags); - hl_mmap_mem_buf_put(requested_offset_record->ts_reg_info.buf); - hl_cb_put(requested_offset_record->ts_reg_info.cq_cb); + if (record->ts_reg_info.in_use) { + record->ts_reg_info.in_use = false; + list_del(&record->list_node); + ts_rec_found = true; + } - dev_dbg(buf->mmg->dev, - "ts node removed from interrupt list now can re-use\n"); - } else { - dev_dbg(buf->mmg->dev, - "ts node in middle of irq handling\n"); - - /* irq thread handling in the middle give it time to finish */ - spin_unlock(wait_list_lock); - usleep_range(100, 1000); - if (++iter_counter == MAX_TS_ITER_NUM) { - dev_err(buf->mmg->dev, - "Timestamp offset processing reached timeout of %lld ms\n", - ktime_ms_delta(ktime_get(), timestamp)); - return -EAGAIN; - } + if (need_lock) + spin_unlock_irqrestore(&interrupt->ts_list_lock, flags); - goto start_over; + /* Put refcounts that were taken when we registered the event */ + if (ts_rec_found) { + hl_mmap_mem_buf_put(record->ts_reg_info.buf); + hl_cb_put(record->ts_reg_info.cq_cb); + } +} + +static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx *ctx, + struct wait_interrupt_data *data, unsigned long *flags, + struct hl_user_pending_interrupt **pend) +{ + struct hl_user_pending_interrupt *req_offset_record; + struct hl_ts_buff *ts_buff = data->buf->private; + bool need_lock = false; + int rc; + + rc = validate_and_get_ts_record(data->buf->mmg->dev, ts_buff, data->ts_offset, + &req_offset_record); + if (rc) + return rc; + + /* In case the node already registered, need to unregister first then re-use */ + if (req_offset_record->ts_reg_info.in_use) { + dev_dbg(data->buf->mmg->dev, + "Requested record %p is in use on irq: %u ts addr: %p, unregister first then put on irq: %u\n", + req_offset_record, + req_offset_record->ts_reg_info.interrupt->interrupt_id, + req_offset_record->ts_reg_info.timestamp_kernel_addr, + data->interrupt->interrupt_id); + /* + * Since interrupt here can be different than the one the node currently registered + * on, and we don't want to lock two lists while we're doing unregister, so + * unlock the new interrupt wait list here and acquire the lock again after you done + */ + if (data->interrupt->interrupt_id != + req_offset_record->ts_reg_info.interrupt->interrupt_id) { + + need_lock = true; + spin_unlock_irqrestore(&data->interrupt->ts_list_lock, *flags); } - } else { - /* Fill up the new registration node info */ - requested_offset_record->ts_reg_info.buf = buf; - requested_offset_record->ts_reg_info.cq_cb = cq_cb; - requested_offset_record->ts_reg_info.timestamp_kernel_addr = - (u64 *) ts_buff->user_buff_address + ts_offset; - requested_offset_record->cq_kernel_addr = - (u64 *) cq_cb->kernel_address + cq_offset; - requested_offset_record->cq_target_value = target_value; - spin_unlock(wait_list_lock); + unregister_timestamp_node(hdev, req_offset_record, need_lock); + + if (need_lock) + spin_lock_irqsave(&data->interrupt->ts_list_lock, *flags); } - *pend = requested_offset_record; + /* Fill up the new registration node info and add it to the list */ + req_offset_record->ts_reg_info.in_use = true; + req_offset_record->ts_reg_info.buf = data->buf; + req_offset_record->ts_reg_info.timestamp_kernel_addr = + (u64 *) ts_buff->user_buff_address + data->ts_offset; + req_offset_record->ts_reg_info.interrupt = data->interrupt; + set_record_cq_info(req_offset_record, data->cq_cb, data->cq_offset, + data->target_value); - dev_dbg(buf->mmg->dev, "Found available node in TS kernel CB %p\n", - requested_offset_record); - return 0; + *pend = req_offset_record; + + return rc; +} + +static int _hl_interrupt_ts_reg_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, + struct wait_interrupt_data *data, + u32 *status, u64 *timestamp) +{ + struct hl_user_pending_interrupt *pend; + unsigned long flags; + int rc = 0; + + hl_ctx_get(ctx); + + data->cq_cb = hl_cb_get(data->mmg, data->cq_handle); + if (!data->cq_cb) { + rc = -EINVAL; + goto put_ctx; + } + + /* Validate the cq offset */ + if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >= + ((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) { + rc = -EINVAL; + goto put_cq_cb; + } + + dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, handle: 0x%llx, ts offset: %llu, cq_offset: %llu\n", + data->interrupt->interrupt_id, data->ts_handle, + data->ts_offset, data->cq_offset); + + data->buf = hl_mmap_mem_buf_get(data->mmg, data->ts_handle); + if (!data->buf) { + rc = -EINVAL; + goto put_cq_cb; + } + + spin_lock_irqsave(&data->interrupt->ts_list_lock, flags); + + /* get ts buffer record */ + rc = ts_get_and_handle_kernel_record(hdev, ctx, data, &flags, &pend); + if (rc) { + spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags); + goto put_ts_buff; + } + + /* We check for completion value as interrupt could have been received + * before we add the timestamp node to the ts list. + */ + if (*pend->cq_kernel_addr >= data->target_value) { + spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags); + + dev_dbg(hdev->dev, "Target value already reached release ts record: pend: %p, offset: %llu, interrupt: %u\n", + pend, data->ts_offset, data->interrupt->interrupt_id); + + pend->ts_reg_info.in_use = 0; + *status = HL_WAIT_CS_STATUS_COMPLETED; + *pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns(); + + goto put_ts_buff; + } + + list_add_tail(&pend->list_node, &data->interrupt->ts_list_head); + spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags); + + rc = *status = HL_WAIT_CS_STATUS_COMPLETED; + + hl_ctx_put(ctx); + + return rc; + +put_ts_buff: + hl_mmap_mem_buf_put(data->buf); +put_cq_cb: + hl_cb_put(data->cq_cb); +put_ctx: + hl_ctx_put(ctx); + + return rc; } static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, - struct hl_mem_mgr *cb_mmg, struct hl_mem_mgr *mmg, - u64 timeout_us, u64 cq_counters_handle, u64 cq_counters_offset, - u64 target_value, struct hl_user_interrupt *interrupt, - bool register_ts_record, u64 ts_handle, u64 ts_offset, + struct wait_interrupt_data *data, u32 *status, u64 *timestamp) { struct hl_user_pending_interrupt *pend; - struct hl_mmap_mem_buf *buf; - struct hl_cb *cq_cb; - unsigned long timeout; + unsigned long timeout, flags; long completion_rc; int rc = 0; - timeout = hl_usecs64_to_jiffies(timeout_us); + timeout = hl_usecs64_to_jiffies(data->intr_timeout_us); hl_ctx_get(ctx); - cq_cb = hl_cb_get(cb_mmg, cq_counters_handle); - if (!cq_cb) { + data->cq_cb = hl_cb_get(data->mmg, data->cq_handle); + if (!data->cq_cb) { rc = -EINVAL; goto put_ctx; } /* Validate the cq offset */ - if (((u64 *) cq_cb->kernel_address + cq_counters_offset) >= - ((u64 *) cq_cb->kernel_address + (cq_cb->size / sizeof(u64)))) { + if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >= + ((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) { rc = -EINVAL; goto put_cq_cb; } - if (register_ts_record) { - dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n", - interrupt->interrupt_id, ts_offset, cq_counters_offset); - buf = hl_mmap_mem_buf_get(mmg, ts_handle); - if (!buf) { - rc = -EINVAL; - goto put_cq_cb; - } - - /* get ts buffer record */ - rc = ts_buff_get_kernel_ts_record(buf, cq_cb, ts_offset, - cq_counters_offset, target_value, - &interrupt->wait_list_lock, &pend); - if (rc) - goto put_ts_buff; - } else { - pend = kzalloc(sizeof(*pend), GFP_KERNEL); - if (!pend) { - rc = -ENOMEM; - goto put_cq_cb; - } - hl_fence_init(&pend->fence, ULONG_MAX); - pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset; - pend->cq_target_value = target_value; + pend = kzalloc(sizeof(*pend), GFP_KERNEL); + if (!pend) { + rc = -ENOMEM; + goto put_cq_cb; } - spin_lock(&interrupt->wait_list_lock); + hl_fence_init(&pend->fence, ULONG_MAX); + pend->cq_kernel_addr = (u64 *) data->cq_cb->kernel_address + data->cq_offset; + pend->cq_target_value = data->target_value; + spin_lock_irqsave(&data->interrupt->wait_list_lock, flags); + /* We check for completion value as interrupt could have been received - * before we added the node to the wait list + * before we add the wait node to the wait list. */ - if (*pend->cq_kernel_addr >= target_value) { - if (register_ts_record) - pend->ts_reg_info.in_use = 0; - spin_unlock(&interrupt->wait_list_lock); + if (*pend->cq_kernel_addr >= data->target_value || (!data->intr_timeout_us)) { + spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags); - *status = HL_WAIT_CS_STATUS_COMPLETED; + if (*pend->cq_kernel_addr >= data->target_value) + *status = HL_WAIT_CS_STATUS_COMPLETED; + else + *status = HL_WAIT_CS_STATUS_BUSY; - if (register_ts_record) { - *pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns(); - goto put_ts_buff; - } else { - pend->fence.timestamp = ktime_get(); - goto set_timestamp; - } - } else if (!timeout_us) { - spin_unlock(&interrupt->wait_list_lock); - *status = HL_WAIT_CS_STATUS_BUSY; pend->fence.timestamp = ktime_get(); goto set_timestamp; } @@ -3366,55 +3458,38 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, * Note that we cannot have sorted list by target value, * in order to shorten the list pass loop, since * same list could have nodes for different cq counter handle. - * Note: - * Mark ts buff offset as in use here in the spinlock protection area - * to avoid getting in the re-use section in ts_buff_get_kernel_ts_record - * before adding the node to the list. this scenario might happen when - * multiple threads are racing on same offset and one thread could - * set the ts buff in ts_buff_get_kernel_ts_record then the other thread - * takes over and get to ts_buff_get_kernel_ts_record and then we will try - * to re-use the same ts buff offset, and will try to delete a non existing - * node from the list. */ - if (register_ts_record) - pend->ts_reg_info.in_use = 1; - - list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); - spin_unlock(&interrupt->wait_list_lock); - - if (register_ts_record) { - rc = *status = HL_WAIT_CS_STATUS_COMPLETED; - goto ts_registration_exit; - } + list_add_tail(&pend->list_node, &data->interrupt->wait_list_head); + spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags); /* Wait for interrupt handler to signal completion */ completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, timeout); if (completion_rc > 0) { - *status = HL_WAIT_CS_STATUS_COMPLETED; + if (pend->fence.error == -EIO) { + dev_err_ratelimited(hdev->dev, + "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n", + pend->fence.error); + rc = -EIO; + *status = HL_WAIT_CS_STATUS_ABORTED; + } else { + *status = HL_WAIT_CS_STATUS_COMPLETED; + } } else { if (completion_rc == -ERESTARTSYS) { dev_err_ratelimited(hdev->dev, "user process got signal while waiting for interrupt ID %d\n", - interrupt->interrupt_id); + data->interrupt->interrupt_id); rc = -EINTR; *status = HL_WAIT_CS_STATUS_ABORTED; } else { - if (pend->fence.error == -EIO) { - dev_err_ratelimited(hdev->dev, - "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n", - pend->fence.error); - rc = -EIO; - *status = HL_WAIT_CS_STATUS_ABORTED; - } else { - /* The wait has timed-out. We don't know anything beyond that - * because the workload wasn't submitted through the driver. - * Therefore, from driver's perspective, the workload is still - * executing. - */ - rc = 0; - *status = HL_WAIT_CS_STATUS_BUSY; - } + /* The wait has timed-out. We don't know anything beyond that + * because the workload was not submitted through the driver. + * Therefore, from driver's perspective, the workload is still + * executing. + */ + rc = 0; + *status = HL_WAIT_CS_STATUS_BUSY; } } @@ -3424,23 +3499,20 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, * for ts record, the node will be deleted in the irq handler after * we reach the target value. */ - spin_lock(&interrupt->wait_list_lock); - list_del(&pend->wait_list_node); - spin_unlock(&interrupt->wait_list_lock); + spin_lock_irqsave(&data->interrupt->wait_list_lock, flags); + list_del(&pend->list_node); + spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags); set_timestamp: *timestamp = ktime_to_ns(pend->fence.timestamp); kfree(pend); - hl_cb_put(cq_cb); -ts_registration_exit: + hl_cb_put(data->cq_cb); hl_ctx_put(ctx); return rc; -put_ts_buff: - hl_mmap_mem_buf_put(buf); put_cq_cb: - hl_cb_put(cq_cb); + hl_cb_put(data->cq_cb); put_ctx: hl_ctx_put(ctx); @@ -3454,7 +3526,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ u64 *timestamp) { struct hl_user_pending_interrupt *pend; - unsigned long timeout; + unsigned long timeout, flags; u64 completion_value; long completion_rc; int rc = 0; @@ -3474,9 +3546,9 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ /* Add pending user interrupt to relevant list for the interrupt * handler to monitor */ - spin_lock(&interrupt->wait_list_lock); - list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); - spin_unlock(&interrupt->wait_list_lock); + spin_lock_irqsave(&interrupt->wait_list_lock, flags); + list_add_tail(&pend->list_node, &interrupt->wait_list_head); + spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); /* We check for completion value as interrupt could have been received * before we added the node to the wait list @@ -3507,14 +3579,14 @@ wait_again: * If comparison fails, keep waiting until timeout expires */ if (completion_rc > 0) { - spin_lock(&interrupt->wait_list_lock); + spin_lock_irqsave(&interrupt->wait_list_lock, flags); /* reinit_completion must be called before we check for user * completion value, otherwise, if interrupt is received after * the comparison and before the next wait_for_completion, * we will reach timeout and fail */ reinit_completion(&pend->fence.completion); - spin_unlock(&interrupt->wait_list_lock); + spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) { dev_err(hdev->dev, "Failed to copy completion value from user\n"); @@ -3551,9 +3623,9 @@ wait_again: } remove_pending_user_interrupt: - spin_lock(&interrupt->wait_list_lock); - list_del(&pend->wait_list_node); - spin_unlock(&interrupt->wait_list_lock); + spin_lock_irqsave(&interrupt->wait_list_lock, flags); + list_del(&pend->list_node); + spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); *timestamp = ktime_to_ns(pend->fence.timestamp); @@ -3611,19 +3683,42 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) return -EINVAL; } - if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) - rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->mem_mgr, &hpriv->mem_mgr, - args->in.interrupt_timeout_us, args->in.cq_counters_handle, - args->in.cq_counters_offset, - args->in.target, interrupt, - !!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT), - args->in.timestamp_handle, args->in.timestamp_offset, - &status, ×tamp); - else + if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) { + struct wait_interrupt_data wait_intr_data = {0}; + + wait_intr_data.interrupt = interrupt; + wait_intr_data.mmg = &hpriv->mem_mgr; + wait_intr_data.cq_handle = args->in.cq_counters_handle; + wait_intr_data.cq_offset = args->in.cq_counters_offset; + wait_intr_data.ts_handle = args->in.timestamp_handle; + wait_intr_data.ts_offset = args->in.timestamp_offset; + wait_intr_data.target_value = args->in.target; + wait_intr_data.intr_timeout_us = args->in.interrupt_timeout_us; + + if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT) { + /* + * Allow only one registration at a time. this is needed in order to prevent + * issues while handling the flow of re-use of the same offset. + * Since the registration flow is protected only by the interrupt lock, + * re-use flow might request to move ts node to another interrupt list, + * and in such case we're not protected. + */ + mutex_lock(&hpriv->ctx->ts_reg_lock); + + rc = _hl_interrupt_ts_reg_ioctl(hdev, hpriv->ctx, &wait_intr_data, + &status, ×tamp); + + mutex_unlock(&hpriv->ctx->ts_reg_lock); + } else + rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &wait_intr_data, + &status, ×tamp); + } else { rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx, args->in.interrupt_timeout_us, args->in.addr, args->in.target, interrupt, &status, ×tamp); + } + if (rc) return rc; @@ -3638,8 +3733,9 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) return 0; } -int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data) +int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv) { + struct hl_fpriv *hpriv = file_priv->driver_priv; struct hl_device *hdev = hpriv->hdev; union hl_wait_cs_args *args = data; u32 flags = args->in.flags; diff --git a/drivers/accel/habanalabs/common/context.c b/drivers/accel/habanalabs/common/context.c index 9c8b1b37b5..b83141f583 100644 --- a/drivers/accel/habanalabs/common/context.c +++ b/drivers/accel/habanalabs/common/context.c @@ -102,7 +102,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx) kfree(ctx->cs_pending); if (ctx->asid != HL_KERNEL_ASID_ID) { - dev_dbg(hdev->dev, "closing user context %d\n", ctx->asid); + dev_dbg(hdev->dev, "closing user context, asid=%u\n", ctx->asid); /* The engines are stopped as there is no executing CS, but the * Coresight might be still working by accessing addresses @@ -119,6 +119,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx) hl_vm_ctx_fini(ctx); hl_asid_free(hdev, ctx->asid); hl_encaps_sig_mgr_fini(hdev, &ctx->sig_mgr); + mutex_destroy(&ctx->ts_reg_lock); } else { dev_dbg(hdev->dev, "closing kernel context\n"); hdev->asic_funcs->ctx_fini(ctx); @@ -198,6 +199,7 @@ out_err: int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) { + char task_comm[TASK_COMM_LEN]; int rc = 0, i; ctx->hdev = hdev; @@ -267,7 +269,10 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) hl_encaps_sig_mgr_init(&ctx->sig_mgr); - dev_dbg(hdev->dev, "create user context %d\n", ctx->asid); + mutex_init(&ctx->ts_reg_lock); + + dev_dbg(hdev->dev, "create user context, comm=\"%s\", asid=%u\n", + get_task_comm(task_comm, current), ctx->asid); } return 0; diff --git a/drivers/accel/habanalabs/common/debugfs.c b/drivers/accel/habanalabs/common/debugfs.c index 9e84a47a21..01f071d525 100644 --- a/drivers/accel/habanalabs/common/debugfs.c +++ b/drivers/accel/habanalabs/common/debugfs.c @@ -18,8 +18,6 @@ #define MMU_KBUF_SIZE (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE) #define I2C_MAX_TRANSACTION_LEN 8 -static struct dentry *hl_debug_root; - static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, u8 i2c_reg, u8 i2c_len, u64 *val) { @@ -1788,20 +1786,14 @@ void hl_debugfs_add_device(struct hl_device *hdev) { struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; - dev_entry->root = debugfs_create_dir(dev_name(hdev->dev), hl_debug_root); + dev_entry->root = hdev->drm.accel->debugfs_root; add_files_to_device(hdev, dev_entry, dev_entry->root); + if (!hdev->asic_prop.fw_security_enabled) add_secured_nodes(dev_entry, dev_entry->root); } -void hl_debugfs_remove_device(struct hl_device *hdev) -{ - struct hl_dbg_device_entry *entry = &hdev->hl_debugfs; - - debugfs_remove_recursive(entry->root); -} - void hl_debugfs_add_file(struct hl_fpriv *hpriv) { struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs; @@ -1932,13 +1924,3 @@ void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data, up_write(&dev_entry->state_dump_sem); } - -void __init hl_debugfs_init(void) -{ - hl_debug_root = debugfs_create_dir("habanalabs", NULL); -} - -void hl_debugfs_fini(void) -{ - debugfs_remove_recursive(hl_debug_root); -} diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c index b97339d1f7..9290d43745 100644 --- a/drivers/accel/habanalabs/common/device.c +++ b/drivers/accel/habanalabs/common/device.c @@ -14,11 +14,14 @@ #include <linux/hwmon.h> #include <linux/vmalloc.h> +#include <drm/drm_accel.h> +#include <drm/drm_drv.h> + #include <trace/events/habanalabs.h> #define HL_RESET_DELAY_USEC 10000 /* 10ms */ -#define HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC 5 +#define HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC 30 enum dma_alloc_type { DMA_ALLOC_COHERENT, @@ -185,7 +188,36 @@ void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void * hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, size, vaddr); } -int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir) +int hl_dma_map_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt, + enum dma_data_direction dir, const char *caller) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct scatterlist *sg; + int rc, i; + + rc = hdev->asic_funcs->dma_map_sgtable(hdev, sgt, dir); + if (rc) + return rc; + + if (!trace_habanalabs_dma_map_page_enabled()) + return 0; + + for_each_sgtable_dma_sg(sgt, sg, i) + trace_habanalabs_dma_map_page(hdev->dev, + page_to_phys(sg_page(sg)), + sg->dma_address - prop->device_dma_offset_for_host_access, +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->dma_length, +#else + sg->length, +#endif + dir, caller); + + return 0; +} + +int hl_asic_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, + enum dma_data_direction dir) { struct asic_fixed_properties *prop = &hdev->asic_prop; struct scatterlist *sg; @@ -203,7 +235,30 @@ int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_da return 0; } -void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir) +void hl_dma_unmap_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt, + enum dma_data_direction dir, const char *caller) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct scatterlist *sg; + int i; + + hdev->asic_funcs->dma_unmap_sgtable(hdev, sgt, dir); + + if (trace_habanalabs_dma_unmap_page_enabled()) { + for_each_sgtable_dma_sg(sgt, sg, i) + trace_habanalabs_dma_unmap_page(hdev->dev, page_to_phys(sg_page(sg)), + sg->dma_address - prop->device_dma_offset_for_host_access, +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->dma_length, +#else + sg->length, +#endif + dir, caller); + } +} + +void hl_asic_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, + enum dma_data_direction dir) { struct asic_fixed_properties *prop = &hdev->asic_prop; struct scatterlist *sg; @@ -315,7 +370,9 @@ enum hl_device_status hl_device_status(struct hl_device *hdev) { enum hl_device_status status; - if (hdev->reset_info.in_reset) { + if (hdev->device_fini_pending) { + status = HL_DEVICE_STATUS_MALFUNCTION; + } else if (hdev->reset_info.in_reset) { if (hdev->reset_info.in_compute_reset) status = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE; else @@ -343,9 +400,9 @@ bool hl_device_operational(struct hl_device *hdev, *status = current_status; switch (current_status) { + case HL_DEVICE_STATUS_MALFUNCTION: case HL_DEVICE_STATUS_IN_RESET: case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE: - case HL_DEVICE_STATUS_MALFUNCTION: case HL_DEVICE_STATUS_NEEDS_RESET: return false; case HL_DEVICE_STATUS_OPERATIONAL: @@ -406,8 +463,6 @@ static void hpriv_release(struct kref *ref) hdev->asic_funcs->send_device_activity(hdev, false); - put_pid(hpriv->taskpid); - hl_debugfs_remove_file(hpriv); mutex_destroy(&hpriv->ctx_lock); @@ -424,7 +479,7 @@ static void hpriv_release(struct kref *ref) /* Check the device idle status and reset if not idle. * Skip it if already in reset, or if device is going to be reset in any case. */ - if (!hdev->reset_info.in_reset && !reset_device && hdev->pdev && !hdev->pldm) + if (!hdev->reset_info.in_reset && !reset_device && !hdev->pldm) device_is_idle = hdev->asic_funcs->is_device_idle(hdev, idle_mask, HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL); if (!device_is_idle) { @@ -446,14 +501,18 @@ static void hpriv_release(struct kref *ref) list_del(&hpriv->dev_node); mutex_unlock(&hdev->fpriv_list_lock); + put_pid(hpriv->taskpid); + if (reset_device) { hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE); } else { /* Scrubbing is handled within hl_device_reset(), so here need to do it directly */ int rc = hdev->asic_funcs->scrub_device_mem(hdev); - if (rc) + if (rc) { dev_err(hdev->dev, "failed to scrub memory from hpriv release (%d)\n", rc); + hl_device_reset(hdev, HL_DRV_RESET_HARD); + } } /* Now we can mark the compute_ctx as not active. Even if a reset is running in a different @@ -516,24 +575,20 @@ static void print_device_in_use_info(struct hl_device *hdev, const char *message } /* - * hl_device_release - release function for habanalabs device - * - * @inode: pointer to inode structure - * @filp: pointer to file structure + * hl_device_release() - release function for habanalabs device. + * @ddev: pointer to DRM device structure. + * @file: pointer to DRM file private data structure. * * Called when process closes an habanalabs device */ -static int hl_device_release(struct inode *inode, struct file *filp) +void hl_device_release(struct drm_device *ddev, struct drm_file *file_priv) { - struct hl_fpriv *hpriv = filp->private_data; - struct hl_device *hdev = hpriv->hdev; - - filp->private_data = NULL; + struct hl_fpriv *hpriv = file_priv->driver_priv; + struct hl_device *hdev = to_hl_device(ddev); if (!hdev) { pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n"); put_pid(hpriv->taskpid); - return 0; } hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr); @@ -551,8 +606,6 @@ static int hl_device_release(struct inode *inode, struct file *filp) } hdev->last_open_session_duration_jif = jiffies - hdev->last_successful_open_jif; - - return 0; } static int hl_device_release_ctrl(struct inode *inode, struct file *filp) @@ -571,11 +624,6 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp) list_del(&hpriv->dev_node); mutex_unlock(&hdev->fpriv_ctrl_list_lock); out: - /* release the eventfd */ - if (hpriv->notifier_event.eventfd) - eventfd_ctx_put(hpriv->notifier_event.eventfd); - - mutex_destroy(&hpriv->notifier_event.lock); put_pid(hpriv->taskpid); kfree(hpriv); @@ -583,18 +631,8 @@ out: return 0; } -/* - * hl_mmap - mmap function for habanalabs device - * - * @*filp: pointer to file structure - * @*vma: pointer to vm_area_struct of the process - * - * Called when process does an mmap on habanalabs device. Call the relevant mmap - * function at the end of the common code. - */ -static int hl_mmap(struct file *filp, struct vm_area_struct *vma) +static int __hl_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) { - struct hl_fpriv *hpriv = filp->private_data; struct hl_device *hdev = hpriv->hdev; unsigned long vm_pgoff; @@ -617,14 +655,22 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma) return -EINVAL; } -static const struct file_operations hl_ops = { - .owner = THIS_MODULE, - .open = hl_device_open, - .release = hl_device_release, - .mmap = hl_mmap, - .unlocked_ioctl = hl_ioctl, - .compat_ioctl = hl_ioctl -}; +/* + * hl_mmap - mmap function for habanalabs device + * + * @*filp: pointer to file structure + * @*vma: pointer to vm_area_struct of the process + * + * Called when process does an mmap on habanalabs device. Call the relevant mmap + * function at the end of the common code. + */ +int hl_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct drm_file *file_priv = filp->private_data; + struct hl_fpriv *hpriv = file_priv->driver_priv; + + return __hl_mmap(hpriv, vma); +} static const struct file_operations hl_ctrl_ops = { .owner = THIS_MODULE, @@ -645,14 +691,14 @@ static void device_release_func(struct device *dev) * @hdev: pointer to habanalabs device structure * @class: pointer to the class object of the device * @minor: minor number of the specific device - * @fpos: file operations to install for this device + * @fops: file operations to install for this device * @name: name of the device as it will appear in the filesystem * @cdev: pointer to the char device object that will be initialized * @dev: pointer to the device object that will be initialized * * Initialize a cdev and a Linux device for habanalabs's device. */ -static int device_init_cdev(struct hl_device *hdev, struct class *class, +static int device_init_cdev(struct hl_device *hdev, const struct class *class, int minor, const struct file_operations *fops, char *name, struct cdev *cdev, struct device **dev) @@ -676,23 +722,26 @@ static int device_init_cdev(struct hl_device *hdev, struct class *class, static int cdev_sysfs_debugfs_add(struct hl_device *hdev) { + const struct class *accel_class = hdev->drm.accel->kdev->class; + char name[32]; int rc; - rc = cdev_device_add(&hdev->cdev, hdev->dev); - if (rc) { - dev_err(hdev->dev, - "failed to add a char device to the system\n"); + hdev->cdev_idx = hdev->drm.accel->index; + + /* Initialize cdev and device structures for the control device */ + snprintf(name, sizeof(name), "accel_controlD%d", hdev->cdev_idx); + rc = device_init_cdev(hdev, accel_class, hdev->cdev_idx, &hl_ctrl_ops, name, + &hdev->cdev_ctrl, &hdev->dev_ctrl); + if (rc) return rc; - } rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl); if (rc) { - dev_err(hdev->dev, - "failed to add a control char device to the system\n"); - goto delete_cdev_device; + dev_err(hdev->dev_ctrl, + "failed to add an accel control char device to the system\n"); + goto free_ctrl_device; } - /* hl_sysfs_init() must be done after adding the device to the system */ rc = hl_sysfs_init(hdev); if (rc) { dev_err(hdev->dev, "failed to initialize sysfs\n"); @@ -707,23 +756,19 @@ static int cdev_sysfs_debugfs_add(struct hl_device *hdev) delete_ctrl_cdev_device: cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); -delete_cdev_device: - cdev_device_del(&hdev->cdev, hdev->dev); +free_ctrl_device: + put_device(hdev->dev_ctrl); return rc; } static void cdev_sysfs_debugfs_remove(struct hl_device *hdev) { if (!hdev->cdev_sysfs_debugfs_created) - goto put_devices; + return; - hl_debugfs_remove_device(hdev); hl_sysfs_fini(hdev); - cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); - cdev_device_del(&hdev->cdev, hdev->dev); -put_devices: - put_device(hdev->dev); + cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); put_device(hdev->dev_ctrl); } @@ -808,6 +853,9 @@ static int device_early_init(struct hl_device *hdev) gaudi2_set_asic_funcs(hdev); strscpy(hdev->asic_name, "GAUDI2B", sizeof(hdev->asic_name)); break; + case ASIC_GAUDI2C: + gaudi2_set_asic_funcs(hdev); + strscpy(hdev->asic_name, "GAUDI2C", sizeof(hdev->asic_name)); break; default: dev_err(hdev->dev, "Unrecognized ASIC type %d\n", @@ -996,6 +1044,21 @@ static bool is_pci_link_healthy(struct hl_device *hdev) return (vendor_id == PCI_VENDOR_ID_HABANALABS); } +static int hl_device_eq_heartbeat_check(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + + if (!prop->cpucp_info.eq_health_check_supported) + return 0; + + if (hdev->eq_heartbeat_received) + hdev->eq_heartbeat_received = false; + else + return -EIO; + + return 0; +} + static void hl_device_heartbeat(struct work_struct *work) { struct hl_device *hdev = container_of(work, struct hl_device, @@ -1003,10 +1066,16 @@ static void hl_device_heartbeat(struct work_struct *work) struct hl_info_fw_err_info info = {0}; u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE; - if (!hl_device_operational(hdev, NULL)) + /* Start heartbeat checks only after driver has enabled events from FW */ + if (!hl_device_operational(hdev, NULL) || !hdev->init_done) goto reschedule; - if (!hdev->asic_funcs->send_heartbeat(hdev)) + /* + * For EQ health check need to check if driver received the heartbeat eq event + * in order to validate the eq is working. + * Only if both the EQ is healthy and we managed to send the next heartbeat reschedule. + */ + if ((!hl_device_eq_heartbeat_check(hdev)) && (!hdev->asic_funcs->send_heartbeat(hdev))) goto reschedule; if (hl_device_operational(hdev, NULL)) @@ -1062,7 +1131,15 @@ static int device_late_init(struct hl_device *hdev) hdev->high_pll = hdev->asic_prop.high_pll; if (hdev->heartbeat) { + /* + * Before scheduling the heartbeat driver will check if eq event has received. + * for the first schedule we need to set the indication as true then for the next + * one this indication will be true only if eq event was sent by FW. + */ + hdev->eq_heartbeat_received = true; + INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat); + schedule_delayed_work(&hdev->work_heartbeat, usecs_to_jiffies(HL_HEARTBEAT_PER_USEC)); } @@ -1302,18 +1379,18 @@ disable_device: static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool control_dev) { struct task_struct *task = NULL; - struct list_head *fd_list; - struct hl_fpriv *hpriv; - struct mutex *fd_lock; + struct list_head *hpriv_list; + struct hl_fpriv *hpriv; + struct mutex *hpriv_lock; u32 pending_cnt; - fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock; - fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list; + hpriv_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock; + hpriv_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list; /* Giving time for user to close FD, and for processes that are inside * hl_device_open to finish */ - if (!list_empty(fd_list)) + if (!list_empty(hpriv_list)) ssleep(1); if (timeout) { @@ -1329,12 +1406,12 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool } } - mutex_lock(fd_lock); + mutex_lock(hpriv_lock); /* This section must be protected because we are dereferencing * pointers that are freed if the process exits */ - list_for_each_entry(hpriv, fd_list, dev_node) { + list_for_each_entry(hpriv, hpriv_list, dev_node) { task = get_pid_task(hpriv->taskpid, PIDTYPE_PID); if (task) { dev_info(hdev->dev, "Killing user process pid=%d\n", @@ -1344,17 +1421,13 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool put_task_struct(task); } else { - /* - * If we got here, it means that process was killed from outside the driver - * right after it started looping on fd_list and before get_pid_task, thus - * we don't need to kill it. - */ dev_dbg(hdev->dev, - "Can't get task struct for user process, assuming process was killed from outside the driver\n"); + "Can't get task struct for user process %d, process was killed from outside the driver\n", + pid_nr(hpriv->taskpid)); } } - mutex_unlock(fd_lock); + mutex_unlock(hpriv_lock); /* * We killed the open users, but that doesn't mean they are closed. @@ -1366,7 +1439,7 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool */ wait_for_processes: - while ((!list_empty(fd_list)) && (pending_cnt)) { + while ((!list_empty(hpriv_list)) && (pending_cnt)) { dev_dbg(hdev->dev, "Waiting for all unmap operations to finish before hard reset\n"); @@ -1376,7 +1449,7 @@ wait_for_processes: } /* All processes exited successfully */ - if (list_empty(fd_list)) + if (list_empty(hpriv_list)) return 0; /* Give up waiting for processes to exit */ @@ -1390,17 +1463,17 @@ wait_for_processes: static void device_disable_open_processes(struct hl_device *hdev, bool control_dev) { - struct list_head *fd_list; + struct list_head *hpriv_list; struct hl_fpriv *hpriv; - struct mutex *fd_lock; + struct mutex *hpriv_lock; - fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock; - fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list; + hpriv_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock; + hpriv_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list; - mutex_lock(fd_lock); - list_for_each_entry(hpriv, fd_list, dev_node) + mutex_lock(hpriv_lock); + list_for_each_entry(hpriv, hpriv_list, dev_node) hpriv->hdev = NULL; - mutex_unlock(fd_lock); + mutex_unlock(hpriv_lock); } static void send_disable_pci_access(struct hl_device *hdev, u32 flags) @@ -1916,7 +1989,16 @@ int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask) } ctx = hl_get_compute_ctx(hdev); - if (!ctx || !ctx->hpriv->notifier_event.eventfd) + if (!ctx) + goto device_reset; + + /* + * There is no point in postponing the reset if user is not registered for events. + * However if no eventfd_ctx exists but the device release watchdog is already scheduled, it + * just implies that user has unregistered as part of handling a previous event. In this + * case an immediate reset is not required. + */ + if (!ctx->hpriv->notifier_event.eventfd && !hdev->reset_info.watchdog_active) goto device_reset; /* Schedule the device release watchdog work unless reset is already in progress or if the @@ -1928,8 +2010,10 @@ int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask) goto device_reset; } - if (hdev->reset_info.watchdog_active) + if (hdev->reset_info.watchdog_active) { + hdev->device_release_watchdog_work.flags |= flags; goto out; + } hdev->device_release_watchdog_work.flags = flags; dev_dbg(hdev->dev, "Device is going to be hard-reset in %u sec unless being released\n", @@ -1990,59 +2074,6 @@ void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask) hl_notifier_event_send(&hpriv->notifier_event, event_mask); mutex_unlock(&hdev->fpriv_list_lock); - - /* control device */ - mutex_lock(&hdev->fpriv_ctrl_list_lock); - - list_for_each_entry(hpriv, &hdev->fpriv_ctrl_list, dev_node) - hl_notifier_event_send(&hpriv->notifier_event, event_mask); - - mutex_unlock(&hdev->fpriv_ctrl_list_lock); -} - -static int create_cdev(struct hl_device *hdev) -{ - char *name; - int rc; - - hdev->cdev_idx = hdev->id / 2; - - name = kasprintf(GFP_KERNEL, "hl%d", hdev->cdev_idx); - if (!name) { - rc = -ENOMEM; - goto out_err; - } - - /* Initialize cdev and device structures */ - rc = device_init_cdev(hdev, hdev->hclass, hdev->id, &hl_ops, name, - &hdev->cdev, &hdev->dev); - - kfree(name); - - if (rc) - goto out_err; - - name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->cdev_idx); - if (!name) { - rc = -ENOMEM; - goto free_dev; - } - - /* Initialize cdev and device structures for control device */ - rc = device_init_cdev(hdev, hdev->hclass, hdev->id_control, &hl_ctrl_ops, - name, &hdev->cdev_ctrl, &hdev->dev_ctrl); - - kfree(name); - - if (rc) - goto free_dev; - - return 0; - -free_dev: - put_device(hdev->dev); -out_err: - return rc; } /* @@ -2057,16 +2088,14 @@ out_err: int hl_device_init(struct hl_device *hdev) { int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt; + struct hl_ts_free_jobs *free_jobs_data; bool expose_interfaces_on_err = false; - - rc = create_cdev(hdev); - if (rc) - goto out_disabled; + void *p; /* Initialize ASIC function pointers and perform early init */ rc = device_early_init(hdev); if (rc) - goto free_dev; + goto out_disabled; user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count + hdev->asic_prop.user_interrupt_count; @@ -2078,15 +2107,43 @@ int hl_device_init(struct hl_device *hdev) rc = -ENOMEM; goto early_fini; } + + /* Timestamp records supported only if CQ supported in device */ + if (hdev->asic_prop.first_available_cq[0] != USHRT_MAX) { + for (i = 0 ; i < user_interrupt_cnt ; i++) { + p = vzalloc(TIMESTAMP_FREE_NODES_NUM * + sizeof(struct timestamp_reg_free_node)); + if (!p) { + rc = -ENOMEM; + goto free_usr_intr_mem; + } + free_jobs_data = &hdev->user_interrupt[i].ts_free_jobs_data; + free_jobs_data->free_nodes_pool = p; + free_jobs_data->free_nodes_length = TIMESTAMP_FREE_NODES_NUM; + free_jobs_data->next_avail_free_node_idx = 0; + } + } + } + + free_jobs_data = &hdev->common_user_cq_interrupt.ts_free_jobs_data; + p = vzalloc(TIMESTAMP_FREE_NODES_NUM * + sizeof(struct timestamp_reg_free_node)); + if (!p) { + rc = -ENOMEM; + goto free_usr_intr_mem; } + free_jobs_data->free_nodes_pool = p; + free_jobs_data->free_nodes_length = TIMESTAMP_FREE_NODES_NUM; + free_jobs_data->next_avail_free_node_idx = 0; + /* * Start calling ASIC initialization. First S/W then H/W and finally * late init */ rc = hdev->asic_funcs->sw_init(hdev); if (rc) - goto free_usr_intr_mem; + goto free_common_usr_intr_mem; /* initialize completion structure for multi CS wait */ @@ -2253,6 +2310,14 @@ int hl_device_init(struct hl_device *hdev) * From here there is no need to expose them in case of an error. */ expose_interfaces_on_err = false; + + rc = drm_dev_register(&hdev->drm, 0); + if (rc) { + dev_err(hdev->dev, "Failed to register DRM device, rc %d\n", rc); + rc = 0; + goto out_disabled; + } + rc = cdev_sysfs_debugfs_add(hdev); if (rc) { dev_err(hdev->dev, "Failed to add char devices and sysfs/debugfs files\n"); @@ -2284,8 +2349,6 @@ int hl_device_init(struct hl_device *hdev) "Successfully added device %s to habanalabs driver\n", dev_name(&(hdev)->pdev->dev)); - hdev->init_done = true; - /* After initialization is done, we are ready to receive events from * the F/W. We can't do it before because we will ignore events and if * those events are fatal, we won't know about it and the device will @@ -2293,6 +2356,8 @@ int hl_device_init(struct hl_device *hdev) */ hdev->asic_funcs->enable_events_from_fw(hdev); + hdev->init_done = true; + return 0; cb_pool_fini: @@ -2317,19 +2382,27 @@ hw_queues_destroy: hl_hw_queues_destroy(hdev); sw_fini: hdev->asic_funcs->sw_fini(hdev); +free_common_usr_intr_mem: + vfree(hdev->common_user_cq_interrupt.ts_free_jobs_data.free_nodes_pool); free_usr_intr_mem: - kfree(hdev->user_interrupt); + if (user_interrupt_cnt) { + for (i = 0 ; i < user_interrupt_cnt ; i++) { + if (!hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool) + break; + vfree(hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool); + } + kfree(hdev->user_interrupt); + } early_fini: device_early_fini(hdev); -free_dev: - put_device(hdev->dev_ctrl); - put_device(hdev->dev); out_disabled: hdev->disabled = true; - if (expose_interfaces_on_err) + if (expose_interfaces_on_err) { + drm_dev_register(&hdev->drm, 0); cdev_sysfs_debugfs_add(hdev); - dev_err(&hdev->pdev->dev, - "Failed to initialize hl%d. Device %s is NOT usable !\n", + } + + pr_err("Failed to initialize accel%d. Device %s is NOT usable!\n", hdev->cdev_idx, dev_name(&hdev->pdev->dev)); return rc; @@ -2344,12 +2417,13 @@ out_disabled: */ void hl_device_fini(struct hl_device *hdev) { + u32 user_interrupt_cnt; bool device_in_reset; ktime_t timeout; u64 reset_sec; int i, rc; - dev_info(hdev->dev, "Removing device\n"); + dev_info(hdev->dev, "Removing device %s\n", dev_name(&(hdev)->pdev->dev)); hdev->device_fini_pending = 1; flush_delayed_work(&hdev->device_reset_work.reset_work); @@ -2425,14 +2499,14 @@ void hl_device_fini(struct hl_device *hdev) hdev->process_kill_trial_cnt = 0; rc = device_kill_open_processes(hdev, HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI, false); if (rc) { - dev_crit(hdev->dev, "Failed to kill all open processes\n"); + dev_crit(hdev->dev, "Failed to kill all open processes (%d)\n", rc); device_disable_open_processes(hdev, false); } hdev->process_kill_trial_cnt = 0; rc = device_kill_open_processes(hdev, 0, true); if (rc) { - dev_crit(hdev->dev, "Failed to kill all control device open processes\n"); + dev_crit(hdev->dev, "Failed to kill all control device open processes (%d)\n", rc); device_disable_open_processes(hdev, true); } @@ -2464,7 +2538,20 @@ void hl_device_fini(struct hl_device *hdev) for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) hl_cq_fini(hdev, &hdev->completion_queue[i]); kfree(hdev->completion_queue); - kfree(hdev->user_interrupt); + + user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count + + hdev->asic_prop.user_interrupt_count; + + if (user_interrupt_cnt) { + if (hdev->asic_prop.first_available_cq[0] != USHRT_MAX) { + for (i = 0 ; i < user_interrupt_cnt ; i++) + vfree(hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool); + } + + kfree(hdev->user_interrupt); + } + + vfree(hdev->common_user_cq_interrupt.ts_free_jobs_data.free_nodes_pool); hl_hw_queues_destroy(hdev); @@ -2475,6 +2562,7 @@ void hl_device_fini(struct hl_device *hdev) /* Hide devices and sysfs/debugfs files from user */ cdev_sysfs_debugfs_remove(hdev); + drm_dev_unregister(&hdev->drm); hl_debugfs_device_fini(hdev); @@ -2690,6 +2778,20 @@ void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info) *info->event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR; } +void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count) +{ + struct engine_err_info *info = &hdev->captured_err_info.engine_err; + + /* Capture only the first engine error */ + if (atomic_cmpxchg(&info->event_detected, 0, 1)) + return; + + info->event.timestamp = ktime_to_ns(ktime_get()); + info->event.engine_id = engine_id; + info->event.error_count = error_count; + info->event_info_available = true; +} + void hl_enable_err_info_capture(struct hl_error_info *captured_err_info) { vfree(captured_err_info->page_fault_info.user_mappings); diff --git a/drivers/accel/habanalabs/common/firmware_if.c b/drivers/accel/habanalabs/common/firmware_if.c index acbc1a6b5c..47e8384134 100644 --- a/drivers/accel/habanalabs/common/firmware_if.c +++ b/drivers/accel/habanalabs/common/firmware_if.c @@ -6,7 +6,7 @@ */ #include "habanalabs.h" -#include "../include/common/hl_boot_if.h" +#include <linux/habanalabs/hl_boot_if.h> #include <linux/firmware.h> #include <linux/crc32.h> @@ -724,6 +724,11 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val, err_exists = true; } + if (err_val & CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL) { + dev_err(hdev->dev, "Device boot error - Failed to set threshold for temperature sensor\n"); + err_exists = true; + } + if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) { /* Ignore this bit, don't prevent driver loading */ dev_dbg(hdev->dev, "device unusable status is set\n"); @@ -1459,6 +1464,10 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status) dev_err(hdev->dev, "Device boot progress - Stuck in preboot after security initialization\n"); break; + case CPU_BOOT_STATUS_FW_SHUTDOWN_PREP: + dev_err(hdev->dev, + "Device boot progress - Stuck in preparation for shutdown\n"); + break; default: dev_err(hdev->dev, "Device boot progress - Invalid or unexpected status code %d\n", status); @@ -1469,8 +1478,9 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status) int hl_fw_wait_preboot_ready(struct hl_device *hdev) { struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load; - u32 status; - int rc; + u32 status = 0, timeout; + int rc, tries = 1; + bool preboot_still_runs; /* Need to check two possible scenarios: * @@ -1480,6 +1490,8 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev) * All other status values - for older firmwares where the uboot was * loaded from the FLASH */ + timeout = pre_fw_load->wait_for_preboot_timeout; +retry: rc = hl_poll_timeout( hdev, pre_fw_load->cpu_boot_status_reg, @@ -1488,7 +1500,24 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev) (status == CPU_BOOT_STATUS_READY_TO_BOOT) || (status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT), hdev->fw_poll_interval_usec, - pre_fw_load->wait_for_preboot_timeout); + timeout); + /* + * if F/W reports "security-ready" it means preboot might take longer. + * If the field 'wait_for_preboot_extended_timeout' is non 0 we wait again + * with that timeout + */ + preboot_still_runs = (status == CPU_BOOT_STATUS_SECURITY_READY || + status == CPU_BOOT_STATUS_IN_PREBOOT || + status == CPU_BOOT_STATUS_FW_SHUTDOWN_PREP || + status == CPU_BOOT_STATUS_DRAM_RDY); + + if (rc && tries && preboot_still_runs) { + tries--; + if (pre_fw_load->wait_for_preboot_extended_timeout) { + timeout = pre_fw_load->wait_for_preboot_extended_timeout; + goto retry; + } + } if (rc) { detect_cpu_boot_status(hdev, status); @@ -2743,7 +2772,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) { struct lkd_fw_binning_info *binning_info; - rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, 0); + rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, + sizeof(struct lkd_msg_comms)); if (rc) goto protocol_err; @@ -2777,6 +2807,11 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, hdev->decoder_binning, hdev->rotator_binning); } + if (hdev->asic_prop.support_dynamic_resereved_fw_size) { + hdev->asic_prop.reserved_fw_mem_size = + le32_to_cpu(fw_loader->dynamic_loader.comm_desc.rsvd_mem_size_mb); + } + return 0; } diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h index 2f027d5a82..d0fd77bb6a 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2016-2022 HabanaLabs, Ltd. + * Copyright 2016-2023 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -8,7 +8,7 @@ #ifndef HABANALABSP_H_ #define HABANALABSP_H_ -#include "../include/common/cpucp_if.h" +#include <linux/habanalabs/cpucp_if.h> #include "../include/common/qman_if.h" #include "../include/hw_ip/mmu/mmu_general.h" #include <uapi/drm/habanalabs_accel.h> @@ -29,6 +29,9 @@ #include <linux/coresight.h> #include <linux/dma-buf.h> +#include <drm/drm_device.h> +#include <drm/drm_file.h> + #include "security.h" #define HL_NAME "habanalabs" @@ -82,8 +85,6 @@ struct hl_fpriv; #define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */ -#define HL_SIM_MAX_TIMEOUT_US 100000000 /* 100s */ - #define HL_INVALID_QUEUE UINT_MAX #define HL_COMMON_USER_CQ_INTERRUPT_ID 0xFFF @@ -103,6 +104,8 @@ struct hl_fpriv; /* MMU */ #define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ +#define TIMESTAMP_FREE_NODES_NUM 512 + /** * enum hl_mmu_page_table_location - mmu page table location * @MMU_DR_PGT: page-table is located on device DRAM. @@ -154,6 +157,11 @@ enum hl_mmu_page_table_location { #define hl_asic_dma_pool_free(hdev, vaddr, dma_addr) \ hl_asic_dma_pool_free_caller(hdev, vaddr, dma_addr, __func__) +#define hl_dma_map_sgtable(hdev, sgt, dir) \ + hl_dma_map_sgtable_caller(hdev, sgt, dir, __func__) +#define hl_dma_unmap_sgtable(hdev, sgt, dir) \ + hl_dma_unmap_sgtable_caller(hdev, sgt, dir, __func__) + /* * Reset Flags * @@ -545,8 +553,7 @@ struct hl_hints_range { * allocated with huge pages. * @hints_dram_reserved_va_range: dram hint addresses reserved range. * @hints_host_reserved_va_range: host hint addresses reserved range. - * @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved - * range. + * @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved range. * @sram_base_address: SRAM physical start address. * @sram_end_address: SRAM physical end address. * @sram_user_base_address - SRAM physical start address for user access. @@ -585,7 +592,7 @@ struct hl_hints_range { * @mmu_pte_size: PTE size in MMU page tables. * @mmu_hop_table_size: MMU hop table size. * @mmu_hop0_tables_total_size: total size of MMU hop0 tables. - * @dram_page_size: page size for MMU DRAM allocation. + * @dram_page_size: The DRAM physical page size. * @cfg_size: configuration space size on SRAM. * @sram_size: total size of SRAM. * @max_asid: maximum number of open contexts (ASIDs). @@ -641,6 +648,7 @@ struct hl_hints_range { * @glbl_err_cause_num: global err cause number. * @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is * not supported. + * @reserved_fw_mem_size: size in MB of dram memory reserved for FW. * @collective_first_sob: first sync object available for collective use * @collective_first_mon: first monitor available for collective use * @sync_stream_first_sob: first sync object available for sync stream use @@ -686,9 +694,10 @@ struct hl_hints_range { * @configurable_stop_on_err: is stop-on-error option configurable via debugfs. * @set_max_power_on_device_init: true if need to set max power in F/W on device init. * @supports_user_set_page_size: true if user can set the allocation page size. - * @dma_mask: the dma mask to be set for this device + * @dma_mask: the dma mask to be set for this device. * @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported. * @supports_engine_modes: true if changing engines/engine_cores modes is supported. + * @support_dynamic_resereved_fw_size: true if we support dynamic reserved size for fw. */ struct asic_fixed_properties { struct hw_queue_properties *hw_queues_props; @@ -772,6 +781,7 @@ struct asic_fixed_properties { u32 num_of_special_blocks; u32 glbl_err_cause_num; u32 hbw_flush_reg; + u32 reserved_fw_mem_size; u16 collective_first_sob; u16 collective_first_mon; u16 sync_stream_first_sob; @@ -808,6 +818,7 @@ struct asic_fixed_properties { u8 dma_mask; u8 supports_advanced_cpucp_rc; u8 supports_engine_modes; + u8 support_dynamic_resereved_fw_size; }; /** @@ -1098,19 +1109,41 @@ enum hl_user_interrupt_type { }; /** + * struct hl_ts_free_jobs - holds user interrupt ts free nodes related data + * @free_nodes_pool: pool of nodes to be used for free timestamp jobs + * @free_nodes_length: number of nodes in free_nodes_pool + * @next_avail_free_node_idx: index of the next free node in the pool + * + * the free nodes pool must be protected by the user interrupt lock + * to avoid race between different interrupts which are using the same + * ts buffer with different offsets. + */ +struct hl_ts_free_jobs { + struct timestamp_reg_free_node *free_nodes_pool; + u32 free_nodes_length; + u32 next_avail_free_node_idx; +}; + +/** * struct hl_user_interrupt - holds user interrupt information * @hdev: pointer to the device structure + * @ts_free_jobs_data: timestamp free jobs related data * @type: user interrupt type * @wait_list_head: head to the list of user threads pending on this interrupt + * @ts_list_head: head to the list of timestamp records * @wait_list_lock: protects wait_list_head + * @ts_list_lock: protects ts_list_head * @timestamp: last timestamp taken upon interrupt * @interrupt_id: msix interrupt id */ struct hl_user_interrupt { struct hl_device *hdev; + struct hl_ts_free_jobs ts_free_jobs_data; enum hl_user_interrupt_type type; struct list_head wait_list_head; + struct list_head ts_list_head; spinlock_t wait_list_lock; + spinlock_t ts_list_lock; ktime_t timestamp; u32 interrupt_id; }; @@ -1120,11 +1153,15 @@ struct hl_user_interrupt { * @free_objects_node: node in the list free_obj_jobs * @cq_cb: pointer to cq command buffer to be freed * @buf: pointer to timestamp buffer to be freed + * @in_use: indicates whether the node still in use in workqueue thread. + * @dynamic_alloc: indicates whether the node was allocated dynamically in the interrupt handler */ struct timestamp_reg_free_node { struct list_head free_objects_node; struct hl_cb *cq_cb; struct hl_mmap_mem_buf *buf; + atomic_t in_use; + u8 dynamic_alloc; }; /* struct timestamp_reg_work_obj - holds the timestamp registration free objects job @@ -1133,17 +1170,21 @@ struct timestamp_reg_free_node { * @free_obj: workqueue object to free timestamp registration node objects * @hdev: pointer to the device structure * @free_obj_head: list of free jobs nodes (node type timestamp_reg_free_node) + * @dynamic_alloc_free_obj_head: list of free jobs nodes which were dynamically allocated in the + * interrupt handler. */ struct timestamp_reg_work_obj { struct work_struct free_obj; struct hl_device *hdev; struct list_head *free_obj_head; + struct list_head *dynamic_alloc_free_obj_head; }; /* struct timestamp_reg_info - holds the timestamp registration related data. * @buf: pointer to the timestamp buffer which include both user/kernel buffers. * relevant only when doing timestamps records registration. * @cq_cb: pointer to CQ counter CB. + * @interrupt: interrupt that the node hanged on it's wait list. * @timestamp_kernel_addr: timestamp handle address, where to set timestamp * relevant only when doing timestamps records * registration. @@ -1153,17 +1194,18 @@ struct timestamp_reg_work_obj { * allocating records dynamically. */ struct timestamp_reg_info { - struct hl_mmap_mem_buf *buf; - struct hl_cb *cq_cb; - u64 *timestamp_kernel_addr; - u8 in_use; + struct hl_mmap_mem_buf *buf; + struct hl_cb *cq_cb; + struct hl_user_interrupt *interrupt; + u64 *timestamp_kernel_addr; + bool in_use; }; /** * struct hl_user_pending_interrupt - holds a context to a user thread * pending on an interrupt * @ts_reg_info: holds the timestamps registration nodes info - * @wait_list_node: node in the list of user threads pending on an interrupt + * @list_node: node in the list of user threads pending on an interrupt or timestamp * @fence: hl fence object for interrupt completion * @cq_target_value: CQ target value * @cq_kernel_addr: CQ kernel address, to be used in the cq interrupt @@ -1171,7 +1213,7 @@ struct timestamp_reg_info { */ struct hl_user_pending_interrupt { struct timestamp_reg_info ts_reg_info; - struct list_head wait_list_node; + struct list_head list_node; struct hl_fence fence; u64 cq_target_value; u64 *cq_kernel_addr; @@ -1220,6 +1262,7 @@ struct hl_dec { * @ASIC_GAUDI_SEC: Gaudi secured device (HL-2000). * @ASIC_GAUDI2: Gaudi2 device. * @ASIC_GAUDI2B: Gaudi2B device. + * @ASIC_GAUDI2C: Gaudi2C device. */ enum hl_asic_type { ASIC_INVALID, @@ -1228,6 +1271,7 @@ enum hl_asic_type { ASIC_GAUDI_SEC, ASIC_GAUDI2, ASIC_GAUDI2B, + ASIC_GAUDI2C, }; struct hl_cs_parser; @@ -1370,6 +1414,8 @@ struct dynamic_fw_load_mgr { * @boot_err0_reg: boot_err0 register address * @boot_err1_reg: boot_err1 register address * @wait_for_preboot_timeout: timeout to poll for preboot ready + * @wait_for_preboot_extended_timeout: timeout to pull for preboot ready in case where we know + * preboot needs longer time. */ struct pre_fw_load_props { u32 cpu_boot_status_reg; @@ -1378,6 +1424,7 @@ struct pre_fw_load_props { u32 boot_err0_reg; u32 boot_err1_reg; u32 wait_for_preboot_timeout; + u32 wait_for_preboot_extended_timeout; }; /** @@ -1477,11 +1524,9 @@ struct engines_data { * @asic_dma_pool_free: free small DMA allocation from pool. * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool. * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool. - * @asic_dma_unmap_single: unmap a single DMA buffer - * @asic_dma_map_single: map a single buffer to a DMA - * @hl_dma_unmap_sgtable: DMA unmap scatter-gather table. + * @dma_unmap_sgtable: DMA unmap scatter-gather table. + * @dma_map_sgtable: DMA map scatter-gather table. * @cs_parser: parse Command Submission. - * @asic_dma_map_sgtable: DMA map scatter-gather table. * @add_end_of_cb_packets: Add packets to the end of CB, if device requires it. * @update_eq_ci: update event queue CI. * @context_switch: called upon ASID context switch. @@ -1602,18 +1647,11 @@ struct hl_asic_funcs { size_t size, dma_addr_t *dma_handle); void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev, size_t size, void *vaddr); - void (*asic_dma_unmap_single)(struct hl_device *hdev, - dma_addr_t dma_addr, int len, - enum dma_data_direction dir); - dma_addr_t (*asic_dma_map_single)(struct hl_device *hdev, - void *addr, int len, + void (*dma_unmap_sgtable)(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); - void (*hl_dma_unmap_sgtable)(struct hl_device *hdev, - struct sg_table *sgt, + int (*dma_map_sgtable)(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser); - int (*asic_dma_map_sgtable)(struct hl_device *hdev, struct sg_table *sgt, - enum dma_data_direction dir); void (*add_end_of_cb_packets)(struct hl_device *hdev, void *kernel_address, u32 len, u32 original_len, @@ -1771,16 +1809,19 @@ struct hl_cs_counters_atomic { * @phys_pg_pack: pointer to physical page pack if the dma-buf was exported * where virtual memory is supported. * @memhash_hnode: pointer to the memhash node. this object holds the export count. - * @device_address: physical address of the device's memory. Relevant only - * if phys_pg_pack is NULL (dma-buf was exported from address). - * The total size can be taken from the dmabuf object. + * @offset: the offset into the buffer from which the memory is exported. + * Relevant only if virtual memory is supported and phys_pg_pack is being used. + * device_phys_addr: physical address of the device's memory. Relevant only + * if phys_pg_pack is NULL (dma-buf was exported from address). + * The total size can be taken from the dmabuf object. */ struct hl_dmabuf_priv { struct dma_buf *dmabuf; struct hl_ctx *ctx; struct hl_vm_phys_pg_pack *phys_pg_pack; struct hl_vm_hash_node *memhash_hnode; - uint64_t device_address; + u64 offset; + u64 device_phys_addr; }; #define HL_CS_OUTCOME_HISTORY_LEN 256 @@ -1835,6 +1876,7 @@ struct hl_cs_outcome_store { * @va_range: holds available virtual addresses for host and dram mappings. * @mem_hash_lock: protects the mem_hash. * @hw_block_list_lock: protects the HW block memory list. + * @ts_reg_lock: timestamp registration ioctls lock. * @debugfs_list: node in debugfs list of contexts. * @hw_block_mem_list: list of HW block virtual mapped addresses. * @cs_counters: context command submission counters. @@ -1871,6 +1913,7 @@ struct hl_ctx { struct hl_va_range *va_range[HL_VA_RANGE_TYPE_MAX]; struct mutex mem_hash_lock; struct mutex hw_block_list_lock; + struct mutex ts_reg_lock; struct list_head debugfs_list; struct list_head hw_block_mem_list; struct hl_cs_counters_atomic cs_counters; @@ -1917,17 +1960,17 @@ struct hl_ctx_mgr { * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise. */ struct hl_userptr { - enum vm_type vm_type; /* must be first */ - struct list_head job_node; - struct page **pages; - unsigned int npages; - struct sg_table *sgt; - enum dma_data_direction dir; - struct list_head debugfs_list; - pid_t pid; - u64 addr; - u64 size; - u8 dma_mapped; + enum vm_type vm_type; /* must be first */ + struct list_head job_node; + struct page **pages; + unsigned int npages; + struct sg_table *sgt; + enum dma_data_direction dir; + struct list_head debugfs_list; + pid_t pid; + u64 addr; + u64 size; + u8 dma_mapped; }; /** @@ -2148,7 +2191,6 @@ struct hl_vm_hw_block_list_node { * @pages: the physical page array. * @npages: num physical pages in the pack. * @total_size: total size of all the pages in this list. - * @exported_size: buffer exported size. * @node: used to attach to deletion list that is used when all the allocations are cleared * at the teardown of the context. * @mapping_cnt: number of shared mappings. @@ -2165,7 +2207,6 @@ struct hl_vm_phys_pg_pack { u64 *pages; u64 npages; u64 total_size; - u64 exported_size; struct list_head node; atomic_t mapping_cnt; u32 asid; @@ -2250,7 +2291,7 @@ struct hl_notifier_event { /** * struct hl_fpriv - process information stored in FD private data. * @hdev: habanalabs device structure. - * @filp: pointer to the given file structure. + * @file_priv: pointer to the DRM file private data structure. * @taskpid: current process ID. * @ctx: current executing context. TODO: remove for multiple ctx per process * @ctx_mgr: context manager to handle multiple context for this FD. @@ -2265,7 +2306,7 @@ struct hl_notifier_event { */ struct hl_fpriv { struct hl_device *hdev; - struct file *filp; + struct drm_file *file_priv; struct pid *taskpid; struct hl_ctx *ctx; struct hl_ctx_mgr ctx_mgr; @@ -2706,6 +2747,8 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); usr_intr.type = intr_type; \ INIT_LIST_HEAD(&usr_intr.wait_list_head); \ spin_lock_init(&usr_intr.wait_list_lock); \ + INIT_LIST_HEAD(&usr_intr.ts_list_head); \ + spin_lock_init(&usr_intr.ts_list_lock); \ }) struct hwmon_chip_info; @@ -3055,6 +3098,20 @@ struct fw_err_info { }; /** + * struct engine_err_info - engine error information. + * @event: holds information on the event. + * @event_detected: if set as 1, then an engine event was discovered for the + * first time after the driver has finished booting-up. + * @event_info_available: indicates that an engine event info is now available. + */ +struct engine_err_info { + struct hl_info_engine_err_event event; + atomic_t event_detected; + bool event_info_available; +}; + + +/** * struct hl_error_info - holds information collected during an error. * @cs_timeout: CS timeout error information. * @razwi_info: RAZWI information. @@ -3062,6 +3119,7 @@ struct fw_err_info { * @page_fault_info: page fault information. * @hw_err: (fatal) hardware error information. * @fw_err: firmware error information. + * @engine_err: engine error information. */ struct hl_error_info { struct cs_timeout_info cs_timeout; @@ -3070,6 +3128,7 @@ struct hl_error_info { struct page_fault_info page_fault_info; struct hw_err_info hw_err; struct fw_err_info fw_err; + struct engine_err_info engine_err; }; /** @@ -3117,8 +3176,7 @@ struct hl_reset_info { * (required only for PCI address match mode) * @pcie_bar: array of available PCIe bars virtual addresses. * @rmmio: configuration area address on SRAM. - * @hclass: pointer to the habanalabs class. - * @cdev: related char device. + * @drm: related DRM device. * @cdev_ctrl: char device for control operations only (INFO IOCTL) * @dev: related kernel basic device structure. * @dev_ctrl: related kernel device structure for the control device @@ -3245,8 +3303,7 @@ struct hl_reset_info { * @rotator_binning: contains mask of rotators engines that is received from the f/w * which indicates which rotator engines are binned-out(Gaudi3 and above). * @id: device minor. - * @id_control: minor of the control device. - * @cdev_idx: char device index. Used for setting its name. + * @cdev_idx: char device index. * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit * addresses. * @is_in_dram_scrub: true if dram scrub operation is on going. @@ -3289,6 +3346,7 @@ struct hl_reset_info { * device. * @supports_ctx_switch: true if a ctx switch is required upon first submission. * @support_preboot_binning: true if we support read binning info from preboot. + * @eq_heartbeat_received: indication that eq heartbeat event has received from FW. * @nic_ports_mask: Controls which NIC ports are enabled. Used only for testing. * @fw_components: Controls which f/w components to load to the device. There are multiple f/w * stages and sometimes we want to stop at a certain stage. Used only for testing. @@ -3308,8 +3366,7 @@ struct hl_device { u64 pcie_bar_phys[HL_PCI_NUM_BARS]; void __iomem *pcie_bar[HL_PCI_NUM_BARS]; void __iomem *rmmio; - struct class *hclass; - struct cdev cdev; + struct drm_device drm; struct cdev cdev_ctrl; struct device *dev; struct device *dev_ctrl; @@ -3418,7 +3475,6 @@ struct hl_device { u32 device_release_watchdog_timeout_sec; u32 rotator_binning; u16 id; - u16 id_control; u16 cdev_idx; u16 cpu_pci_msb_addr; u8 is_in_dram_scrub; @@ -3451,6 +3507,7 @@ struct hl_device { u8 reset_upon_device_release; u8 supports_ctx_switch; u8 support_preboot_binning; + u8 eq_heartbeat_received; /* Parameters for bring-up to be upstreamed */ u64 nic_ports_mask; @@ -3582,6 +3639,11 @@ static inline bool hl_mem_area_inside_range(u64 address, u64 size, return false; } +static inline struct hl_device *to_hl_device(struct drm_device *ddev) +{ + return container_of(ddev, struct hl_device, drm); +} + /** * hl_mem_area_crosses_range() - Checks whether address+size crossing a range. * @address: The start address of the area we want to validate. @@ -3611,8 +3673,13 @@ void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t dma_addr_t *dma_handle, const char *caller); void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr, const char *caller); -int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); -void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, +int hl_dma_map_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt, + enum dma_data_direction dir, const char *caller); +void hl_dma_unmap_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt, + enum dma_data_direction dir, const char *caller); +int hl_asic_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, + enum dma_data_direction dir); +void hl_asic_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val, enum debugfs_access_type acc_type, enum pci_region region_type, bool set_dram_bar); @@ -3620,7 +3687,12 @@ int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val, enum debugfs_access_type acc_type); int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type, u64 addr, u64 *val, enum debugfs_access_type acc_type); -int hl_device_open(struct inode *inode, struct file *filp); + +int hl_mmap(struct file *filp, struct vm_area_struct *vma); + +int hl_device_open(struct drm_device *drm, struct drm_file *file_priv); +void hl_device_release(struct drm_device *ddev, struct drm_file *file_priv); + int hl_device_open_ctrl(struct inode *inode, struct file *filp); bool hl_device_operational(struct hl_device *hdev, enum hl_device_status *status); @@ -3652,8 +3724,9 @@ void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q); irqreturn_t hl_irq_handler_cq(int irq, void *arg); irqreturn_t hl_irq_handler_eq(int irq, void *arg); irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg); -irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg); +irqreturn_t hl_irq_user_interrupt_handler(int irq, void *arg); irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg); +irqreturn_t hl_irq_eq_error_interrupt_thread_handler(int irq, void *arg); u32 hl_cq_inc_ptr(u32 ptr); int hl_asid_init(struct hl_device *hdev); @@ -3944,16 +4017,14 @@ void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_ u64 *event_mask); void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_mask); void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info); +void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count); void hl_enable_err_info_capture(struct hl_error_info *captured_err_info); #ifdef CONFIG_DEBUG_FS -void hl_debugfs_init(void); -void hl_debugfs_fini(void); int hl_debugfs_device_init(struct hl_device *hdev); void hl_debugfs_device_fini(struct hl_device *hdev); void hl_debugfs_add_device(struct hl_device *hdev); -void hl_debugfs_remove_device(struct hl_device *hdev); void hl_debugfs_add_file(struct hl_fpriv *hpriv); void hl_debugfs_remove_file(struct hl_fpriv *hpriv); void hl_debugfs_add_cb(struct hl_cb *cb); @@ -3972,14 +4043,6 @@ void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data, #else -static inline void __init hl_debugfs_init(void) -{ -} - -static inline void hl_debugfs_fini(void) -{ -} - static inline int hl_debugfs_device_init(struct hl_device *hdev) { return 0; @@ -3993,10 +4056,6 @@ static inline void hl_debugfs_add_device(struct hl_device *hdev) { } -static inline void hl_debugfs_remove_device(struct hl_device *hdev) -{ -} - static inline void hl_debugfs_add_file(struct hl_fpriv *hpriv) { } @@ -4108,11 +4167,12 @@ void hl_ack_pb_single_dcore(struct hl_device *hdev, u32 dcore_offset, const u32 pb_blocks[], u32 blocks_array_size); /* IOCTLs */ -long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg); -int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data); -int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data); -int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data); -int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data); +int hl_info_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv); +int hl_cb_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv); +int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv); +int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv); +int hl_mem_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv); +int hl_debug_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv); #endif /* HABANALABSP_H_ */ diff --git a/drivers/accel/habanalabs/common/habanalabs_drv.c b/drivers/accel/habanalabs/common/habanalabs_drv.c index 7263e84c1a..51fb04bbe3 100644 --- a/drivers/accel/habanalabs/common/habanalabs_drv.c +++ b/drivers/accel/habanalabs/common/habanalabs_drv.c @@ -14,6 +14,11 @@ #include <linux/pci.h> #include <linux/module.h> #include <linux/vmalloc.h> +#include <linux/version.h> + +#include <drm/drm_accel.h> +#include <drm/drm_drv.h> +#include <drm/drm_ioctl.h> #define CREATE_TRACE_POINTS #include <trace/events/habanalabs.h> @@ -27,7 +32,6 @@ MODULE_DESCRIPTION(HL_DRIVER_DESC); MODULE_LICENSE("GPL v2"); static int hl_major; -static struct class *hl_class; static DEFINE_IDR(hl_devs_idr); static DEFINE_MUTEX(hl_devs_idr_lock); @@ -70,6 +74,42 @@ static const struct pci_device_id ids[] = { }; MODULE_DEVICE_TABLE(pci, ids); +static const struct drm_ioctl_desc hl_drm_ioctls[] = { + DRM_IOCTL_DEF_DRV(HL_INFO, hl_info_ioctl, 0), + DRM_IOCTL_DEF_DRV(HL_CB, hl_cb_ioctl, 0), + DRM_IOCTL_DEF_DRV(HL_CS, hl_cs_ioctl, 0), + DRM_IOCTL_DEF_DRV(HL_WAIT_CS, hl_wait_ioctl, 0), + DRM_IOCTL_DEF_DRV(HL_MEMORY, hl_mem_ioctl, 0), + DRM_IOCTL_DEF_DRV(HL_DEBUG, hl_debug_ioctl, 0), +}; + +static const struct file_operations hl_fops = { + .owner = THIS_MODULE, + .open = accel_open, + .release = drm_release, + .unlocked_ioctl = drm_ioctl, + .compat_ioctl = drm_compat_ioctl, + .llseek = noop_llseek, + .mmap = hl_mmap +}; + +static const struct drm_driver hl_driver = { + .driver_features = DRIVER_COMPUTE_ACCEL, + + .name = HL_NAME, + .desc = HL_DRIVER_DESC, + .major = LINUX_VERSION_MAJOR, + .minor = LINUX_VERSION_PATCHLEVEL, + .patchlevel = LINUX_VERSION_SUBLEVEL, + .date = "20190505", + + .fops = &hl_fops, + .open = hl_device_open, + .postclose = hl_device_release, + .ioctls = hl_drm_ioctls, + .num_ioctls = ARRAY_SIZE(hl_drm_ioctls) +}; + /* * get_asic_type - translate device id to asic type * @@ -101,6 +141,9 @@ static enum hl_asic_type get_asic_type(struct hl_device *hdev) case REV_ID_B: asic_type = ASIC_GAUDI2B; break; + case REV_ID_C: + asic_type = ASIC_GAUDI2C; + break; default: break; } @@ -123,43 +166,28 @@ static bool is_asic_secured(enum hl_asic_type asic_type) } /* - * hl_device_open - open function for habanalabs device - * - * @inode: pointer to inode structure - * @filp: pointer to file structure + * hl_device_open() - open function for habanalabs device. + * @ddev: pointer to DRM device structure. + * @file: pointer to DRM file private data structure. * * Called when process opens an habanalabs device. */ -int hl_device_open(struct inode *inode, struct file *filp) +int hl_device_open(struct drm_device *ddev, struct drm_file *file_priv) { + struct hl_device *hdev = to_hl_device(ddev); enum hl_device_status status; - struct hl_device *hdev; struct hl_fpriv *hpriv; int rc; - mutex_lock(&hl_devs_idr_lock); - hdev = idr_find(&hl_devs_idr, iminor(inode)); - mutex_unlock(&hl_devs_idr_lock); - - if (!hdev) { - pr_err("Couldn't find device %d:%d\n", - imajor(inode), iminor(inode)); - return -ENXIO; - } - hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL); if (!hpriv) return -ENOMEM; hpriv->hdev = hdev; - filp->private_data = hpriv; - hpriv->filp = filp; - mutex_init(&hpriv->notifier_event.lock); mutex_init(&hpriv->restore_phase_mutex); mutex_init(&hpriv->ctx_lock); kref_init(&hpriv->refcount); - nonseekable_open(inode, filp); hl_ctx_mgr_init(&hpriv->ctx_mgr); hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr); @@ -225,6 +253,9 @@ int hl_device_open(struct inode *inode, struct file *filp) hdev->last_successful_open_jif = jiffies; hdev->last_successful_open_ktime = ktime_get(); + file_priv->driver_priv = hpriv; + hpriv->file_priv = file_priv; + return 0; out_err: @@ -232,7 +263,6 @@ out_err: hl_mem_mgr_fini(&hpriv->mem_mgr); hl_mem_mgr_idr_destroy(&hpriv->mem_mgr); hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); - filp->private_data = NULL; mutex_destroy(&hpriv->ctx_lock); mutex_destroy(&hpriv->restore_phase_mutex); mutex_destroy(&hpriv->notifier_event.lock); @@ -268,9 +298,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp) */ hpriv->hdev = hdev; filp->private_data = hpriv; - hpriv->filp = filp; - mutex_init(&hpriv->notifier_event.lock); nonseekable_open(inode, filp); hpriv->taskpid = get_task_pid(current, PIDTYPE_PID); @@ -317,7 +345,6 @@ static void copy_kernel_module_params_to_device(struct hl_device *hdev) hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type); hdev->major = hl_major; - hdev->hclass = hl_class; hdev->memory_scrub = memory_scrub; hdev->reset_on_lockup = reset_on_lockup; hdev->boot_error_status_mask = boot_error_status_mask; @@ -383,6 +410,31 @@ static int fixup_device_params(struct hl_device *hdev) return 0; } +static int allocate_device_id(struct hl_device *hdev) +{ + int id; + + mutex_lock(&hl_devs_idr_lock); + id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL); + mutex_unlock(&hl_devs_idr_lock); + + if (id < 0) { + if (id == -ENOSPC) + pr_err("too many devices in the system\n"); + return -EBUSY; + } + + hdev->id = id; + + /* + * Firstly initialized with the internal device ID. + * Will be updated later after the DRM device registration to hold the minor ID. + */ + hdev->cdev_idx = hdev->id; + + return 0; +} + /** * create_hdev - create habanalabs device instance * @@ -395,27 +447,29 @@ static int fixup_device_params(struct hl_device *hdev) */ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev) { - int main_id, ctrl_id = 0, rc = 0; struct hl_device *hdev; + int rc; *dev = NULL; - hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); - if (!hdev) - return -ENOMEM; + hdev = devm_drm_dev_alloc(&pdev->dev, &hl_driver, struct hl_device, drm); + if (IS_ERR(hdev)) + return PTR_ERR(hdev); + + hdev->dev = hdev->drm.dev; /* Will be NULL in case of simulator device */ hdev->pdev = pdev; /* Assign status description string */ - strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX); - strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX); - strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX); - strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX); - strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION], - "in device creation", HL_STR_MAX); - strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE], - "in reset after device release", HL_STR_MAX); + strscpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX); + strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX); + strscpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX); + strscpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX); + strscpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION], + "in device creation", HL_STR_MAX); + strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE], + "in reset after device release", HL_STR_MAX); /* First, we must find out which ASIC are we handling. This is needed @@ -425,7 +479,7 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev) if (hdev->asic_type == ASIC_INVALID) { dev_err(&pdev->dev, "Unsupported ASIC\n"); rc = -ENODEV; - goto free_hdev; + goto out_err; } copy_kernel_module_params_to_device(hdev); @@ -434,42 +488,15 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev) fixup_device_params(hdev); - mutex_lock(&hl_devs_idr_lock); - - /* Always save 2 numbers, 1 for main device and 1 for control. - * They must be consecutive - */ - main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL); - - if (main_id >= 0) - ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1, - main_id + 2, GFP_KERNEL); - - mutex_unlock(&hl_devs_idr_lock); - - if ((main_id < 0) || (ctrl_id < 0)) { - if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC)) - pr_err("too many devices in the system\n"); - - if (main_id >= 0) { - mutex_lock(&hl_devs_idr_lock); - idr_remove(&hl_devs_idr, main_id); - mutex_unlock(&hl_devs_idr_lock); - } - - rc = -EBUSY; - goto free_hdev; - } - - hdev->id = main_id; - hdev->id_control = ctrl_id; + rc = allocate_device_id(hdev); + if (rc) + goto out_err; *dev = hdev; return 0; -free_hdev: - kfree(hdev); +out_err: return rc; } @@ -484,10 +511,8 @@ static void destroy_hdev(struct hl_device *hdev) /* Remove device from the device list */ mutex_lock(&hl_devs_idr_lock); idr_remove(&hl_devs_idr, hdev->id); - idr_remove(&hl_devs_idr, hdev->id_control); mutex_unlock(&hl_devs_idr_lock); - kfree(hdev); } static int hl_pmops_suspend(struct device *dev) @@ -691,28 +716,16 @@ static int __init hl_init(void) hl_major = MAJOR(dev); - hl_class = class_create(HL_NAME); - if (IS_ERR(hl_class)) { - pr_err("failed to allocate class\n"); - rc = PTR_ERR(hl_class); - goto remove_major; - } - - hl_debugfs_init(); - rc = pci_register_driver(&hl_pci_driver); if (rc) { pr_err("failed to register pci device\n"); - goto remove_debugfs; + goto remove_major; } pr_debug("driver loaded\n"); return 0; -remove_debugfs: - hl_debugfs_fini(); - class_destroy(hl_class); remove_major: unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS); return rc; @@ -725,14 +738,6 @@ static void __exit hl_exit(void) { pci_unregister_driver(&hl_pci_driver); - /* - * Removing debugfs must be after all devices or simulator devices - * have been removed because otherwise we get a bug in the - * debugfs module for referencing NULL objects - */ - hl_debugfs_fini(); - - class_destroy(hl_class); unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS); idr_destroy(&hl_devs_idr); diff --git a/drivers/accel/habanalabs/common/habanalabs_ioctl.c b/drivers/accel/habanalabs/common/habanalabs_ioctl.c index a7f6c54c12..a7cd625d82 100644 --- a/drivers/accel/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/accel/habanalabs/common/habanalabs_ioctl.c @@ -17,6 +17,8 @@ #include <linux/uaccess.h> #include <linux/vmalloc.h> +#include <asm/msr.h> + static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = { [HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr), [HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf), @@ -320,6 +322,7 @@ static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args) time_sync.device_time = hdev->asic_funcs->get_device_time(hdev); time_sync.host_time = ktime_get_raw_ns(); + time_sync.tsc_time = rdtsc(); return copy_to_user(out, &time_sync, min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0; @@ -875,6 +878,28 @@ static int fw_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args) return rc ? -EFAULT : 0; } +static int engine_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + void __user *user_buf = (void __user *) (uintptr_t) args->return_pointer; + struct hl_device *hdev = hpriv->hdev; + u32 user_buf_size = args->return_size; + struct engine_err_info *info; + int rc; + + if (!user_buf) + return -EINVAL; + + info = &hdev->captured_err_info.engine_err; + if (!info->event_info_available) + return 0; + + if (user_buf_size < sizeof(struct hl_info_engine_err_event)) + return -ENOMEM; + + rc = copy_to_user(user_buf, &info->event, sizeof(struct hl_info_engine_err_event)); + return rc ? -EFAULT : 0; +} + static int send_fw_generic_request(struct hl_device *hdev, struct hl_info_args *info_args) { void __user *buff = (void __user *) (uintptr_t) info_args->return_pointer; @@ -1001,6 +1026,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_FW_ERR_EVENT: return fw_err_info(hpriv, args); + case HL_INFO_USER_ENGINE_ERR_EVENT: + return engine_err_info(hpriv, args); + case HL_INFO_DRAM_USAGE: return dram_usage_info(hpriv, args); default: @@ -1070,20 +1098,34 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, return rc; } -static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data) +int hl_info_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv) { + struct hl_fpriv *hpriv = file_priv->driver_priv; + return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev); } static int hl_info_ioctl_control(struct hl_fpriv *hpriv, void *data) { + struct hl_info_args *args = data; + + switch (args->op) { + case HL_INFO_GET_EVENTS: + case HL_INFO_UNREGISTER_EVENTFD: + case HL_INFO_REGISTER_EVENTFD: + return -EOPNOTSUPP; + default: + break; + } + return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev_ctrl); } -static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data) +int hl_debug_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv) { - struct hl_debug_args *args = data; + struct hl_fpriv *hpriv = file_priv->driver_priv; struct hl_device *hdev = hpriv->hdev; + struct hl_debug_args *args = data; enum hl_device_status status; int rc = 0; @@ -1126,25 +1168,15 @@ static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data) } #define HL_IOCTL_DEF(ioctl, _func) \ - [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func} - -static const struct hl_ioctl_desc hl_ioctls[] = { - HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl), - HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl), - HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl), - HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_wait_ioctl), - HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl), - HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl) -}; + [_IOC_NR(ioctl) - HL_COMMAND_START] = {.cmd = ioctl, .func = _func} static const struct hl_ioctl_desc hl_ioctls_control[] = { - HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl_control) + HL_IOCTL_DEF(DRM_IOCTL_HL_INFO, hl_info_ioctl_control) }; -static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg, - const struct hl_ioctl_desc *ioctl, struct device *dev) +static long _hl_ioctl(struct hl_fpriv *hpriv, unsigned int cmd, unsigned long arg, + const struct hl_ioctl_desc *ioctl, struct device *dev) { - struct hl_fpriv *hpriv = filep->private_data; unsigned int nr = _IOC_NR(cmd); char stack_kdata[128] = {0}; char *kdata = NULL; @@ -1194,9 +1226,13 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg, retcode = -EFAULT; out_err: - if (retcode) - dev_dbg_ratelimited(dev, "error in ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n", - task_pid_nr(current), cmd, nr); + if (retcode) { + char task_comm[TASK_COMM_LEN]; + + dev_dbg_ratelimited(dev, + "error in ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n", + task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr); + } if (kdata != stack_kdata) kfree(kdata); @@ -1204,29 +1240,6 @@ out_err: return retcode; } -long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) -{ - struct hl_fpriv *hpriv = filep->private_data; - struct hl_device *hdev = hpriv->hdev; - const struct hl_ioctl_desc *ioctl = NULL; - unsigned int nr = _IOC_NR(cmd); - - if (!hdev) { - pr_err_ratelimited("Sending ioctl after device was removed! Please close FD\n"); - return -ENODEV; - } - - if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) { - ioctl = &hl_ioctls[nr]; - } else { - dev_dbg_ratelimited(hdev->dev, "invalid ioctl: pid=%d, nr=0x%02x\n", - task_pid_nr(current), nr); - return -ENOTTY; - } - - return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev); -} - long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg) { struct hl_fpriv *hpriv = filep->private_data; @@ -1239,13 +1252,16 @@ long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg) return -ENODEV; } - if (nr == _IOC_NR(HL_IOCTL_INFO)) { - ioctl = &hl_ioctls_control[nr]; + if (nr == _IOC_NR(DRM_IOCTL_HL_INFO)) { + ioctl = &hl_ioctls_control[nr - HL_COMMAND_START]; } else { - dev_dbg_ratelimited(hdev->dev_ctrl, "invalid ioctl: pid=%d, nr=0x%02x\n", - task_pid_nr(current), nr); + char task_comm[TASK_COMM_LEN]; + + dev_dbg_ratelimited(hdev->dev_ctrl, + "invalid ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n", + task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr); return -ENOTTY; } - return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev_ctrl); + return _hl_ioctl(hpriv, cmd, arg, ioctl, hdev->dev_ctrl); } diff --git a/drivers/accel/habanalabs/common/irq.c b/drivers/accel/habanalabs/common/irq.c index b1010d206c..978b7f4d5e 100644 --- a/drivers/accel/habanalabs/common/irq.c +++ b/drivers/accel/habanalabs/common/irq.c @@ -204,8 +204,10 @@ static void hl_ts_free_objects(struct work_struct *work) { struct timestamp_reg_work_obj *job = container_of(work, struct timestamp_reg_work_obj, free_obj); + struct list_head *dynamic_alloc_free_list_head = job->dynamic_alloc_free_obj_head; struct timestamp_reg_free_node *free_obj, *temp_free_obj; struct list_head *free_list_head = job->free_obj_head; + struct hl_device *hdev = job->hdev; list_for_each_entry_safe(free_obj, temp_free_obj, free_list_head, free_objects_node) { @@ -215,10 +217,28 @@ static void hl_ts_free_objects(struct work_struct *work) hl_mmap_mem_buf_put(free_obj->buf); hl_cb_put(free_obj->cq_cb); - kfree(free_obj); + atomic_set(&free_obj->in_use, 0); } kfree(free_list_head); + + if (dynamic_alloc_free_list_head) { + list_for_each_entry_safe(free_obj, temp_free_obj, dynamic_alloc_free_list_head, + free_objects_node) { + dev_dbg(hdev->dev, + "Dynamic_Alloc list: About to put refcount to buf (%p) cq_cb(%p)\n", + free_obj->buf, + free_obj->cq_cb); + + hl_mmap_mem_buf_put(free_obj->buf); + hl_cb_put(free_obj->cq_cb); + list_del(&free_obj->free_objects_node); + kfree(free_obj); + } + + kfree(dynamic_alloc_free_list_head); + } + kfree(job); } @@ -233,11 +253,18 @@ static void hl_ts_free_objects(struct work_struct *work) * list to a dedicated workqueue to do the actual put. */ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend, - struct list_head **free_list, ktime_t now) + struct list_head **free_list, + struct list_head **dynamic_alloc_list, + struct hl_user_interrupt *intr) { + struct hl_ts_free_jobs *ts_free_jobs_data; struct timestamp_reg_free_node *free_node; + u32 free_node_index; u64 timestamp; + ts_free_jobs_data = &intr->ts_free_jobs_data; + free_node_index = ts_free_jobs_data->next_avail_free_node_idx; + if (!(*free_list)) { /* Alloc/Init the timestamp registration free objects list */ *free_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC); @@ -247,39 +274,65 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi INIT_LIST_HEAD(*free_list); } - free_node = kmalloc(sizeof(*free_node), GFP_ATOMIC); - if (!free_node) - return -ENOMEM; + free_node = &ts_free_jobs_data->free_nodes_pool[free_node_index]; + if (atomic_cmpxchg(&free_node->in_use, 0, 1)) { + dev_dbg(hdev->dev, + "Timestamp free node pool is full, buff: %p, record: %p, irq: %u\n", + pend->ts_reg_info.buf, + pend, + intr->interrupt_id); - timestamp = ktime_to_ns(now); + if (!(*dynamic_alloc_list)) { + *dynamic_alloc_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC); + if (!(*dynamic_alloc_list)) + return -ENOMEM; - *pend->ts_reg_info.timestamp_kernel_addr = timestamp; + INIT_LIST_HEAD(*dynamic_alloc_list); + } + + free_node = kmalloc(sizeof(struct timestamp_reg_free_node), GFP_ATOMIC); + if (!free_node) + return -ENOMEM; + + free_node->dynamic_alloc = 1; + } - dev_dbg(hdev->dev, "Timestamp is set to ts cb address (%p), ts: 0x%llx\n", - pend->ts_reg_info.timestamp_kernel_addr, - *(u64 *)pend->ts_reg_info.timestamp_kernel_addr); + timestamp = ktime_to_ns(intr->timestamp); - list_del(&pend->wait_list_node); + *pend->ts_reg_info.timestamp_kernel_addr = timestamp; + + dev_dbg(hdev->dev, "Irq handle: Timestamp record (%p) ts cb address (%p), interrupt_id: %u\n", + pend, pend->ts_reg_info.timestamp_kernel_addr, intr->interrupt_id); - /* Mark kernel CB node as free */ - pend->ts_reg_info.in_use = 0; + list_del(&pend->list_node); /* Putting the refcount for ts_buff and cq_cb objects will be handled * in workqueue context, just add job to free_list. */ free_node->buf = pend->ts_reg_info.buf; free_node->cq_cb = pend->ts_reg_info.cq_cb; - list_add(&free_node->free_objects_node, *free_list); + + if (free_node->dynamic_alloc) { + list_add(&free_node->free_objects_node, *dynamic_alloc_list); + } else { + ts_free_jobs_data->next_avail_free_node_idx = + (++free_node_index) % ts_free_jobs_data->free_nodes_length; + list_add(&free_node->free_objects_node, *free_list); + } + + /* Mark TS record as free */ + pend->ts_reg_info.in_use = false; return 0; } -static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interrupt *intr) +static void handle_user_interrupt_ts_list(struct hl_device *hdev, struct hl_user_interrupt *intr) { + struct list_head *ts_reg_free_list_head = NULL, *dynamic_alloc_list_head = NULL; struct hl_user_pending_interrupt *pend, *temp_pend; - struct list_head *ts_reg_free_list_head = NULL; struct timestamp_reg_work_obj *job; bool reg_node_handle_fail = false; + unsigned long flags; int rc; /* For registration nodes: @@ -288,36 +341,32 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru * or in irq handler context at all (since release functions are long and * might sleep), so we will need to handle that part in workqueue context. * To avoid handling kmalloc failure which compels us rolling back actions - * and move nodes hanged on the free list back to the interrupt wait list + * and move nodes hanged on the free list back to the interrupt ts list * we always alloc the job of the WQ at the beginning. */ job = kmalloc(sizeof(*job), GFP_ATOMIC); if (!job) return; - spin_lock(&intr->wait_list_lock); - list_for_each_entry_safe(pend, temp_pend, &intr->wait_list_head, wait_list_node) { + spin_lock_irqsave(&intr->ts_list_lock, flags); + list_for_each_entry_safe(pend, temp_pend, &intr->ts_list_head, list_node) { if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) || !pend->cq_kernel_addr) { - if (pend->ts_reg_info.buf) { - if (!reg_node_handle_fail) { - rc = handle_registration_node(hdev, pend, - &ts_reg_free_list_head, intr->timestamp); - if (rc) - reg_node_handle_fail = true; - } - } else { - /* Handle wait target value node */ - pend->fence.timestamp = intr->timestamp; - complete_all(&pend->fence.completion); + if (!reg_node_handle_fail) { + rc = handle_registration_node(hdev, pend, + &ts_reg_free_list_head, + &dynamic_alloc_list_head, intr); + if (rc) + reg_node_handle_fail = true; } } } - spin_unlock(&intr->wait_list_lock); + spin_unlock_irqrestore(&intr->ts_list_lock, flags); if (ts_reg_free_list_head) { INIT_WORK(&job->free_obj, hl_ts_free_objects); job->free_obj_head = ts_reg_free_list_head; + job->dynamic_alloc_free_obj_head = dynamic_alloc_list_head; job->hdev = hdev; queue_work(hdev->ts_free_obj_wq, &job->free_obj); } else { @@ -325,6 +374,23 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru } } +static void handle_user_interrupt_wait_list(struct hl_device *hdev, struct hl_user_interrupt *intr) +{ + struct hl_user_pending_interrupt *pend, *temp_pend; + unsigned long flags; + + spin_lock_irqsave(&intr->wait_list_lock, flags); + list_for_each_entry_safe(pend, temp_pend, &intr->wait_list_head, list_node) { + if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) || + !pend->cq_kernel_addr) { + /* Handle wait target value node */ + pend->fence.timestamp = intr->timestamp; + complete_all(&pend->fence.completion); + } + } + spin_unlock_irqrestore(&intr->wait_list_lock, flags); +} + static void handle_tpc_interrupt(struct hl_device *hdev) { u64 event_mask; @@ -346,19 +412,38 @@ static void handle_unexpected_user_interrupt(struct hl_device *hdev) } /** - * hl_irq_handler_user_interrupt - irq handler for user interrupts + * hl_irq_user_interrupt_handler - irq handler for user interrupts. * * @irq: irq number * @arg: pointer to user interrupt structure - * */ -irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg) +irqreturn_t hl_irq_user_interrupt_handler(int irq, void *arg) { struct hl_user_interrupt *user_int = arg; + struct hl_device *hdev = user_int->hdev; user_int->timestamp = ktime_get(); + switch (user_int->type) { + case HL_USR_INTERRUPT_CQ: + /* First handle user waiters threads */ + handle_user_interrupt_wait_list(hdev, &hdev->common_user_cq_interrupt); + handle_user_interrupt_wait_list(hdev, user_int); - return IRQ_WAKE_THREAD; + /* Second handle user timestamp registrations */ + handle_user_interrupt_ts_list(hdev, &hdev->common_user_cq_interrupt); + handle_user_interrupt_ts_list(hdev, user_int); + break; + case HL_USR_INTERRUPT_DECODER: + handle_user_interrupt_wait_list(hdev, &hdev->common_decoder_interrupt); + + /* Handle decoder interrupt registered on this specific irq */ + handle_user_interrupt_wait_list(hdev, user_int); + break; + default: + break; + } + + return IRQ_HANDLED; } /** @@ -374,19 +459,8 @@ irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg) struct hl_user_interrupt *user_int = arg; struct hl_device *hdev = user_int->hdev; + user_int->timestamp = ktime_get(); switch (user_int->type) { - case HL_USR_INTERRUPT_CQ: - handle_user_interrupt(hdev, &hdev->common_user_cq_interrupt); - - /* Handle user cq interrupt registered on this specific irq */ - handle_user_interrupt(hdev, user_int); - break; - case HL_USR_INTERRUPT_DECODER: - handle_user_interrupt(hdev, &hdev->common_decoder_interrupt); - - /* Handle decoder interrupt registered on this specific irq */ - handle_user_interrupt(hdev, user_int); - break; case HL_USR_INTERRUPT_TPC: handle_tpc_interrupt(hdev); break; @@ -400,6 +474,18 @@ irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg) return IRQ_HANDLED; } +irqreturn_t hl_irq_eq_error_interrupt_thread_handler(int irq, void *arg) +{ + u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE; + struct hl_device *hdev = arg; + + dev_err(hdev->dev, "EQ error interrupt received\n"); + + hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, event_mask); + + return IRQ_HANDLED; +} + /** * hl_irq_handler_eq - irq handler for event queue * diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c index 4fc72a07d2..0b8689fe0b 100644 --- a/drivers/accel/habanalabs/common/memory.c +++ b/drivers/accel/habanalabs/common/memory.c @@ -244,7 +244,7 @@ static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size, *p_userptr = userptr; - rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL); + rc = hl_dma_map_sgtable(hdev, userptr->sgt, DMA_BIDIRECTIONAL); if (rc) { dev_err(hdev->dev, "failed to map sgt with DMA region\n"); goto dma_map_err; @@ -832,7 +832,6 @@ int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, * physical pages * * This function does the following: - * - Pin the physical pages related to the given virtual block. * - Create a physical page pack from the physical pages related to the given * virtual block. */ @@ -1532,24 +1531,20 @@ static int set_dma_sg(struct scatterlist *sg, u64 bar_address, u64 chunk_size, } static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 *pages, u64 npages, - u64 page_size, u64 exported_size, + u64 page_size, u64 exported_size, u64 offset, struct device *dev, enum dma_data_direction dir) { - u64 chunk_size, bar_address, dma_max_seg_size, cur_size_to_export, cur_npages; - struct asic_fixed_properties *prop; - int rc, i, j, nents, cur_page; + u64 dma_max_seg_size, curr_page, size, chunk_size, left_size_to_export, left_size_in_page, + left_size_in_dma_seg, device_address, bar_address, start_page; + struct asic_fixed_properties *prop = &hdev->asic_prop; struct scatterlist *sg; + unsigned int nents, i; struct sg_table *sgt; + bool next_sg_entry; + int rc; - prop = &hdev->asic_prop; - - dma_max_seg_size = dma_get_max_seg_size(dev); - - /* We would like to align the max segment size to PAGE_SIZE, so the - * SGL will contain aligned addresses that can be easily mapped to - * an MMU - */ - dma_max_seg_size = ALIGN_DOWN(dma_max_seg_size, PAGE_SIZE); + /* Align max segment size to PAGE_SIZE to fit the minimal IOMMU mapping granularity */ + dma_max_seg_size = ALIGN_DOWN(dma_get_max_seg_size(dev), PAGE_SIZE); if (dma_max_seg_size < PAGE_SIZE) { dev_err_ratelimited(hdev->dev, "dma_max_seg_size %llu can't be smaller than PAGE_SIZE\n", @@ -1561,121 +1556,149 @@ static struct sg_table *alloc_sgt_from_device_pages(struct hl_device *hdev, u64 if (!sgt) return ERR_PTR(-ENOMEM); - /* remove export size restrictions in case not explicitly defined */ - cur_size_to_export = exported_size ? exported_size : (npages * page_size); - - /* If the size of each page is larger than the dma max segment size, - * then we can't combine pages and the number of entries in the SGL - * will just be the - * <number of pages> * <chunks of max segment size in each page> - */ - if (page_size > dma_max_seg_size) { - /* we should limit number of pages according to the exported size */ - cur_npages = DIV_ROUND_UP_SECTOR_T(cur_size_to_export, page_size); - nents = cur_npages * DIV_ROUND_UP_SECTOR_T(page_size, dma_max_seg_size); - } else { - cur_npages = npages; - - /* Get number of non-contiguous chunks */ - for (i = 1, nents = 1, chunk_size = page_size ; i < cur_npages ; i++) { - if (pages[i - 1] + page_size != pages[i] || - chunk_size + page_size > dma_max_seg_size) { - nents++; - chunk_size = page_size; - continue; - } + /* Use the offset to move to the actual first page that is exported */ + for (start_page = 0 ; start_page < npages ; ++start_page) { + if (offset < page_size) + break; - chunk_size += page_size; - } + /* The offset value was validated so there can't be an underflow */ + offset -= page_size; } - rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO); - if (rc) - goto error_free; + /* Calculate the required number of entries for the SG table */ + curr_page = start_page; + nents = 1; + left_size_to_export = exported_size; + left_size_in_page = page_size - offset; + left_size_in_dma_seg = dma_max_seg_size; + next_sg_entry = false; - cur_page = 0; + while (true) { + size = min3(left_size_to_export, left_size_in_page, left_size_in_dma_seg); + left_size_to_export -= size; + left_size_in_page -= size; + left_size_in_dma_seg -= size; - if (page_size > dma_max_seg_size) { - u64 size_left, cur_device_address = 0; + if (!left_size_to_export) + break; - size_left = page_size; + if (!left_size_in_page) { + /* left_size_to_export is not zero so there must be another page */ + if (pages[curr_page] + page_size != pages[curr_page + 1]) + next_sg_entry = true; - /* Need to split each page into the number of chunks of - * dma_max_seg_size - */ - for_each_sgtable_dma_sg(sgt, sg, i) { - if (size_left == page_size) - cur_device_address = - pages[cur_page] - prop->dram_base_address; - else - cur_device_address += dma_max_seg_size; + ++curr_page; + left_size_in_page = page_size; + } - /* make sure not to export over exported size */ - chunk_size = min3(size_left, dma_max_seg_size, cur_size_to_export); + if (!left_size_in_dma_seg) { + next_sg_entry = true; + left_size_in_dma_seg = dma_max_seg_size; + } - bar_address = hdev->dram_pci_bar_start + cur_device_address; + if (next_sg_entry) { + ++nents; + next_sg_entry = false; + } + } - rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir); - if (rc) - goto error_unmap; + rc = sg_alloc_table(sgt, nents, GFP_KERNEL | __GFP_ZERO); + if (rc) + goto err_free_sgt; - cur_size_to_export -= chunk_size; + /* Prepare the SG table entries */ + curr_page = start_page; + device_address = pages[curr_page] + offset; + left_size_to_export = exported_size; + left_size_in_page = page_size - offset; + left_size_in_dma_seg = dma_max_seg_size; + next_sg_entry = false; - if (size_left > dma_max_seg_size) { - size_left -= dma_max_seg_size; - } else { - cur_page++; - size_left = page_size; + for_each_sgtable_dma_sg(sgt, sg, i) { + bar_address = hdev->dram_pci_bar_start + (device_address - prop->dram_base_address); + chunk_size = 0; + + for ( ; curr_page < npages ; ++curr_page) { + size = min3(left_size_to_export, left_size_in_page, left_size_in_dma_seg); + chunk_size += size; + left_size_to_export -= size; + left_size_in_page -= size; + left_size_in_dma_seg -= size; + + if (!left_size_to_export) + break; + + if (!left_size_in_page) { + /* left_size_to_export is not zero so there must be another page */ + if (pages[curr_page] + page_size != pages[curr_page + 1]) { + device_address = pages[curr_page + 1]; + next_sg_entry = true; + } + + left_size_in_page = page_size; } - } - } else { - /* Merge pages and put them into the scatterlist */ - for_each_sgtable_dma_sg(sgt, sg, i) { - chunk_size = page_size; - for (j = cur_page + 1 ; j < cur_npages ; j++) { - if (pages[j - 1] + page_size != pages[j] || - chunk_size + page_size > dma_max_seg_size) - break; - - chunk_size += page_size; + + if (!left_size_in_dma_seg) { + /* + * Skip setting a new device address if already moving to a page + * which is not contiguous with the current page. + */ + if (!next_sg_entry) { + device_address += chunk_size; + next_sg_entry = true; + } + + left_size_in_dma_seg = dma_max_seg_size; } - bar_address = hdev->dram_pci_bar_start + - (pages[cur_page] - prop->dram_base_address); + if (next_sg_entry) { + next_sg_entry = false; + break; + } + } - /* make sure not to export over exported size */ - chunk_size = min(chunk_size, cur_size_to_export); - rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir); - if (rc) - goto error_unmap; + rc = set_dma_sg(sg, bar_address, chunk_size, dev, dir); + if (rc) + goto err_unmap; + } - cur_size_to_export -= chunk_size; - cur_page = j; - } + /* There should be nothing left to export exactly after looping over all SG elements */ + if (left_size_to_export) { + dev_err(hdev->dev, + "left size to export %#llx after initializing %u SG elements\n", + left_size_to_export, sgt->nents); + rc = -ENOMEM; + goto err_unmap; } - /* Because we are not going to include a CPU list we want to have some - * chance that other users will detect this by setting the orig_nents - * to 0 and using only nents (length of DMA list) when going over the - * sgl + /* + * Because we are not going to include a CPU list, we want to have some chance that other + * users will detect this when going over SG table, by setting the orig_nents to 0 and using + * only nents (length of DMA list). */ sgt->orig_nents = 0; + dev_dbg(hdev->dev, "prepared SG table with %u entries for importer %s\n", + nents, dev_name(dev)); + for_each_sgtable_dma_sg(sgt, sg, i) + dev_dbg(hdev->dev, + "SG entry %d: address %#llx, length %#x\n", + i, sg_dma_address(sg), sg_dma_len(sg)); + return sgt; -error_unmap: +err_unmap: for_each_sgtable_dma_sg(sgt, sg, i) { if (!sg_dma_len(sg)) continue; - dma_unmap_resource(dev, sg_dma_address(sg), - sg_dma_len(sg), dir, + dma_unmap_resource(dev, sg_dma_address(sg), sg_dma_len(sg), dir, DMA_ATTR_SKIP_CPU_SYNC); } sg_free_table(sgt); -error_free: +err_free_sgt: kfree(sgt); return ERR_PTR(rc); } @@ -1700,6 +1723,7 @@ static int hl_dmabuf_attach(struct dma_buf *dmabuf, static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment, enum dma_data_direction dir) { + u64 *pages, npages, page_size, exported_size, offset; struct dma_buf *dma_buf = attachment->dmabuf; struct hl_vm_phys_pg_pack *phys_pg_pack; struct hl_dmabuf_priv *hl_dmabuf; @@ -1708,30 +1732,28 @@ static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment, hl_dmabuf = dma_buf->priv; hdev = hl_dmabuf->ctx->hdev; - phys_pg_pack = hl_dmabuf->phys_pg_pack; if (!attachment->peer2peer) { dev_dbg(hdev->dev, "Failed to map dmabuf because p2p is disabled\n"); return ERR_PTR(-EPERM); } - if (phys_pg_pack) - sgt = alloc_sgt_from_device_pages(hdev, - phys_pg_pack->pages, - phys_pg_pack->npages, - phys_pg_pack->page_size, - phys_pg_pack->exported_size, - attachment->dev, - dir); - else - sgt = alloc_sgt_from_device_pages(hdev, - &hl_dmabuf->device_address, - 1, - hl_dmabuf->dmabuf->size, - 0, - attachment->dev, - dir); + exported_size = hl_dmabuf->dmabuf->size; + offset = hl_dmabuf->offset; + phys_pg_pack = hl_dmabuf->phys_pg_pack; + if (phys_pg_pack) { + pages = phys_pg_pack->pages; + npages = phys_pg_pack->npages; + page_size = phys_pg_pack->page_size; + } else { + pages = &hl_dmabuf->device_phys_addr; + npages = 1; + page_size = hl_dmabuf->dmabuf->size; + } + + sgt = alloc_sgt_from_device_pages(hdev, pages, npages, page_size, exported_size, offset, + attachment->dev, dir); if (IS_ERR(sgt)) dev_err(hdev->dev, "failed (%ld) to initialize sgt for dmabuf\n", PTR_ERR(sgt)); @@ -1818,7 +1840,7 @@ static void hl_release_dmabuf(struct dma_buf *dmabuf) hl_ctx_put(ctx); /* Paired with get_file() in export_dmabuf() */ - fput(ctx->hpriv->filp); + fput(ctx->hpriv->file_priv->filp); kfree(hl_dmabuf); } @@ -1864,7 +1886,7 @@ static int export_dmabuf(struct hl_ctx *ctx, * released first and only then the compute device. * Paired with fput() in hl_release_dmabuf(). */ - get_file(ctx->hpriv->filp); + get_file(ctx->hpriv->file_priv->filp); *dmabuf_fd = fd; @@ -1876,22 +1898,29 @@ err_dma_buf_put: return rc; } -static int validate_export_params_common(struct hl_device *hdev, u64 device_addr, u64 size) +static int validate_export_params_common(struct hl_device *hdev, u64 addr, u64 size, u64 offset) { - if (!IS_ALIGNED(device_addr, PAGE_SIZE)) { + if (!PAGE_ALIGNED(addr)) { dev_dbg(hdev->dev, - "exported device memory address 0x%llx should be aligned to 0x%lx\n", - device_addr, PAGE_SIZE); + "exported device memory address 0x%llx should be aligned to PAGE_SIZE 0x%lx\n", + addr, PAGE_SIZE); return -EINVAL; } - if (size < PAGE_SIZE) { + if (!size || !PAGE_ALIGNED(size)) { dev_dbg(hdev->dev, - "exported device memory size %llu should be equal to or greater than %lu\n", + "exported device memory size %llu should be a multiple of PAGE_SIZE %lu\n", size, PAGE_SIZE); return -EINVAL; } + if (!PAGE_ALIGNED(offset)) { + dev_dbg(hdev->dev, + "exported device memory offset %llu should be a multiple of PAGE_SIZE %lu\n", + offset, PAGE_SIZE); + return -EINVAL; + } + return 0; } @@ -1901,13 +1930,13 @@ static int validate_export_params_no_mmu(struct hl_device *hdev, u64 device_addr u64 bar_address; int rc; - rc = validate_export_params_common(hdev, device_addr, size); + rc = validate_export_params_common(hdev, device_addr, size, 0); if (rc) return rc; if (device_addr < prop->dram_user_base_address || - (device_addr + size) > prop->dram_end_address || - (device_addr + size) < device_addr) { + (device_addr + size) > prop->dram_end_address || + (device_addr + size) < device_addr) { dev_dbg(hdev->dev, "DRAM memory range 0x%llx (+0x%llx) is outside of DRAM boundaries\n", device_addr, size); @@ -1934,29 +1963,26 @@ static int validate_export_params(struct hl_device *hdev, u64 device_addr, u64 s u64 bar_address; int i, rc; - rc = validate_export_params_common(hdev, device_addr, size); + rc = validate_export_params_common(hdev, device_addr, size, offset); if (rc) return rc; if ((offset + size) > phys_pg_pack->total_size) { dev_dbg(hdev->dev, "offset %#llx and size %#llx exceed total map size %#llx\n", - offset, size, phys_pg_pack->total_size); + offset, size, phys_pg_pack->total_size); return -EINVAL; } for (i = 0 ; i < phys_pg_pack->npages ; i++) { - bar_address = hdev->dram_pci_bar_start + - (phys_pg_pack->pages[i] - prop->dram_base_address); + (phys_pg_pack->pages[i] - prop->dram_base_address); if ((bar_address + phys_pg_pack->page_size) > (hdev->dram_pci_bar_start + prop->dram_pci_bar_size) || (bar_address + phys_pg_pack->page_size) < bar_address) { dev_dbg(hdev->dev, "DRAM memory range 0x%llx (+0x%x) is outside of PCI BAR boundaries\n", - phys_pg_pack->pages[i], - phys_pg_pack->page_size); - + phys_pg_pack->pages[i], phys_pg_pack->page_size); return -EINVAL; } } @@ -2012,7 +2038,6 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o struct asic_fixed_properties *prop; struct hl_dmabuf_priv *hl_dmabuf; struct hl_device *hdev; - u64 export_addr; int rc; hdev = ctx->hdev; @@ -2024,8 +2049,6 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o return -EINVAL; } - export_addr = addr + offset; - hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL); if (!hl_dmabuf) return -ENOMEM; @@ -2041,20 +2064,20 @@ static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 o rc = PTR_ERR(phys_pg_pack); goto dec_memhash_export_cnt; } - rc = validate_export_params(hdev, export_addr, size, offset, phys_pg_pack); + rc = validate_export_params(hdev, addr, size, offset, phys_pg_pack); if (rc) goto dec_memhash_export_cnt; - phys_pg_pack->exported_size = size; hl_dmabuf->phys_pg_pack = phys_pg_pack; hl_dmabuf->memhash_hnode = hnode; + hl_dmabuf->offset = offset; } else { - rc = validate_export_params_no_mmu(hdev, export_addr, size); + rc = validate_export_params_no_mmu(hdev, addr, size); if (rc) goto err_free_dmabuf_wrapper; - } - hl_dmabuf->device_address = export_addr; + hl_dmabuf->device_phys_addr = addr; + } rc = export_dmabuf(ctx, hl_dmabuf, size, flags, dmabuf_fd); if (rc) @@ -2171,8 +2194,9 @@ static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in return 0; } -int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) +int hl_mem_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv) { + struct hl_fpriv *hpriv = file_priv->driver_priv; enum hl_device_status status; union hl_mem_args *args = data; struct hl_device *hdev = hpriv->hdev; @@ -2420,7 +2444,7 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr) hl_debugfs_remove_userptr(hdev, userptr); if (userptr->dma_mapped) - hdev->asic_funcs->hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir); + hl_dma_unmap_sgtable(hdev, userptr->sgt, userptr->dir); unpin_user_pages_dirty_lock(userptr->pages, userptr->npages, true); kvfree(userptr->pages); diff --git a/drivers/accel/habanalabs/common/mmu/mmu.c b/drivers/accel/habanalabs/common/mmu/mmu.c index b2145716c6..b654302a68 100644 --- a/drivers/accel/habanalabs/common/mmu/mmu.c +++ b/drivers/accel/habanalabs/common/mmu/mmu.c @@ -596,6 +596,7 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev) break; case ASIC_GAUDI2: case ASIC_GAUDI2B: + case ASIC_GAUDI2C: /* MMUs in Gaudi2 are always host resident */ hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]); break; diff --git a/drivers/accel/habanalabs/common/sysfs.c b/drivers/accel/habanalabs/common/sysfs.c index 01f89f0293..2786063730 100644 --- a/drivers/accel/habanalabs/common/sysfs.c +++ b/drivers/accel/habanalabs/common/sysfs.c @@ -251,6 +251,9 @@ static ssize_t device_type_show(struct device *dev, case ASIC_GAUDI2B: str = "GAUDI2B"; break; + case ASIC_GAUDI2C: + str = "GAUDI2C"; + break; default: dev_err(hdev->dev, "Unrecognized ASIC type %d\n", hdev->asic_type); diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c index 056e2ef44a..53292d4c15 100644 --- a/drivers/accel/habanalabs/gaudi/gaudi.c +++ b/drivers/accel/habanalabs/gaudi/gaudi.c @@ -63,6 +63,10 @@ #define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb" #define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin" +MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE); +MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE); +MODULE_FIRMWARE(GAUDI_TPC_FW_FILE); + #define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */ #define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */ @@ -660,7 +664,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; - strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, + strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); prop->max_pending_cs = GAUDI_MAX_PENDING_CS; @@ -4619,8 +4623,7 @@ static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val) static int gaudi_scrub_device_mem(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US : - min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US); + u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US; u64 addr, size, val = hdev->memory_scrub_val; ktime_t timeout; int rc = 0; @@ -4904,7 +4907,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev, list_add_tail(&userptr->job_node, parser->job_userptr_list); - rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir); + rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir); if (rc) { dev_err(hdev->dev, "failed to map sgt with DMA region\n"); goto unpin_memory; @@ -8000,7 +8003,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev) return rc; if (!strlen(prop->cpucp_info.card_name)) - strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, + strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type); @@ -9140,9 +9143,9 @@ static const struct hl_asic_funcs gaudi_funcs = { .asic_dma_pool_free = gaudi_dma_pool_free, .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc, .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free, - .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable, + .dma_unmap_sgtable = hl_asic_dma_unmap_sgtable, .cs_parser = gaudi_cs_parser, - .asic_dma_map_sgtable = hl_dma_map_sgtable, + .dma_map_sgtable = hl_asic_dma_map_sgtable, .add_end_of_cb_packets = gaudi_add_end_of_cb_packets, .update_eq_ci = gaudi_update_eq_ci, .context_switch = gaudi_context_switch, diff --git a/drivers/accel/habanalabs/gaudi/gaudiP.h b/drivers/accel/habanalabs/gaudi/gaudiP.h index b8fa724be5..831be53bb9 100644 --- a/drivers/accel/habanalabs/gaudi/gaudiP.h +++ b/drivers/accel/habanalabs/gaudi/gaudiP.h @@ -10,7 +10,7 @@ #include <uapi/drm/habanalabs_accel.h> #include "../common/habanalabs.h" -#include "../include/common/hl_boot_if.h" +#include <linux/habanalabs/hl_boot_if.h> #include "../include/gaudi/gaudi_packets.h" #include "../include/gaudi/gaudi.h" #include "../include/gaudi/gaudi_async_events.h" diff --git a/drivers/accel/habanalabs/gaudi/gaudi_coresight.c b/drivers/accel/habanalabs/gaudi/gaudi_coresight.c index 3455b14554..1168fefa33 100644 --- a/drivers/accel/habanalabs/gaudi/gaudi_coresight.c +++ b/drivers/accel/habanalabs/gaudi/gaudi_coresight.c @@ -482,6 +482,11 @@ static int gaudi_config_etf(struct hl_device *hdev, WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK); + val = RREG32(base_reg + 0x20); + + if ((!params->enable && val == 0x0) || (params->enable && val != 0x0)) + return 0; + val = RREG32(base_reg + 0x304); val |= 0x1000; WREG32(base_reg + 0x304, val); @@ -580,6 +585,13 @@ static int gaudi_config_etr(struct hl_device *hdev, WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK); + val = RREG32(mmPSOC_ETR_CTL); + + if ((!params->enable && val == 0x0) || (params->enable && val != 0x0)) + return 0; + + + val = RREG32(mmPSOC_ETR_FFCR); val |= 0x1000; WREG32(mmPSOC_ETR_FFCR, val); diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index 31c74ca70a..bc6e338ef2 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -66,7 +66,6 @@ #define GAUDI2_NUM_OF_TPC_INTR_CAUSE 31 #define GAUDI2_NUM_OF_DEC_ERR_CAUSE 25 #define GAUDI2_NUM_OF_MME_ERR_CAUSE 16 -#define GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE 5 #define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE 7 #define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE 8 #define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE 19 @@ -916,14 +915,6 @@ static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = "sbte_prtn_intr_4", }; -static const char * const guadi2_mme_sbte_error_cause[GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE] = { - "i0", - "i1", - "i2", - "i3", - "i4", -}; - static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = { "WBC ERR RESP_0", "WBC ERR RESP_1", @@ -993,6 +984,111 @@ gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = { "TLP is blocked by RR" }; +static const int gaudi2_queue_id_to_engine_id[] = { + [GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_ENGINE_ID_PDMA_0, + [GAUDI2_QUEUE_ID_PDMA_1_0...GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_ENGINE_ID_PDMA_1, + [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = + GAUDI2_DCORE0_ENGINE_ID_EDMA_0, + [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = + GAUDI2_DCORE0_ENGINE_ID_EDMA_1, + [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = + GAUDI2_DCORE1_ENGINE_ID_EDMA_0, + [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = + GAUDI2_DCORE1_ENGINE_ID_EDMA_1, + [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = + GAUDI2_DCORE2_ENGINE_ID_EDMA_0, + [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = + GAUDI2_DCORE2_ENGINE_ID_EDMA_1, + [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = + GAUDI2_DCORE3_ENGINE_ID_EDMA_0, + [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = + GAUDI2_DCORE3_ENGINE_ID_EDMA_1, + [GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = + GAUDI2_DCORE0_ENGINE_ID_MME, + [GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = + GAUDI2_DCORE1_ENGINE_ID_MME, + [GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = + GAUDI2_DCORE2_ENGINE_ID_MME, + [GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = + GAUDI2_DCORE3_ENGINE_ID_MME, + [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0...GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = + GAUDI2_DCORE0_ENGINE_ID_TPC_0, + [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0...GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = + GAUDI2_DCORE0_ENGINE_ID_TPC_1, + [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0...GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = + GAUDI2_DCORE0_ENGINE_ID_TPC_2, + [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0...GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = + GAUDI2_DCORE0_ENGINE_ID_TPC_3, + [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0...GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = + GAUDI2_DCORE0_ENGINE_ID_TPC_4, + [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0...GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = + GAUDI2_DCORE0_ENGINE_ID_TPC_5, + [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0...GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = + GAUDI2_DCORE0_ENGINE_ID_TPC_6, + [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0...GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = + GAUDI2_DCORE1_ENGINE_ID_TPC_0, + [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0...GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = + GAUDI2_DCORE1_ENGINE_ID_TPC_1, + [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0...GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = + GAUDI2_DCORE1_ENGINE_ID_TPC_2, + [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0...GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = + GAUDI2_DCORE1_ENGINE_ID_TPC_3, + [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0...GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = + GAUDI2_DCORE1_ENGINE_ID_TPC_4, + [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0...GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = + GAUDI2_DCORE1_ENGINE_ID_TPC_5, + [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0...GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = + GAUDI2_DCORE2_ENGINE_ID_TPC_0, + [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0...GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = + GAUDI2_DCORE2_ENGINE_ID_TPC_1, + [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0...GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = + GAUDI2_DCORE2_ENGINE_ID_TPC_2, + [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0...GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = + GAUDI2_DCORE2_ENGINE_ID_TPC_3, + [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0...GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = + GAUDI2_DCORE2_ENGINE_ID_TPC_4, + [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0...GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = + GAUDI2_DCORE2_ENGINE_ID_TPC_5, + [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0...GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = + GAUDI2_DCORE3_ENGINE_ID_TPC_0, + [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0...GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = + GAUDI2_DCORE3_ENGINE_ID_TPC_1, + [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0...GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = + GAUDI2_DCORE3_ENGINE_ID_TPC_2, + [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0...GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = + GAUDI2_DCORE3_ENGINE_ID_TPC_3, + [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0...GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = + GAUDI2_DCORE3_ENGINE_ID_TPC_4, + [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0...GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = + GAUDI2_DCORE3_ENGINE_ID_TPC_5, + [GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_ENGINE_ID_NIC0_0, + [GAUDI2_QUEUE_ID_NIC_1_0...GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_ENGINE_ID_NIC0_1, + [GAUDI2_QUEUE_ID_NIC_2_0...GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_ENGINE_ID_NIC1_0, + [GAUDI2_QUEUE_ID_NIC_3_0...GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_ENGINE_ID_NIC1_1, + [GAUDI2_QUEUE_ID_NIC_4_0...GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_ENGINE_ID_NIC2_0, + [GAUDI2_QUEUE_ID_NIC_5_0...GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_ENGINE_ID_NIC2_1, + [GAUDI2_QUEUE_ID_NIC_6_0...GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_ENGINE_ID_NIC3_0, + [GAUDI2_QUEUE_ID_NIC_7_0...GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_ENGINE_ID_NIC3_1, + [GAUDI2_QUEUE_ID_NIC_8_0...GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_ENGINE_ID_NIC4_0, + [GAUDI2_QUEUE_ID_NIC_9_0...GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_ENGINE_ID_NIC4_1, + [GAUDI2_QUEUE_ID_NIC_10_0...GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_ENGINE_ID_NIC5_0, + [GAUDI2_QUEUE_ID_NIC_11_0...GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_ENGINE_ID_NIC5_1, + [GAUDI2_QUEUE_ID_NIC_12_0...GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_ENGINE_ID_NIC6_0, + [GAUDI2_QUEUE_ID_NIC_13_0...GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_ENGINE_ID_NIC6_1, + [GAUDI2_QUEUE_ID_NIC_14_0...GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_ENGINE_ID_NIC7_0, + [GAUDI2_QUEUE_ID_NIC_15_0...GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_ENGINE_ID_NIC7_1, + [GAUDI2_QUEUE_ID_NIC_16_0...GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_ENGINE_ID_NIC8_0, + [GAUDI2_QUEUE_ID_NIC_17_0...GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_ENGINE_ID_NIC8_1, + [GAUDI2_QUEUE_ID_NIC_18_0...GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_ENGINE_ID_NIC9_0, + [GAUDI2_QUEUE_ID_NIC_19_0...GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_ENGINE_ID_NIC9_1, + [GAUDI2_QUEUE_ID_NIC_20_0...GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_ENGINE_ID_NIC10_0, + [GAUDI2_QUEUE_ID_NIC_21_0...GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_ENGINE_ID_NIC10_1, + [GAUDI2_QUEUE_ID_NIC_22_0...GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_ENGINE_ID_NIC11_0, + [GAUDI2_QUEUE_ID_NIC_23_0...GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_ENGINE_ID_NIC11_1, + [GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_ENGINE_ID_ROT_0, + [GAUDI2_QUEUE_ID_ROT_1_0...GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_ENGINE_ID_ROT_1, +}; + const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = { [GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE, [GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE, @@ -2001,7 +2097,8 @@ enum razwi_event_sources { RAZWI_PDMA, RAZWI_NIC, RAZWI_DEC, - RAZWI_ROT + RAZWI_ROT, + RAZWI_ARC_FARM }; struct hbm_mc_error_causes { @@ -2431,7 +2528,7 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev) prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE; prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; - strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); + strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); prop->mme_master_slave_mode = 1; @@ -2884,7 +2981,8 @@ static int gaudi2_cpucp_info_get(struct hl_device *hdev) } if (!strlen(prop->cpucp_info.card_name)) - strncpy(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); + strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, + CARD_NAME_MAX_LEN); /* Overwrite binning masks with the actual binning values from F/W */ hdev->dram_binning = prop->cpucp_info.dram_binning_mask; @@ -4077,6 +4175,8 @@ static const char *gaudi2_irq_name(u16 irq_number) return "gaudi2 unexpected error"; case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST: return "gaudi2 user completion"; + case GAUDI2_IRQ_NUM_EQ_ERROR: + return "gaudi2 eq error"; default: return "invalid"; } @@ -4127,9 +4227,7 @@ static int gaudi2_dec_enable_msix(struct hl_device *hdev) rc = request_irq(irq, hl_irq_handler_dec_abnrm, 0, gaudi2_irq_name(i), (void *) dec); } else { - rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt, - hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT, - gaudi2_irq_name(i), + rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i), (void *) &hdev->user_interrupt[dec->core_id]); } @@ -4187,17 +4285,17 @@ static int gaudi2_enable_msix(struct hl_device *hdev) } irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT); - rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt, - hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT, - gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT), &hdev->tpc_interrupt); + rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT, + gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT), + &hdev->tpc_interrupt); if (rc) { dev_err(hdev->dev, "Failed to request IRQ %d", irq); goto free_dec_irq; } irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR); - rc = request_irq(irq, hl_irq_handler_user_interrupt, 0, - gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR), + rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT, + gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR), &hdev->unexpected_error_interrupt); if (rc) { dev_err(hdev->dev, "Failed to request IRQ %d", irq); @@ -4209,16 +4307,23 @@ static int gaudi2_enable_msix(struct hl_device *hdev) i++, j++, user_irq_init_cnt++) { irq = pci_irq_vector(hdev->pdev, i); - rc = request_threaded_irq(irq, hl_irq_handler_user_interrupt, - hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT, - gaudi2_irq_name(i), &hdev->user_interrupt[j]); - + rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i), + &hdev->user_interrupt[j]); if (rc) { dev_err(hdev->dev, "Failed to request IRQ %d", irq); goto free_user_irq; } } + irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR); + rc = request_threaded_irq(irq, NULL, hl_irq_eq_error_interrupt_thread_handler, + IRQF_ONESHOT, gaudi2_irq_name(GAUDI2_IRQ_NUM_EQ_ERROR), + hdev); + if (rc) { + dev_err(hdev->dev, "Failed to request IRQ %d", irq); + goto free_user_irq; + } + gaudi2->hw_cap_initialized |= HW_CAP_MSIX; return 0; @@ -4278,6 +4383,7 @@ static void gaudi2_sync_irqs(struct hl_device *hdev) } synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE)); + synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR)); } static void gaudi2_disable_msix(struct hl_device *hdev) @@ -4314,6 +4420,9 @@ static void gaudi2_disable_msix(struct hl_device *hdev) cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION]; free_irq(irq, cq); + irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR); + free_irq(irq, hdev); + pci_free_irq_vectors(hdev->pdev); gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX; @@ -4716,6 +4825,8 @@ static void gaudi2_init_firmware_preload_params(struct hl_device *hdev) pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0; pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1; pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC; + pre_fw_load->wait_for_preboot_extended_timeout = + GAUDI2_PREBOOT_EXTENDED_REQ_TIMEOUT_USEC; } static void gaudi2_init_firmware_loader(struct hl_device *hdev) @@ -6157,17 +6268,14 @@ static int gaudi2_get_soft_rst_done_indication(struct hl_device *hdev, u32 poll_ static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset, u32 poll_timeout_us) { - struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; - int rc = 0; + int rc; if (!driver_performs_reset) { if (hl_is_fw_sw_ver_below(hdev, 1, 10)) { /* set SP to indicate reset request sent to FW */ - if (dyn_regs->cpu_rst_status) - WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA); - else - WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA); - WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq), + WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA); + + WREG32(mmGIC_HOST_SOFT_RST_IRQ_POLL_REG, gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id); /* wait for f/w response */ @@ -6623,24 +6731,6 @@ static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t s hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); } -static dma_addr_t gaudi2_dma_map_single(struct hl_device *hdev, void *addr, int len, - enum dma_data_direction dir) -{ - dma_addr_t dma_addr; - - dma_addr = dma_map_single(&hdev->pdev->dev, addr, len, dir); - if (unlikely(dma_mapping_error(&hdev->pdev->dev, dma_addr))) - return 0; - - return dma_addr; -} - -static void gaudi2_dma_unmap_single(struct hl_device *hdev, dma_addr_t addr, int len, - enum dma_data_direction dir) -{ - dma_unmap_single(&hdev->pdev->dev, addr, len, dir); -} - static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser) { struct asic_fixed_properties *asic_prop = &hdev->asic_prop; @@ -7703,11 +7793,13 @@ static inline bool is_info_event(u32 event) switch (event) { case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE: case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E: + case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY ... GAUDI2_EVENT_ARC_PWR_RD_MODE3: /* return in case of NIC status event - these events are received periodically and not as * an indication to an error. */ case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1: + case GAUDI2_EVENT_ARC_EQ_HEARTBEAT: return true; default: return false; @@ -7739,21 +7831,34 @@ static void gaudi2_print_event(struct hl_device *hdev, u16 event_type, static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type, struct hl_eq_ecc_data *ecc_data) { - u64 ecc_address = 0, ecc_syndrom = 0; + u64 ecc_address = 0, ecc_syndrome = 0; u8 memory_wrapper_idx = 0; + bool has_block_id = false; + u16 block_id; + + if (!hl_is_fw_sw_ver_below(hdev, 1, 12)) + has_block_id = true; ecc_address = le64_to_cpu(ecc_data->ecc_address); - ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom); + ecc_syndrome = le64_to_cpu(ecc_data->ecc_syndrom); memory_wrapper_idx = ecc_data->memory_wrapper_idx; - gaudi2_print_event(hdev, event_type, !ecc_data->is_critical, - "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u. critical %u.", - ecc_address, ecc_syndrom, memory_wrapper_idx, ecc_data->is_critical); + if (has_block_id) { + block_id = le16_to_cpu(ecc_data->block_id); + gaudi2_print_event(hdev, event_type, !ecc_data->is_critical, + "ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. block id %#x. critical %u.", + ecc_address, ecc_syndrome, memory_wrapper_idx, block_id, + ecc_data->is_critical); + } else { + gaudi2_print_event(hdev, event_type, !ecc_data->is_critical, + "ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. critical %u.", + ecc_address, ecc_syndrome, memory_wrapper_idx, ecc_data->is_critical); + } return !!ecc_data->is_critical; } -static void print_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base) +static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u64 event_mask) { u32 lo, hi, cq_ptr_size, arc_cq_ptr_size; u64 cq_ptr, arc_cq_ptr, cp_current_inst; @@ -7775,10 +7880,22 @@ static void print_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base) dev_info(hdev->dev, "LowerQM. CQ: {ptr %#llx, size %u}, ARC_CQ: {ptr %#llx, size %u}, CP: {instruction %#llx}\n", cq_ptr, cq_ptr_size, arc_cq_ptr, arc_cq_ptr_size, cp_current_inst); + + if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { + if (arc_cq_ptr) { + hdev->captured_err_info.undef_opcode.cq_addr = arc_cq_ptr; + hdev->captured_err_info.undef_opcode.cq_size = arc_cq_ptr_size; + } else { + hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr; + hdev->captured_err_info.undef_opcode.cq_size = cq_ptr_size; + } + + hdev->captured_err_info.undef_opcode.stream_id = QMAN_STREAMS; + } } static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type, - u64 qman_base, u32 qid_base) + u64 qman_base, u32 qid_base, u64 *event_mask) { u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0; u64 glbl_sts_addr, arb_err_addr; @@ -7812,8 +7929,20 @@ static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type error_count++; } - if (i == QMAN_STREAMS) - print_lower_qman_data_on_err(hdev, qman_base); + /* check for undefined opcode */ + if (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK) { + *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE; + if (hdev->captured_err_info.undef_opcode.write_enable) { + memset(&hdev->captured_err_info.undef_opcode, 0, + sizeof(hdev->captured_err_info.undef_opcode)); + hdev->captured_err_info.undef_opcode.timestamp = ktime_get(); + hdev->captured_err_info.undef_opcode.engine_id = + gaudi2_queue_id_to_engine_id[qid_base]; + } + + if (i == QMAN_STREAMS) + handle_lower_qman_data_on_err(hdev, qman_base, *event_mask); + } } arb_err_val = RREG32(arb_err_addr); @@ -7927,6 +8056,9 @@ static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev, case RAZWI_ROT: return GAUDI2_ENGINE_ID_ROT_0 + module_idx; + case RAZWI_ARC_FARM: + return GAUDI2_ENGINE_ID_ARC_FARM; + default: return GAUDI2_ENGINE_ID_SIZE; } @@ -8036,6 +8168,11 @@ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev, lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx]; sprintf(initiator_name, "ROT_%u", module_idx); break; + case RAZWI_ARC_FARM: + lbw_rtr_id = DCORE1_RTR5; + hbw_rtr_id = DCORE1_RTR7; + sprintf(initiator_name, "ARC_FARM_%u", module_idx); + break; default: return; } @@ -8475,7 +8612,8 @@ static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e return 0; } - error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, qid_base); + error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base, + qid_base, event_mask); /* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */ if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) { @@ -8488,7 +8626,7 @@ static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e return error_count; } -static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type) +static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) { u32 i, sts_val, sts_clr_val, error_count = 0, arc_farm; @@ -8510,6 +8648,7 @@ static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type sts_clr_val); } + gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ARC_FARM, 0, 0, event_mask); hl_check_for_glbl_errors(hdev); return error_count; @@ -8649,21 +8788,16 @@ static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event return error_count; } -static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type, - u64 intr_cause_data) +static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type) { - int i, error_count = 0; - - for (i = 0 ; i < GAUDI2_NUM_OF_MME_SBTE_ERR_CAUSE ; i++) - if (intr_cause_data & BIT(i)) { - gaudi2_print_event(hdev, event_type, true, - "err cause: %s", guadi2_mme_sbte_error_cause[i]); - error_count++; - } - + /* + * We have a single error cause here but the report mechanism is + * buggy. Hence there is no good reason to fetch the cause so we + * just check for glbl_errors and exit. + */ hl_check_for_glbl_errors(hdev); - return error_count; + return GAUDI2_NA_EVENT_CAUSE; } static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type, @@ -9460,6 +9594,176 @@ static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type, } } +static u16 event_id_to_engine_id(struct hl_device *hdev, u16 event_type) +{ + enum gaudi2_block_types type = GAUDI2_BLOCK_TYPE_MAX; + u16 index; + + switch (event_type) { + case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP: + index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP; + type = GAUDI2_BLOCK_TYPE_TPC; + break; + case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC24_QM: + index = event_type - GAUDI2_EVENT_TPC0_QM; + type = GAUDI2_BLOCK_TYPE_TPC; + break; + case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE: + case GAUDI2_EVENT_MME0_SPI_BASE ... GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID: + case GAUDI2_EVENT_MME0_QM: + index = 0; + type = GAUDI2_BLOCK_TYPE_MME; + break; + case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE: + case GAUDI2_EVENT_MME1_SPI_BASE ... GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID: + case GAUDI2_EVENT_MME1_QM: + index = 1; + type = GAUDI2_BLOCK_TYPE_MME; + break; + case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE: + case GAUDI2_EVENT_MME2_SPI_BASE ... GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID: + case GAUDI2_EVENT_MME2_QM: + index = 2; + type = GAUDI2_BLOCK_TYPE_MME; + break; + case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE: + case GAUDI2_EVENT_MME3_SPI_BASE ... GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID: + case GAUDI2_EVENT_MME3_QM: + index = 3; + type = GAUDI2_BLOCK_TYPE_MME; + break; + case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP: + case GAUDI2_EVENT_KDMA_BM_SPMU: + case GAUDI2_EVENT_KDMA0_CORE: + return GAUDI2_ENGINE_ID_KDMA; + case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP: + case GAUDI2_EVENT_PDMA0_CORE: + case GAUDI2_EVENT_PDMA0_BM_SPMU: + case GAUDI2_EVENT_PDMA0_QM: + return GAUDI2_ENGINE_ID_PDMA_0; + case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP: + case GAUDI2_EVENT_PDMA1_CORE: + case GAUDI2_EVENT_PDMA1_BM_SPMU: + case GAUDI2_EVENT_PDMA1_QM: + return GAUDI2_ENGINE_ID_PDMA_1; + case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE: + index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE; + type = GAUDI2_BLOCK_TYPE_DEC; + break; + case GAUDI2_EVENT_DEC0_SPI ... GAUDI2_EVENT_DEC9_BMON_SPMU: + index = (event_type - GAUDI2_EVENT_DEC0_SPI) >> 1; + type = GAUDI2_BLOCK_TYPE_DEC; + break; + case GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE: + index = event_type - GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE; + return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2); + case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1: + index = event_type - GAUDI2_EVENT_NIC0_QM0; + return GAUDI2_ENGINE_ID_NIC0_0 + index; + case GAUDI2_EVENT_NIC0_BMON_SPMU ... GAUDI2_EVENT_NIC11_SW_ERROR: + index = event_type - GAUDI2_EVENT_NIC0_BMON_SPMU; + return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2); + case GAUDI2_EVENT_TPC0_BMON_SPMU ... GAUDI2_EVENT_TPC24_KERNEL_ERR: + index = (event_type - GAUDI2_EVENT_TPC0_BMON_SPMU) >> 1; + type = GAUDI2_BLOCK_TYPE_TPC; + break; + case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE: + case GAUDI2_EVENT_ROTATOR0_BMON_SPMU: + case GAUDI2_EVENT_ROTATOR0_ROT0_QM: + return GAUDI2_ENGINE_ID_ROT_0; + case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE: + case GAUDI2_EVENT_ROTATOR1_BMON_SPMU: + case GAUDI2_EVENT_ROTATOR1_ROT1_QM: + return GAUDI2_ENGINE_ID_ROT_1; + case GAUDI2_EVENT_HDMA0_BM_SPMU: + case GAUDI2_EVENT_HDMA0_QM: + case GAUDI2_EVENT_HDMA0_CORE: + return GAUDI2_DCORE0_ENGINE_ID_EDMA_0; + case GAUDI2_EVENT_HDMA1_BM_SPMU: + case GAUDI2_EVENT_HDMA1_QM: + case GAUDI2_EVENT_HDMA1_CORE: + return GAUDI2_DCORE0_ENGINE_ID_EDMA_1; + case GAUDI2_EVENT_HDMA2_BM_SPMU: + case GAUDI2_EVENT_HDMA2_QM: + case GAUDI2_EVENT_HDMA2_CORE: + return GAUDI2_DCORE1_ENGINE_ID_EDMA_0; + case GAUDI2_EVENT_HDMA3_BM_SPMU: + case GAUDI2_EVENT_HDMA3_QM: + case GAUDI2_EVENT_HDMA3_CORE: + return GAUDI2_DCORE1_ENGINE_ID_EDMA_1; + case GAUDI2_EVENT_HDMA4_BM_SPMU: + case GAUDI2_EVENT_HDMA4_QM: + case GAUDI2_EVENT_HDMA4_CORE: + return GAUDI2_DCORE2_ENGINE_ID_EDMA_0; + case GAUDI2_EVENT_HDMA5_BM_SPMU: + case GAUDI2_EVENT_HDMA5_QM: + case GAUDI2_EVENT_HDMA5_CORE: + return GAUDI2_DCORE2_ENGINE_ID_EDMA_1; + case GAUDI2_EVENT_HDMA6_BM_SPMU: + case GAUDI2_EVENT_HDMA6_QM: + case GAUDI2_EVENT_HDMA6_CORE: + return GAUDI2_DCORE3_ENGINE_ID_EDMA_0; + case GAUDI2_EVENT_HDMA7_BM_SPMU: + case GAUDI2_EVENT_HDMA7_QM: + case GAUDI2_EVENT_HDMA7_CORE: + return GAUDI2_DCORE3_ENGINE_ID_EDMA_1; + default: + break; + } + + switch (type) { + case GAUDI2_BLOCK_TYPE_TPC: + switch (index) { + case TPC_ID_DCORE0_TPC0 ... TPC_ID_DCORE0_TPC5: + return GAUDI2_DCORE0_ENGINE_ID_TPC_0 + index; + case TPC_ID_DCORE1_TPC0 ... TPC_ID_DCORE1_TPC5: + return GAUDI2_DCORE1_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE1_TPC0; + case TPC_ID_DCORE2_TPC0 ... TPC_ID_DCORE2_TPC5: + return GAUDI2_DCORE2_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE2_TPC0; + case TPC_ID_DCORE3_TPC0 ... TPC_ID_DCORE3_TPC5: + return GAUDI2_DCORE3_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE3_TPC0; + default: + break; + } + break; + case GAUDI2_BLOCK_TYPE_MME: + switch (index) { + case MME_ID_DCORE0: return GAUDI2_DCORE0_ENGINE_ID_MME; + case MME_ID_DCORE1: return GAUDI2_DCORE1_ENGINE_ID_MME; + case MME_ID_DCORE2: return GAUDI2_DCORE2_ENGINE_ID_MME; + case MME_ID_DCORE3: return GAUDI2_DCORE3_ENGINE_ID_MME; + default: + break; + } + break; + case GAUDI2_BLOCK_TYPE_DEC: + switch (index) { + case DEC_ID_DCORE0_DEC0: return GAUDI2_DCORE0_ENGINE_ID_DEC_0; + case DEC_ID_DCORE0_DEC1: return GAUDI2_DCORE0_ENGINE_ID_DEC_1; + case DEC_ID_DCORE1_DEC0: return GAUDI2_DCORE1_ENGINE_ID_DEC_0; + case DEC_ID_DCORE1_DEC1: return GAUDI2_DCORE1_ENGINE_ID_DEC_1; + case DEC_ID_DCORE2_DEC0: return GAUDI2_DCORE2_ENGINE_ID_DEC_0; + case DEC_ID_DCORE2_DEC1: return GAUDI2_DCORE2_ENGINE_ID_DEC_1; + case DEC_ID_DCORE3_DEC0: return GAUDI2_DCORE3_ENGINE_ID_DEC_0; + case DEC_ID_DCORE3_DEC1: return GAUDI2_DCORE3_ENGINE_ID_DEC_1; + case DEC_ID_PCIE_VDEC0: return GAUDI2_PCIE_ENGINE_ID_DEC_0; + case DEC_ID_PCIE_VDEC1: return GAUDI2_PCIE_ENGINE_ID_DEC_1; + default: + break; + } + break; + default: + break; + } + + return U16_MAX; +} + +static void hl_eq_heartbeat_event_handle(struct hl_device *hdev) +{ + hdev->eq_heartbeat_received = true; +} + static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) { struct gaudi2_device *gaudi2 = hdev->asic_specific; @@ -9501,7 +9805,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent break; case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0: - error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type); + error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type, &event_mask); event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; @@ -9724,8 +10028,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP: case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP: case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP: - error_count = gaudi2_handle_mme_sbte_err(hdev, event_type, - le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + error_count = gaudi2_handle_mme_sbte_err(hdev, event_type); event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B: @@ -9875,6 +10178,21 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent is_critical = true; break; + case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY: + case GAUDI2_EVENT_ARC_PWR_BRK_EXT: + case GAUDI2_EVENT_ARC_PWR_RD_MODE0: + case GAUDI2_EVENT_ARC_PWR_RD_MODE1: + case GAUDI2_EVENT_ARC_PWR_RD_MODE2: + case GAUDI2_EVENT_ARC_PWR_RD_MODE3: + error_count = GAUDI2_NA_EVENT_CAUSE; + dev_info_ratelimited(hdev->dev, "%s event received\n", + gaudi2_irq_map_table[event_type].name); + break; + + case GAUDI2_EVENT_ARC_EQ_HEARTBEAT: + hl_eq_heartbeat_event_handle(hdev); + error_count = GAUDI2_NA_EVENT_CAUSE; + break; default: if (gaudi2_irq_map_table[event_type].valid) { dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n", @@ -9883,6 +10201,9 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent } } + if (event_mask & HL_NOTIFIER_EVENT_USER_ENGINE_ERR) + hl_capture_engine_err(hdev, event_id_to_engine_id(hdev, event_type), error_count); + /* Make sure to dump an error in case no error cause was printed so far. * Note that although we have counted the errors, we use this number as * a boolean. @@ -10523,6 +10844,9 @@ static int gaudi2_ctx_init(struct hl_ctx *ctx) { int rc; + if (ctx->asid == HL_KERNEL_ASID_ID) + return 0; + rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid); if (rc) return rc; @@ -11014,6 +11338,7 @@ static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 static void gaudi2_get_msi_info(__le32 *table) { table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX); + table[CPUCP_EVENT_QUEUE_ERR_MSI_TYPE] = cpu_to_le32(GAUDI2_IRQ_NUM_EQ_ERROR); } static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx) @@ -11170,11 +11495,9 @@ static const struct hl_asic_funcs gaudi2_funcs = { .asic_dma_pool_free = gaudi2_dma_pool_free, .cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc, .cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free, - .asic_dma_unmap_single = gaudi2_dma_unmap_single, - .asic_dma_map_single = gaudi2_dma_map_single, - .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable, + .dma_unmap_sgtable = hl_asic_dma_unmap_sgtable, .cs_parser = gaudi2_cs_parser, - .asic_dma_map_sgtable = hl_dma_map_sgtable, + .dma_map_sgtable = hl_asic_dma_map_sgtable, .add_end_of_cb_packets = NULL, .update_eq_ci = gaudi2_update_eq_ci, .context_switch = gaudi2_context_switch, diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2P.h b/drivers/accel/habanalabs/gaudi2/gaudi2P.h index 5f3ce08692..9b9eef0d97 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2P.h +++ b/drivers/accel/habanalabs/gaudi2/gaudi2P.h @@ -10,7 +10,7 @@ #include <uapi/drm/habanalabs_accel.h> #include "../common/habanalabs.h" -#include "../include/common/hl_boot_if.h" +#include <linux/habanalabs/hl_boot_if.h> #include "../include/gaudi2/gaudi2.h" #include "../include/gaudi2/gaudi2_packets.h" #include "../include/gaudi2/gaudi2_fw_if.h" @@ -84,6 +84,7 @@ #define CORESIGHT_TIMEOUT_USEC 100000 /* 100 ms */ #define GAUDI2_PREBOOT_REQ_TIMEOUT_USEC 25000000 /* 25s */ +#define GAUDI2_PREBOOT_EXTENDED_REQ_TIMEOUT_USEC 85000000 /* 85s */ #define GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC 10000000 /* 10s */ @@ -419,6 +420,7 @@ enum gaudi2_irq_num { GAUDI2_IRQ_NUM_NIC_PORT_FIRST, GAUDI2_IRQ_NUM_NIC_PORT_LAST = (GAUDI2_IRQ_NUM_NIC_PORT_FIRST + NIC_NUMBER_OF_PORTS - 1), GAUDI2_IRQ_NUM_TPC_ASSERT, + GAUDI2_IRQ_NUM_EQ_ERROR, GAUDI2_IRQ_NUM_RESERVED_FIRST, GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_TOTAL_USER_INTERRUPTS - 1), GAUDI2_IRQ_NUM_UNEXPECTED_ERROR = RESERVED_MSIX_UNEXPECTED_USER_ERROR_INTERRUPT, diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c b/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c index 25b5368f37..2423620ff3 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c @@ -151,8 +151,8 @@ static u64 debug_stm_regs[GAUDI2_STM_LAST + 1] = { [GAUDI2_STM_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_STM_BASE, [GAUDI2_STM_PCIE] = mmPCIE_STM_BASE, [GAUDI2_STM_PSOC] = mmPSOC_STM_BASE, - [GAUDI2_STM_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_STM_BASE, - [GAUDI2_STM_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_STM_BASE, + [GAUDI2_STM_PSOC_ARC0_CS] = 0, + [GAUDI2_STM_PSOC_ARC1_CS] = 0, [GAUDI2_STM_PDMA0_CS] = mmPDMA0_CS_STM_BASE, [GAUDI2_STM_PDMA1_CS] = mmPDMA1_CS_STM_BASE, [GAUDI2_STM_CPU] = mmCPU_STM_BASE, @@ -293,8 +293,8 @@ static u64 debug_etf_regs[GAUDI2_ETF_LAST + 1] = { [GAUDI2_ETF_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_ETF_BASE, [GAUDI2_ETF_PCIE] = mmPCIE_ETF_BASE, [GAUDI2_ETF_PSOC] = mmPSOC_ETF_BASE, - [GAUDI2_ETF_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_ETF_BASE, - [GAUDI2_ETF_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_ETF_BASE, + [GAUDI2_ETF_PSOC_ARC0_CS] = 0, + [GAUDI2_ETF_PSOC_ARC1_CS] = 0, [GAUDI2_ETF_PDMA0_CS] = mmPDMA0_CS_ETF_BASE, [GAUDI2_ETF_PDMA1_CS] = mmPDMA1_CS_ETF_BASE, [GAUDI2_ETF_CPU_0] = mmCPU_ETF_0_BASE, @@ -436,8 +436,8 @@ static u64 debug_funnel_regs[GAUDI2_FUNNEL_LAST + 1] = { [GAUDI2_FUNNEL_DCORE3_RTR6] = mmDCORE3_RTR6_FUNNEL_BASE, [GAUDI2_FUNNEL_DCORE3_RTR7] = mmDCORE3_RTR7_FUNNEL_BASE, [GAUDI2_FUNNEL_PSOC] = mmPSOC_FUNNEL_BASE, - [GAUDI2_FUNNEL_PSOC_ARC0] = mmPSOC_ARC0_FUNNEL_BASE, - [GAUDI2_FUNNEL_PSOC_ARC1] = mmPSOC_ARC1_FUNNEL_BASE, + [GAUDI2_FUNNEL_PSOC_ARC0] = 0, + [GAUDI2_FUNNEL_PSOC_ARC1] = 0, [GAUDI2_FUNNEL_XDMA] = mmXDMA_FUNNEL_BASE, [GAUDI2_FUNNEL_CPU] = mmCPU_FUNNEL_BASE, [GAUDI2_FUNNEL_PMMU] = mmPMMU_FUNNEL_BASE, @@ -766,10 +766,10 @@ static u64 debug_bmon_regs[GAUDI2_BMON_LAST + 1] = { [GAUDI2_BMON_PCIE_MSTR_RD] = mmPCIE_BMON_MSTR_RD_BASE, [GAUDI2_BMON_PCIE_SLV_WR] = mmPCIE_BMON_SLV_WR_BASE, [GAUDI2_BMON_PCIE_SLV_RD] = mmPCIE_BMON_SLV_RD_BASE, - [GAUDI2_BMON_PSOC_ARC0_0] = mmPSOC_ARC0_BMON_0_BASE, - [GAUDI2_BMON_PSOC_ARC0_1] = mmPSOC_ARC0_BMON_1_BASE, - [GAUDI2_BMON_PSOC_ARC1_0] = mmPSOC_ARC1_BMON_0_BASE, - [GAUDI2_BMON_PSOC_ARC1_1] = mmPSOC_ARC1_BMON_1_BASE, + [GAUDI2_BMON_PSOC_ARC0_0] = 0, + [GAUDI2_BMON_PSOC_ARC0_1] = 0, + [GAUDI2_BMON_PSOC_ARC1_0] = 0, + [GAUDI2_BMON_PSOC_ARC1_1] = 0, [GAUDI2_BMON_PDMA0_0] = mmPDMA0_BMON_0_BASE, [GAUDI2_BMON_PDMA0_1] = mmPDMA0_BMON_1_BASE, [GAUDI2_BMON_PDMA1_0] = mmPDMA1_BMON_0_BASE, @@ -968,8 +968,8 @@ static u64 debug_spmu_regs[GAUDI2_SPMU_LAST + 1] = { [GAUDI2_SPMU_DCORE3_VDEC0_CS] = mmDCORE3_VDEC0_CS_SPMU_BASE, [GAUDI2_SPMU_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_SPMU_BASE, [GAUDI2_SPMU_PCIE] = mmPCIE_SPMU_BASE, - [GAUDI2_SPMU_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_SPMU_BASE, - [GAUDI2_SPMU_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_SPMU_BASE, + [GAUDI2_SPMU_PSOC_ARC0_CS] = 0, + [GAUDI2_SPMU_PSOC_ARC1_CS] = 0, [GAUDI2_SPMU_PDMA0_CS] = mmPDMA0_CS_SPMU_BASE, [GAUDI2_SPMU_PDMA1_CS] = mmPDMA1_CS_SPMU_BASE, [GAUDI2_SPMU_PMMU_CS] = mmPMMU_CS_SPMU_BASE, @@ -2092,6 +2092,11 @@ static int gaudi2_config_etf(struct hl_device *hdev, struct hl_debug_params *par if (rc) return -EIO; + val = RREG32(base_reg + mmETF_CTL_OFFSET); + + if ((!params->enable && val == 0x0) || (params->enable && val != 0x0)) + return 0; + val = RREG32(base_reg + mmETF_FFCR_OFFSET); val |= 0x1000; WREG32(base_reg + mmETF_FFCR_OFFSET, val); @@ -2120,10 +2125,17 @@ static int gaudi2_config_etf(struct hl_device *hdev, struct hl_debug_params *par if (!input) return -EINVAL; + val = RREG32(base_reg + mmETF_RSZ_OFFSET) << 2; + if (val) { + val = ffs(val); + WREG32(base_reg + mmETF_PSCR_OFFSET, val); + } else { + WREG32(base_reg + mmETF_PSCR_OFFSET, 0x10); + } + WREG32(base_reg + mmETF_BUFWM_OFFSET, 0x3FFC); WREG32(base_reg + mmETF_MODE_OFFSET, input->sink_mode); WREG32(base_reg + mmETF_FFCR_OFFSET, 0x4001); - WREG32(base_reg + mmETF_PSCR_OFFSET, 0x10); WREG32(base_reg + mmETF_CTL_OFFSET, 1); } else { WREG32(base_reg + mmETF_BUFWM_OFFSET, 0); @@ -2189,6 +2201,11 @@ static int gaudi2_config_etr(struct hl_device *hdev, struct hl_ctx *ctx, if (rc) return -EIO; + val = RREG32(mmPSOC_ETR_CTL); + + if ((!params->enable && val == 0x0) || (params->enable && val != 0x0)) + return 0; + val = RREG32(mmPSOC_ETR_FFCR); val |= 0x1000; WREG32(mmPSOC_ETR_FFCR, val); @@ -2483,7 +2500,8 @@ static int gaudi2_config_spmu(struct hl_device *hdev, struct hl_debug_params *pa * set enabled events mask based on input->event_types_num */ event_mask = 0x80000000; - event_mask |= GENMASK(input->event_types_num, 0); + if (input->event_types_num) + event_mask |= GENMASK(input->event_types_num - 1, 0); WREG32(base_reg + mmSPMU_PMCNTENSET_EL0_OFFSET, event_mask); } else { diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c index 2742b1f801..34bf80c5a4 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c @@ -1601,6 +1601,7 @@ static const u32 gaudi2_pb_dcr0_tpc0_unsecured_regs[] = { mmDCORE0_TPC0_CFG_KERNEL_SRF_30, mmDCORE0_TPC0_CFG_KERNEL_SRF_31, mmDCORE0_TPC0_CFG_TPC_SB_L0CD, + mmDCORE0_TPC0_CFG_TPC_COUNT, mmDCORE0_TPC0_CFG_TPC_ID, mmDCORE0_TPC0_CFG_QM_KERNEL_ID_INC, mmDCORE0_TPC0_CFG_QM_TID_BASE_SIZE_HIGH_DIM_0, @@ -2907,7 +2908,7 @@ static void gaudi2_init_lbw_range_registers_secure(struct hl_device *hdev) * - range 11: NIC11_CFG + *_DBG (not including TPC_DBG) * * If F/W security is not enabled: - * - ranges 12,13: PSOC_CFG (excluding PSOC_TIMESTAMP) + * - ranges 12,13: PSOC_CFG (excluding PSOC_TIMESTAMP, PSOC_EFUSE and PSOC_GLOBAL_CONF) */ u64 lbw_range_min_short[] = { mmNIC0_TX_AXUSER_BASE, @@ -2923,7 +2924,7 @@ static void gaudi2_init_lbw_range_registers_secure(struct hl_device *hdev) mmNIC10_TX_AXUSER_BASE, mmNIC11_TX_AXUSER_BASE, mmPSOC_I2C_M0_BASE, - mmPSOC_EFUSE_BASE + mmPSOC_GPIO0_BASE }; u64 lbw_range_max_short[] = { mmNIC0_MAC_CH3_MAC_PCS_BASE + HL_BLOCK_SIZE, @@ -3219,6 +3220,7 @@ static void gaudi2_init_range_registers(struct hl_device *hdev) */ static int gaudi2_init_protection_bits(struct hl_device *hdev) { + u32 *user_regs_array = NULL, user_regs_array_size = 0, engine_core_intr_reg; struct asic_fixed_properties *prop = &hdev->asic_prop; u32 instance_offset; int rc = 0; @@ -3389,11 +3391,24 @@ static int gaudi2_init_protection_bits(struct hl_device *hdev) /* PSOC. * Except for PSOC_GLOBAL_CONF, skip when security is enabled in F/W, because the blocks are * protected by privileged RR. + * For PSOC_GLOBAL_CONF, need to un-secure the scratchpad register which is used for engine + * cores to raise events towards F/W. */ + engine_core_intr_reg = (u32) (hdev->asic_prop.engine_core_interrupt_reg_addr - CFG_BASE); + if (engine_core_intr_reg >= mmPSOC_GLOBAL_CONF_SCRATCHPAD_0 && + engine_core_intr_reg <= mmPSOC_GLOBAL_CONF_SCRATCHPAD_31) { + user_regs_array = &engine_core_intr_reg; + user_regs_array_size = 1; + } else { + dev_err(hdev->dev, + "Engine cores register for interrupts (%#x) is not a PSOC scratchpad register\n", + engine_core_intr_reg); + } + rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA, HL_PB_SINGLE_INSTANCE, HL_PB_NA, gaudi2_pb_psoc_global_conf, ARRAY_SIZE(gaudi2_pb_psoc_global_conf), - NULL, HL_PB_NA); + user_regs_array, user_regs_array_size); if (!hdev->asic_prop.fw_security_enabled) rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA, diff --git a/drivers/accel/habanalabs/goya/goya.c b/drivers/accel/habanalabs/goya/goya.c index 7c685e6075..1322cb330c 100644 --- a/drivers/accel/habanalabs/goya/goya.c +++ b/drivers/accel/habanalabs/goya/goya.c @@ -466,7 +466,7 @@ int goya_set_fixed_properties(struct hl_device *hdev) prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; - strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME, + strscpy_pad(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); prop->max_pending_cs = GOYA_MAX_PENDING_CS; @@ -3358,7 +3358,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev, list_add_tail(&userptr->job_node, parser->job_userptr_list); - rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir); + rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir); if (rc) { dev_err(hdev->dev, "failed to map sgt with DMA region\n"); goto unpin_memory; @@ -5122,7 +5122,7 @@ int goya_cpucp_info_get(struct hl_device *hdev) } if (!strlen(prop->cpucp_info.card_name)) - strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME, + strscpy_pad(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); return 0; @@ -5465,9 +5465,9 @@ static const struct hl_asic_funcs goya_funcs = { .asic_dma_pool_free = goya_dma_pool_free, .cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc, .cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free, - .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable, + .dma_unmap_sgtable = hl_asic_dma_unmap_sgtable, .cs_parser = goya_cs_parser, - .asic_dma_map_sgtable = hl_dma_map_sgtable, + .dma_map_sgtable = hl_asic_dma_map_sgtable, .add_end_of_cb_packets = goya_add_end_of_cb_packets, .update_eq_ci = goya_update_eq_ci, .context_switch = goya_context_switch, diff --git a/drivers/accel/habanalabs/goya/goyaP.h b/drivers/accel/habanalabs/goya/goyaP.h index 5df3d30b91..194c2ae157 100644 --- a/drivers/accel/habanalabs/goya/goyaP.h +++ b/drivers/accel/habanalabs/goya/goyaP.h @@ -9,8 +9,8 @@ #define GOYAP_H_ #include <uapi/drm/habanalabs_accel.h> +#include <linux/habanalabs/hl_boot_if.h> #include "../common/habanalabs.h" -#include "../include/common/hl_boot_if.h" #include "../include/goya/goya_packets.h" #include "../include/goya/goya.h" #include "../include/goya/goya_async_events.h" diff --git a/drivers/accel/habanalabs/goya/goya_coresight.c b/drivers/accel/habanalabs/goya/goya_coresight.c index a6d6cc38bc..41cae5fd84 100644 --- a/drivers/accel/habanalabs/goya/goya_coresight.c +++ b/drivers/accel/habanalabs/goya/goya_coresight.c @@ -315,6 +315,11 @@ static int goya_config_etf(struct hl_device *hdev, WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK); + val = RREG32(base_reg + 0x20); + + if ((!params->enable && val == 0x0) || (params->enable && val != 0x0)) + return 0; + val = RREG32(base_reg + 0x304); val |= 0x1000; WREG32(base_reg + 0x304, val); @@ -386,6 +391,11 @@ static int goya_config_etr(struct hl_device *hdev, WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK); + val = RREG32(mmPSOC_ETR_CTL); + + if ((!params->enable && val == 0x0) || (params->enable && val != 0x0)) + return 0; + val = RREG32(mmPSOC_ETR_FFCR); val |= 0x1000; WREG32(mmPSOC_ETR_FFCR, val); diff --git a/drivers/accel/habanalabs/include/gaudi/gaudi_fw_if.h b/drivers/accel/habanalabs/include/gaudi/gaudi_fw_if.h index 2dba02757d..a2547f3067 100644 --- a/drivers/accel/habanalabs/include/gaudi/gaudi_fw_if.h +++ b/drivers/accel/habanalabs/include/gaudi/gaudi_fw_if.h @@ -44,38 +44,6 @@ struct eq_nic_sei_event { __u8 pad[6]; }; -/* - * struct gaudi_nic_status - describes the status of a NIC port. - * @port: NIC port index. - * @bad_format_cnt: e.g. CRC. - * @responder_out_of_sequence_psn_cnt: e.g NAK. - * @high_ber_reinit_cnt: link reinit due to high BER. - * @correctable_err_cnt: e.g. bit-flip. - * @uncorrectable_err_cnt: e.g. MAC errors. - * @retraining_cnt: re-training counter. - * @up: is port up. - * @pcs_link: has PCS link. - * @phy_ready: is PHY ready. - * @auto_neg: is Autoneg enabled. - * @timeout_retransmission_cnt: timeout retransmission events - * @high_ber_cnt: high ber events - */ -struct gaudi_nic_status { - __u32 port; - __u32 bad_format_cnt; - __u32 responder_out_of_sequence_psn_cnt; - __u32 high_ber_reinit; - __u32 correctable_err_cnt; - __u32 uncorrectable_err_cnt; - __u32 retraining_cnt; - __u8 up; - __u8 pcs_link; - __u8 phy_ready; - __u8 auto_neg; - __u32 timeout_retransmission_cnt; - __u32 high_ber_cnt; -}; - struct gaudi_cold_rst_data { union { struct { diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h index f661068d0c..a426410139 100644 --- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h +++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_events.h @@ -959,6 +959,13 @@ enum gaudi2_async_event_id { GAUDI2_EVENT_ARC_DCCM_FULL = 1319, GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED = 1320, GAUDI2_EVENT_CPU_DEV_RESET_REQ = 1321, + GAUDI2_EVENT_ARC_PWR_BRK_ENTRY = 1322, + GAUDI2_EVENT_ARC_PWR_BRK_EXT = 1323, + GAUDI2_EVENT_ARC_PWR_RD_MODE0 = 1324, + GAUDI2_EVENT_ARC_PWR_RD_MODE1 = 1325, + GAUDI2_EVENT_ARC_PWR_RD_MODE2 = 1326, + GAUDI2_EVENT_ARC_PWR_RD_MODE3 = 1327, + GAUDI2_EVENT_ARC_EQ_HEARTBEAT = 1328, GAUDI2_EVENT_SIZE, }; diff --git a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h index ad01fc4e99..b2dbe1f644 100644 --- a/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h +++ b/drivers/accel/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h @@ -1293,7 +1293,7 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = { .name = "" }, { .fc_id = 631, .cpu_id = 128, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "PCIE_P2P_MSIX" }, - { .fc_id = 632, .cpu_id = 129, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, + { .fc_id = 632, .cpu_id = 129, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_HARD, .name = "PCIE_DRAIN_COMPLETE" }, { .fc_id = 633, .cpu_id = 130, .valid = 1, .msg = 0, .reset = EVENT_RESET_TYPE_NONE, .name = "TPC0_BMON_SPMU" }, @@ -2673,6 +2673,20 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = { .name = "FP32_NOT_SUPPORTED" }, { .fc_id = 1321, .cpu_id = 627, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_HARD, .name = "DEV_RESET_REQ" }, + { .fc_id = 1322, .cpu_id = 628, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "ARC_PWR_BRK_ENTRY" }, + { .fc_id = 1323, .cpu_id = 629, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "ARC_PWR_BRK_EXT" }, + { .fc_id = 1324, .cpu_id = 630, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "ARC_PWR_RD_MODE0" }, + { .fc_id = 1325, .cpu_id = 631, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "ARC_PWR_RD_MODE1" }, + { .fc_id = 1326, .cpu_id = 632, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "ARC_PWR_RD_MODE2" }, + { .fc_id = 1327, .cpu_id = 633, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "ARC_PWR_RD_MODE3" }, + { .fc_id = 1328, .cpu_id = 634, .valid = 1, .msg = 1, .reset = EVENT_RESET_TYPE_NONE, + .name = "ARC_EQ_HEARTBEAT" }, }; #endif /* __GAUDI2_ASYNC_IDS_MAP_EVENTS_EXT_H_ */ diff --git a/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h b/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h index f5d497dc9b..4f951cada0 100644 --- a/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h +++ b/drivers/accel/habanalabs/include/hw_ip/pci/pci_general.h @@ -25,6 +25,7 @@ enum hl_revision_id { REV_ID_INVALID = 0x00, REV_ID_A = 0x01, REV_ID_B = 0x02, + REV_ID_C = 0x03 }; #endif /* INCLUDE_PCI_GENERAL_H_ */ diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile index e4328b4305..95ff7ad163 100644 --- a/drivers/accel/ivpu/Makefile +++ b/drivers/accel/ivpu/Makefile @@ -2,7 +2,6 @@ # Copyright (C) 2023 Intel Corporation intel_vpu-y := \ - ivpu_debugfs.o \ ivpu_drv.o \ ivpu_fw.o \ ivpu_fw_log.o \ @@ -16,4 +15,6 @@ intel_vpu-y := \ ivpu_mmu_context.o \ ivpu_pm.o +intel_vpu-$(CONFIG_DEBUG_FS) += ivpu_debugfs.o + obj-$(CONFIG_DRM_ACCEL_IVPU) += intel_vpu.o diff --git a/drivers/accel/ivpu/TODO b/drivers/accel/ivpu/TODO deleted file mode 100644 index 9077217ae1..0000000000 --- a/drivers/accel/ivpu/TODO +++ /dev/null @@ -1,11 +0,0 @@ -- Move to threaded_irqs to mitigate potential infinite loop in ivpu_ipc_irq_handler() -- Implement support for BLOB IDs -- Add debugfs support to improve debugging and testing -- Add tracing events for performance debugging -- Implement HW based scheduling support -- Use syncobjs for submit/sync -- Refactor IPC protocol to improve message latency -- Implement BO cache and MADVISE IOCTL -- Add support for user allocated buffers using prime import and dma-buf heaps -- Refactor struct ivpu_bo to use struct drm_gem_shmem_object -- Add driver/device documentation diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c index 5e5996fd4f..ea453b985b 100644 --- a/drivers/accel/ivpu/ivpu_debugfs.c +++ b/drivers/accel/ivpu/ivpu_debugfs.c @@ -17,20 +17,26 @@ #include "ivpu_jsm_msg.h" #include "ivpu_pm.h" +static inline struct ivpu_device *seq_to_ivpu(struct seq_file *s) +{ + struct drm_debugfs_entry *entry = s->private; + + return to_ivpu_device(entry->dev); +} + static int bo_list_show(struct seq_file *s, void *v) { - struct drm_info_node *node = (struct drm_info_node *)s->private; struct drm_printer p = drm_seq_file_printer(s); + struct ivpu_device *vdev = seq_to_ivpu(s); - ivpu_bo_list(node->minor->dev, &p); + ivpu_bo_list(&vdev->drm, &p); return 0; } static int fw_name_show(struct seq_file *s, void *v) { - struct drm_info_node *node = (struct drm_info_node *)s->private; - struct ivpu_device *vdev = to_ivpu_device(node->minor->dev); + struct ivpu_device *vdev = seq_to_ivpu(s); seq_printf(s, "%s\n", vdev->fw->name); return 0; @@ -38,8 +44,7 @@ static int fw_name_show(struct seq_file *s, void *v) static int fw_trace_capability_show(struct seq_file *s, void *v) { - struct drm_info_node *node = (struct drm_info_node *)s->private; - struct ivpu_device *vdev = to_ivpu_device(node->minor->dev); + struct ivpu_device *vdev = seq_to_ivpu(s); u64 trace_hw_component_mask; u32 trace_destination_mask; int ret; @@ -57,8 +62,7 @@ static int fw_trace_capability_show(struct seq_file *s, void *v) static int fw_trace_config_show(struct seq_file *s, void *v) { - struct drm_info_node *node = (struct drm_info_node *)s->private; - struct ivpu_device *vdev = to_ivpu_device(node->minor->dev); + struct ivpu_device *vdev = seq_to_ivpu(s); /** * WA: VPU_JSM_MSG_TRACE_GET_CONFIG command is not working yet, * so we use values from vdev->fw instead of calling ivpu_jsm_trace_get_config() @@ -78,8 +82,7 @@ static int fw_trace_config_show(struct seq_file *s, void *v) static int last_bootmode_show(struct seq_file *s, void *v) { - struct drm_info_node *node = (struct drm_info_node *)s->private; - struct ivpu_device *vdev = to_ivpu_device(node->minor->dev); + struct ivpu_device *vdev = seq_to_ivpu(s); seq_printf(s, "%s\n", (vdev->pm->is_warmboot) ? "warmboot" : "coldboot"); @@ -88,8 +91,7 @@ static int last_bootmode_show(struct seq_file *s, void *v) static int reset_counter_show(struct seq_file *s, void *v) { - struct drm_info_node *node = (struct drm_info_node *)s->private; - struct ivpu_device *vdev = to_ivpu_device(node->minor->dev); + struct ivpu_device *vdev = seq_to_ivpu(s); seq_printf(s, "%d\n", atomic_read(&vdev->pm->reset_counter)); return 0; @@ -97,14 +99,13 @@ static int reset_counter_show(struct seq_file *s, void *v) static int reset_pending_show(struct seq_file *s, void *v) { - struct drm_info_node *node = (struct drm_info_node *)s->private; - struct ivpu_device *vdev = to_ivpu_device(node->minor->dev); + struct ivpu_device *vdev = seq_to_ivpu(s); seq_printf(s, "%d\n", atomic_read(&vdev->pm->in_reset)); return 0; } -static const struct drm_info_list vdev_debugfs_list[] = { +static const struct drm_debugfs_info vdev_debugfs_list[] = { {"bo_list", bo_list_show, 0}, {"fw_name", fw_name_show, 0}, {"fw_trace_capability", fw_trace_capability_show, 0}, @@ -270,25 +271,24 @@ static const struct file_operations ivpu_reset_engine_fops = { .write = ivpu_reset_engine_fn, }; -void ivpu_debugfs_init(struct drm_minor *minor) +void ivpu_debugfs_init(struct ivpu_device *vdev) { - struct ivpu_device *vdev = to_ivpu_device(minor->dev); + struct dentry *debugfs_root = vdev->drm.debugfs_root; - drm_debugfs_create_files(vdev_debugfs_list, ARRAY_SIZE(vdev_debugfs_list), - minor->debugfs_root, minor); + drm_debugfs_add_files(&vdev->drm, vdev_debugfs_list, ARRAY_SIZE(vdev_debugfs_list)); - debugfs_create_file("force_recovery", 0200, minor->debugfs_root, vdev, + debugfs_create_file("force_recovery", 0200, debugfs_root, vdev, &ivpu_force_recovery_fops); - debugfs_create_file("fw_log", 0644, minor->debugfs_root, vdev, + debugfs_create_file("fw_log", 0644, debugfs_root, vdev, &fw_log_fops); - debugfs_create_file("fw_trace_destination_mask", 0200, minor->debugfs_root, vdev, + debugfs_create_file("fw_trace_destination_mask", 0200, debugfs_root, vdev, &fw_trace_destination_mask_fops); - debugfs_create_file("fw_trace_hw_comp_mask", 0200, minor->debugfs_root, vdev, + debugfs_create_file("fw_trace_hw_comp_mask", 0200, debugfs_root, vdev, &fw_trace_hw_comp_mask_fops); - debugfs_create_file("fw_trace_level", 0200, minor->debugfs_root, vdev, + debugfs_create_file("fw_trace_level", 0200, debugfs_root, vdev, &fw_trace_level_fops); - debugfs_create_file("reset_engine", 0200, minor->debugfs_root, vdev, + debugfs_create_file("reset_engine", 0200, debugfs_root, vdev, &ivpu_reset_engine_fops); } diff --git a/drivers/accel/ivpu/ivpu_debugfs.h b/drivers/accel/ivpu/ivpu_debugfs.h index 78f80c1e00..49ae9ea782 100644 --- a/drivers/accel/ivpu/ivpu_debugfs.h +++ b/drivers/accel/ivpu/ivpu_debugfs.h @@ -6,8 +6,12 @@ #ifndef __IVPU_DEBUGFS_H__ #define __IVPU_DEBUGFS_H__ -struct drm_minor; +struct ivpu_device; -void ivpu_debugfs_init(struct drm_minor *minor); +#if defined(CONFIG_DEBUG_FS) +void ivpu_debugfs_init(struct ivpu_device *vdev); +#else +static inline void ivpu_debugfs_init(struct ivpu_device *vdev) { } +#endif #endif /* __IVPU_DEBUGFS_H__ */ diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 7e9359611d..c856c417a1 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -131,6 +131,22 @@ static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param return 0; } +static int ivpu_get_core_clock_rate(struct ivpu_device *vdev, u64 *clk_rate) +{ + int ret; + + ret = ivpu_rpm_get_if_active(vdev); + if (ret < 0) + return ret; + + *clk_rate = ret ? ivpu_hw_reg_pll_freq_get(vdev) : 0; + + if (ret) + ivpu_rpm_put(vdev); + + return 0; +} + static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct ivpu_file_priv *file_priv = file->driver_priv; @@ -154,7 +170,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f args->value = vdev->platform; break; case DRM_IVPU_PARAM_CORE_CLOCK_RATE: - args->value = ivpu_hw_reg_pll_freq_get(vdev); + ret = ivpu_get_core_clock_rate(vdev, &args->value); break; case DRM_IVPU_PARAM_NUM_CONTEXTS: args->value = ivpu_get_context_count(vdev); @@ -400,10 +416,6 @@ static const struct drm_driver driver = { .postclose = ivpu_postclose, .gem_prime_import = ivpu_gem_prime_import, -#if defined(CONFIG_DEBUG_FS) - .debugfs_init = ivpu_debugfs_init, -#endif - .ioctls = ivpu_drm_ioctls, .num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls), .fops = &ivpu_fops, @@ -467,9 +479,8 @@ static int ivpu_pci_init(struct ivpu_device *vdev) /* Clear any pending errors */ pcie_capability_clear_word(pdev, PCI_EXP_DEVSTA, 0x3f); - /* VPU 37XX does not require 10m D3hot delay */ - if (ivpu_hw_gen(vdev) == IVPU_HW_37XX) - pdev->d3hot_delay = 0; + /* NPU does not require 10m D3hot delay */ + pdev->d3hot_delay = 0; ret = pcim_enable_device(pdev); if (ret) { @@ -523,78 +534,52 @@ static int ivpu_dev_init(struct ivpu_device *vdev) lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key); ret = ivpu_pci_init(vdev); - if (ret) { - ivpu_err(vdev, "Failed to initialize PCI device: %d\n", ret); + if (ret) goto err_xa_destroy; - } ret = ivpu_irq_init(vdev); - if (ret) { - ivpu_err(vdev, "Failed to initialize IRQs: %d\n", ret); + if (ret) goto err_xa_destroy; - } /* Init basic HW info based on buttress registers which are accessible before power up */ ret = ivpu_hw_info_init(vdev); - if (ret) { - ivpu_err(vdev, "Failed to initialize HW info: %d\n", ret); + if (ret) goto err_xa_destroy; - } /* Power up early so the rest of init code can access VPU registers */ ret = ivpu_hw_power_up(vdev); - if (ret) { - ivpu_err(vdev, "Failed to power up HW: %d\n", ret); + if (ret) goto err_xa_destroy; - } ret = ivpu_mmu_global_context_init(vdev); - if (ret) { - ivpu_err(vdev, "Failed to initialize global MMU context: %d\n", ret); + if (ret) goto err_power_down; - } ret = ivpu_mmu_init(vdev); - if (ret) { - ivpu_err(vdev, "Failed to initialize MMU device: %d\n", ret); + if (ret) goto err_mmu_gctx_fini; - } - ret = ivpu_fw_init(vdev); - if (ret) { - ivpu_err(vdev, "Failed to initialize firmware: %d\n", ret); + ret = ivpu_mmu_reserved_context_init(vdev); + if (ret) goto err_mmu_gctx_fini; - } + + ret = ivpu_fw_init(vdev); + if (ret) + goto err_mmu_rctx_fini; ret = ivpu_ipc_init(vdev); - if (ret) { - ivpu_err(vdev, "Failed to initialize IPC: %d\n", ret); + if (ret) goto err_fw_fini; - } - ret = ivpu_pm_init(vdev); - if (ret) { - ivpu_err(vdev, "Failed to initialize PM: %d\n", ret); - goto err_ipc_fini; - } + ivpu_pm_init(vdev); ret = ivpu_job_done_thread_init(vdev); - if (ret) { - ivpu_err(vdev, "Failed to initialize job done thread: %d\n", ret); + if (ret) goto err_ipc_fini; - } - - ret = ivpu_fw_load(vdev); - if (ret) { - ivpu_err(vdev, "Failed to load firmware: %d\n", ret); - goto err_job_done_thread_fini; - } ret = ivpu_boot(vdev); - if (ret) { - ivpu_err(vdev, "Failed to boot: %d\n", ret); + if (ret) goto err_job_done_thread_fini; - } ivpu_pm_enable(vdev); @@ -606,6 +591,8 @@ err_ipc_fini: ivpu_ipc_fini(vdev); err_fw_fini: ivpu_fw_fini(vdev); +err_mmu_rctx_fini: + ivpu_mmu_reserved_context_fini(vdev); err_mmu_gctx_fini: ivpu_mmu_global_context_fini(vdev); err_power_down: @@ -629,6 +616,7 @@ static void ivpu_dev_fini(struct ivpu_device *vdev) ivpu_ipc_fini(vdev); ivpu_fw_fini(vdev); + ivpu_mmu_reserved_context_fini(vdev); ivpu_mmu_global_context_fini(vdev); drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa)); @@ -657,10 +645,10 @@ static int ivpu_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_drvdata(pdev, vdev); ret = ivpu_dev_init(vdev); - if (ret) { - dev_err(&pdev->dev, "Failed to initialize VPU device: %d\n", ret); + if (ret) return ret; - } + + ivpu_debugfs_init(vdev); ret = drm_dev_register(&vdev->drm, 0); if (ret) { diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 6853dfe1c7..417ddeca85 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -29,12 +29,13 @@ #define IVPU_HW_37XX 37 #define IVPU_HW_40XX 40 -#define IVPU_GLOBAL_CONTEXT_MMU_SSID 0 -/* SSID 1 is used by the VPU to represent invalid context */ -#define IVPU_USER_CONTEXT_MIN_SSID 2 -#define IVPU_USER_CONTEXT_MAX_SSID (IVPU_USER_CONTEXT_MIN_SSID + 63) +#define IVPU_GLOBAL_CONTEXT_MMU_SSID 0 +/* SSID 1 is used by the VPU to represent reserved context */ +#define IVPU_RESERVED_CONTEXT_MMU_SSID 1 +#define IVPU_USER_CONTEXT_MIN_SSID 2 +#define IVPU_USER_CONTEXT_MAX_SSID (IVPU_USER_CONTEXT_MIN_SSID + 63) -#define IVPU_NUM_ENGINES 2 +#define IVPU_NUM_ENGINES 2 #define IVPU_PLATFORM_SILICON 0 #define IVPU_PLATFORM_SIMICS 2 @@ -110,6 +111,7 @@ struct ivpu_device { struct ivpu_pm_info *pm; struct ivpu_mmu_context gctx; + struct ivpu_mmu_context rctx; struct xarray context_xa; struct xa_limit context_xa_limit; @@ -123,6 +125,7 @@ struct ivpu_device { int jsm; int tdr; int reschedule_suspend; + int autosuspend; } timeout; }; diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c index a277bbae78..691da521dd 100644 --- a/drivers/accel/ivpu/ivpu_fw.c +++ b/drivers/accel/ivpu/ivpu_fw.c @@ -301,6 +301,8 @@ int ivpu_fw_init(struct ivpu_device *vdev) if (ret) goto err_fw_release; + ivpu_fw_load(vdev); + return 0; err_fw_release: @@ -314,25 +316,23 @@ void ivpu_fw_fini(struct ivpu_device *vdev) ivpu_fw_release(vdev); } -int ivpu_fw_load(struct ivpu_device *vdev) +void ivpu_fw_load(struct ivpu_device *vdev) { struct ivpu_fw_info *fw = vdev->fw; u64 image_end_offset = fw->image_load_offset + fw->image_size; - memset(fw->mem->kvaddr, 0, fw->image_load_offset); - memcpy(fw->mem->kvaddr + fw->image_load_offset, + memset(ivpu_bo_vaddr(fw->mem), 0, fw->image_load_offset); + memcpy(ivpu_bo_vaddr(fw->mem) + fw->image_load_offset, fw->file->data + FW_FILE_IMAGE_OFFSET, fw->image_size); if (IVPU_WA(clear_runtime_mem)) { - u8 *start = fw->mem->kvaddr + image_end_offset; - u64 size = fw->mem->base.size - image_end_offset; + u8 *start = ivpu_bo_vaddr(fw->mem) + image_end_offset; + u64 size = ivpu_bo_size(fw->mem) - image_end_offset; memset(start, 0, size); } wmb(); /* Flush WC buffers after writing fw->mem */ - - return 0; } static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_params *boot_params) @@ -451,10 +451,10 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params vdev->hw->ranges.global.start; boot_params->ipc_header_area_start = ipc_mem_rx->vpu_addr; - boot_params->ipc_header_area_size = ipc_mem_rx->base.size / 2; + boot_params->ipc_header_area_size = ivpu_bo_size(ipc_mem_rx) / 2; - boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ipc_mem_rx->base.size / 2; - boot_params->ipc_payload_area_size = ipc_mem_rx->base.size / 2; + boot_params->ipc_payload_area_start = ipc_mem_rx->vpu_addr + ivpu_bo_size(ipc_mem_rx) / 2; + boot_params->ipc_payload_area_size = ivpu_bo_size(ipc_mem_rx) / 2; boot_params->global_aliased_pio_base = vdev->hw->ranges.user.start; boot_params->global_aliased_pio_size = ivpu_hw_range_size(&vdev->hw->ranges.user); @@ -486,9 +486,9 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->trace_destination_mask = vdev->fw->trace_destination_mask; boot_params->trace_hw_component_mask = vdev->fw->trace_hw_component_mask; boot_params->crit_tracing_buff_addr = vdev->fw->mem_log_crit->vpu_addr; - boot_params->crit_tracing_buff_size = vdev->fw->mem_log_crit->base.size; + boot_params->crit_tracing_buff_size = ivpu_bo_size(vdev->fw->mem_log_crit); boot_params->verbose_tracing_buff_addr = vdev->fw->mem_log_verb->vpu_addr; - boot_params->verbose_tracing_buff_size = vdev->fw->mem_log_verb->base.size; + boot_params->verbose_tracing_buff_size = ivpu_bo_size(vdev->fw->mem_log_verb); boot_params->punit_telemetry_sram_base = ivpu_hw_reg_telemetry_offset_get(vdev); boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev); diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h index 8567fdf925..10ae2847f0 100644 --- a/drivers/accel/ivpu/ivpu_fw.h +++ b/drivers/accel/ivpu/ivpu_fw.h @@ -31,7 +31,7 @@ struct ivpu_fw_info { int ivpu_fw_init(struct ivpu_device *vdev); void ivpu_fw_fini(struct ivpu_device *vdev); -int ivpu_fw_load(struct ivpu_device *vdev); +void ivpu_fw_load(struct ivpu_device *vdev); void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *bp); static inline bool ivpu_fw_is_cold_boot(struct ivpu_device *vdev) diff --git a/drivers/accel/ivpu/ivpu_fw_log.c b/drivers/accel/ivpu/ivpu_fw_log.c index 95065cac9f..f6770f5e82 100644 --- a/drivers/accel/ivpu/ivpu_fw_log.c +++ b/drivers/accel/ivpu/ivpu_fw_log.c @@ -31,10 +31,10 @@ static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset, { struct vpu_tracing_buffer_header *log; - if ((*offset + sizeof(*log)) > bo->base.size) + if ((*offset + sizeof(*log)) > ivpu_bo_size(bo)) return -EINVAL; - log = bo->kvaddr + *offset; + log = ivpu_bo_vaddr(bo) + *offset; if (log->vpu_canary_start != VPU_TRACING_BUFFER_CANARY) return -EINVAL; @@ -43,7 +43,7 @@ static int fw_log_ptr(struct ivpu_device *vdev, struct ivpu_bo *bo, u32 *offset, ivpu_dbg(vdev, FW_BOOT, "Invalid header size 0x%x\n", log->header_size); return -EINVAL; } - if ((char *)log + log->size > (char *)bo->kvaddr + bo->base.size) { + if ((char *)log + log->size > (char *)ivpu_bo_vaddr(bo) + ivpu_bo_size(bo)) { ivpu_dbg(vdev, FW_BOOT, "Invalid log size 0x%x\n", log->size); return -EINVAL; } diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index d09f13b359..c91852f2ed 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -69,7 +69,7 @@ static const struct ivpu_bo_ops prime_ops = { static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo) { - int npages = bo->base.size >> PAGE_SHIFT; + int npages = ivpu_bo_size(bo) >> PAGE_SHIFT; struct page **pages; pages = drm_gem_get_pages(&bo->base); @@ -88,7 +88,7 @@ static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo) static void shmem_free_pages_locked(struct ivpu_bo *bo) { if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED) - set_pages_array_wb(bo->pages, bo->base.size >> PAGE_SHIFT); + set_pages_array_wb(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT); drm_gem_put_pages(&bo->base, bo->pages, true, false); bo->pages = NULL; @@ -96,7 +96,7 @@ static void shmem_free_pages_locked(struct ivpu_bo *bo) static int ivpu_bo_map_pages_locked(struct ivpu_bo *bo) { - int npages = bo->base.size >> PAGE_SHIFT; + int npages = ivpu_bo_size(bo) >> PAGE_SHIFT; struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); struct sg_table *sgt; int ret; @@ -142,7 +142,7 @@ static const struct ivpu_bo_ops shmem_ops = { static int __must_check internal_alloc_pages_locked(struct ivpu_bo *bo) { - unsigned int i, npages = bo->base.size >> PAGE_SHIFT; + unsigned int i, npages = ivpu_bo_size(bo) >> PAGE_SHIFT; struct page **pages; int ret; @@ -171,10 +171,10 @@ err_free_pages: static void internal_free_pages_locked(struct ivpu_bo *bo) { - unsigned int i, npages = bo->base.size >> PAGE_SHIFT; + unsigned int i, npages = ivpu_bo_size(bo) >> PAGE_SHIFT; if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED) - set_pages_array_wb(bo->pages, bo->base.size >> PAGE_SHIFT); + set_pages_array_wb(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT); for (i = 0; i < npages; i++) put_page(bo->pages[i]); @@ -291,7 +291,7 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx, } mutex_lock(&ctx->lock); - ret = ivpu_mmu_context_insert_node_locked(ctx, range, bo->base.size, &bo->mm_node); + ret = ivpu_mmu_context_insert_node_locked(ctx, range, ivpu_bo_size(bo), &bo->mm_node); if (!ret) { bo->ctx = ctx; bo->vpu_addr = bo->mm_node.start; @@ -438,7 +438,7 @@ static int ivpu_bo_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); ivpu_dbg(vdev, BO, "mmap: ctx %u handle %u vpu_addr 0x%llx size %zu type %s", - bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size, bo->ops->name); + bo->ctx->id, bo->handle, bo->vpu_addr, ivpu_bo_size(bo), bo->ops->name); if (obj->import_attach) { /* Drop the reference drm_gem_mmap_obj() acquired.*/ @@ -553,7 +553,7 @@ ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) drm_gem_object_put(&bo->base); ivpu_dbg(vdev, BO, "alloc shmem: ctx %u vpu_addr 0x%llx size %zu flags 0x%x\n", - file_priv->ctx.id, bo->vpu_addr, bo->base.size, bo->flags); + file_priv->ctx.id, bo->vpu_addr, ivpu_bo_size(bo), bo->flags); return ret; } @@ -590,22 +590,22 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla goto err_put; if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED) - drm_clflush_pages(bo->pages, bo->base.size >> PAGE_SHIFT); + drm_clflush_pages(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT); if (bo->flags & DRM_IVPU_BO_WC) - set_pages_array_wc(bo->pages, bo->base.size >> PAGE_SHIFT); + set_pages_array_wc(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT); else if (bo->flags & DRM_IVPU_BO_UNCACHED) - set_pages_array_uc(bo->pages, bo->base.size >> PAGE_SHIFT); + set_pages_array_uc(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT); prot = ivpu_bo_pgprot(bo, PAGE_KERNEL); - bo->kvaddr = vmap(bo->pages, bo->base.size >> PAGE_SHIFT, VM_MAP, prot); + bo->kvaddr = vmap(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT, VM_MAP, prot); if (!bo->kvaddr) { ivpu_err(vdev, "Failed to map BO into kernel virtual memory\n"); goto err_put; } ivpu_dbg(vdev, BO, "alloc internal: ctx 0 vpu_addr 0x%llx size %zu flags 0x%x\n", - bo->vpu_addr, bo->base.size, flags); + bo->vpu_addr, ivpu_bo_size(bo), flags); return bo; @@ -718,7 +718,7 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p) dma_refcount = atomic_long_read(&bo->base.dma_buf->file->f_count); drm_printf(p, "%5u %6d %16llx %10lu %10u %12lu %14s\n", - bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.size, + bo->ctx->id, bo->handle, bo->vpu_addr, ivpu_bo_size(bo), kref_read(&bo->base.refcount), dma_refcount, bo->ops->name); } diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h index 6b0ceda5f2..a0b4d4a32b 100644 --- a/drivers/accel/ivpu/ivpu_gem.h +++ b/drivers/accel/ivpu/ivpu_gem.h @@ -68,9 +68,19 @@ static inline struct ivpu_bo *to_ivpu_bo(struct drm_gem_object *obj) return container_of(obj, struct ivpu_bo, base); } +static inline void *ivpu_bo_vaddr(struct ivpu_bo *bo) +{ + return bo->kvaddr; +} + +static inline size_t ivpu_bo_size(struct ivpu_bo *bo) +{ + return bo->base.size; +} + static inline struct page *ivpu_bo_get_page(struct ivpu_bo *bo, u64 offset) { - if (offset > bo->base.size || !bo->pages) + if (offset > ivpu_bo_size(bo) || !bo->pages) return NULL; return bo->pages[offset / PAGE_SIZE]; @@ -107,21 +117,21 @@ static inline void *ivpu_to_cpu_addr(struct ivpu_bo *bo, u32 vpu_addr) if (vpu_addr < bo->vpu_addr) return NULL; - if (vpu_addr >= (bo->vpu_addr + bo->base.size)) + if (vpu_addr >= (bo->vpu_addr + ivpu_bo_size(bo))) return NULL; - return bo->kvaddr + (vpu_addr - bo->vpu_addr); + return ivpu_bo_vaddr(bo) + (vpu_addr - bo->vpu_addr); } static inline u32 cpu_to_vpu_addr(struct ivpu_bo *bo, void *cpu_addr) { - if (cpu_addr < bo->kvaddr) + if (cpu_addr < ivpu_bo_vaddr(bo)) return 0; - if (cpu_addr >= (bo->kvaddr + bo->base.size)) + if (cpu_addr >= (ivpu_bo_vaddr(bo) + ivpu_bo_size(bo))) return 0; - return bo->vpu_addr + (cpu_addr - bo->kvaddr); + return bo->vpu_addr + (cpu_addr - ivpu_bo_vaddr(bo)); } #endif /* __IVPU_GEM_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c index ddf03498fd..e658fcf849 100644 --- a/drivers/accel/ivpu/ivpu_hw_37xx.c +++ b/drivers/accel/ivpu/ivpu_hw_37xx.c @@ -70,37 +70,9 @@ (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI)) | \ (REG_FLD(VPU_37XX_HOST_SS_FW_SOC_IRQ_EN, MSS_MBI_CMX))) -static char *ivpu_platform_to_str(u32 platform) -{ - switch (platform) { - case IVPU_PLATFORM_SILICON: - return "IVPU_PLATFORM_SILICON"; - case IVPU_PLATFORM_SIMICS: - return "IVPU_PLATFORM_SIMICS"; - case IVPU_PLATFORM_FPGA: - return "IVPU_PLATFORM_FPGA"; - default: - return "Invalid platform"; - } -} - -static void ivpu_hw_read_platform(struct ivpu_device *vdev) -{ - u32 gen_ctrl = REGV_RD32(VPU_37XX_HOST_SS_GEN_CTRL); - u32 platform = REG_GET_FLD(VPU_37XX_HOST_SS_GEN_CTRL, PS, gen_ctrl); - - if (platform == IVPU_PLATFORM_SIMICS || platform == IVPU_PLATFORM_FPGA) - vdev->platform = platform; - else - vdev->platform = IVPU_PLATFORM_SILICON; - - ivpu_dbg(vdev, MISC, "Platform type: %s (%d)\n", - ivpu_platform_to_str(vdev->platform), vdev->platform); -} - static void ivpu_hw_wa_init(struct ivpu_device *vdev) { - vdev->wa.punit_disabled = ivpu_is_fpga(vdev); + vdev->wa.punit_disabled = false; vdev->wa.clear_runtime_mem = false; vdev->wa.d3hot_after_power_off = true; @@ -119,17 +91,11 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev) static void ivpu_hw_timeouts_init(struct ivpu_device *vdev) { - if (ivpu_is_simics(vdev) || ivpu_is_fpga(vdev)) { - vdev->timeout.boot = 100000; - vdev->timeout.jsm = 50000; - vdev->timeout.tdr = 2000000; - vdev->timeout.reschedule_suspend = 1000; - } else { - vdev->timeout.boot = 1000; - vdev->timeout.jsm = 500; - vdev->timeout.tdr = 2000; - vdev->timeout.reschedule_suspend = 10; - } + vdev->timeout.boot = 1000; + vdev->timeout.jsm = 500; + vdev->timeout.tdr = 2000; + vdev->timeout.reschedule_suspend = 10; + vdev->timeout.autosuspend = 10; } static int ivpu_pll_wait_for_cmd_send(struct ivpu_device *vdev) @@ -224,8 +190,7 @@ static int ivpu_pll_drive(struct ivpu_device *vdev, bool enable) int ret; if (IVPU_WA(punit_disabled)) { - ivpu_dbg(vdev, PM, "Skipping PLL request on %s\n", - ivpu_platform_to_str(vdev->platform)); + ivpu_dbg(vdev, PM, "Skipping PLL request\n"); return 0; } @@ -356,10 +321,10 @@ static int ivpu_boot_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) static int ivpu_boot_top_noc_qrenqn_check(struct ivpu_device *vdev, u32 exp_val) { - u32 val = REGV_RD32(MTL_VPU_TOP_NOC_QREQN); + u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QREQN); - if (!REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) || - !REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val)) + if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, exp_val, val)) return -EIO; return 0; @@ -367,10 +332,10 @@ static int ivpu_boot_top_noc_qrenqn_check(struct ivpu_device *vdev, u32 exp_val) static int ivpu_boot_top_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_val) { - u32 val = REGV_RD32(MTL_VPU_TOP_NOC_QACCEPTN); + u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QACCEPTN); - if (!REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) || - !REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val)) + if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QACCEPTN, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QACCEPTN, HOSTIF_L2CACHE, exp_val, val)) return -EIO; return 0; @@ -378,10 +343,10 @@ static int ivpu_boot_top_noc_qacceptn_check(struct ivpu_device *vdev, u32 exp_va static int ivpu_boot_top_noc_qdeny_check(struct ivpu_device *vdev, u32 exp_val) { - u32 val = REGV_RD32(MTL_VPU_TOP_NOC_QDENY); + u32 val = REGV_RD32(VPU_37XX_TOP_NOC_QDENY); - if (!REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) || - !REG_TEST_FLD_NUM(MTL_VPU_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val)) + if (!REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QDENY, CPU_CTRL, exp_val, val) || + !REG_TEST_FLD_NUM(VPU_37XX_TOP_NOC_QDENY, HOSTIF_L2CACHE, exp_val, val)) return -EIO; return 0; @@ -434,15 +399,15 @@ static int ivpu_boot_host_ss_top_noc_drive(struct ivpu_device *vdev, bool enable int ret; u32 val; - val = REGV_RD32(MTL_VPU_TOP_NOC_QREQN); + val = REGV_RD32(VPU_37XX_TOP_NOC_QREQN); if (enable) { - val = REG_SET_FLD(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, val); - val = REG_SET_FLD(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); + val = REG_SET_FLD(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, val); + val = REG_SET_FLD(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); } else { - val = REG_CLR_FLD(MTL_VPU_TOP_NOC_QREQN, CPU_CTRL, val); - val = REG_CLR_FLD(MTL_VPU_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); + val = REG_CLR_FLD(VPU_37XX_TOP_NOC_QREQN, CPU_CTRL, val); + val = REG_CLR_FLD(VPU_37XX_TOP_NOC_QREQN, HOSTIF_L2CACHE, val); } - REGV_WR32(MTL_VPU_TOP_NOC_QREQN, val); + REGV_WR32(VPU_37XX_TOP_NOC_QREQN, val); ret = ivpu_boot_top_noc_qacceptn_check(vdev, enable ? 0x1 : 0x0); if (ret) { @@ -488,10 +453,6 @@ static void ivpu_boot_pwr_island_drive(struct ivpu_device *vdev, bool enable) static int ivpu_boot_wait_for_pwr_island_status(struct ivpu_device *vdev, u32 exp_val) { - /* FPGA model (UPF) is not power aware, skipped Power Island polling */ - if (ivpu_is_fpga(vdev)) - return 0; - return REGV_POLL_FLD(VPU_37XX_HOST_SS_AON_PWR_ISLAND_STATUS0, MSS_CPU, exp_val, PWR_ISLAND_STATUS_TIMEOUT_US); } @@ -562,7 +523,7 @@ static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev) u32 val = REGV_RD32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES); val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, NOSNOOP_OVERRIDE_EN, val); - val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, val); + val = REG_CLR_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AW_NOSNOOP_OVERRIDE, val); val = REG_SET_FLD(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, AR_NOSNOOP_OVERRIDE, val); REGV_WR32(VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES, val); @@ -584,17 +545,17 @@ static void ivpu_boot_soc_cpu_boot(struct ivpu_device *vdev) { u32 val; - val = REGV_RD32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC); - val = REG_SET_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTRUN0, val); + val = REGV_RD32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC); + val = REG_SET_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTRUN0, val); - val = REG_CLR_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTVEC, val); - REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); + val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RSTVEC, val); + REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); - val = REG_SET_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val); - REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); + val = REG_SET_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val); + REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); - val = REG_CLR_FLD(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val); - REGV_WR32(MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); + val = REG_CLR_FLD(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, IRQI_RESUME0, val); + REGV_WR32(VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC, val); val = vdev->fw->entry_point >> 9; REGV_WR32(VPU_37XX_HOST_SS_LOADING_ADDRESS_LO, val); @@ -646,7 +607,7 @@ static int ivpu_hw_37xx_info_init(struct ivpu_device *vdev) ivpu_hw_init_range(&hw->ranges.shave, 0x180000000, SZ_2G); ivpu_hw_init_range(&hw->ranges.dma, 0x200000000, SZ_8G); - ivpu_hw_read_platform(vdev); + vdev->platform = IVPU_PLATFORM_SILICON; ivpu_hw_wa_init(vdev); ivpu_hw_timeouts_init(vdev); @@ -786,17 +747,17 @@ static void ivpu_hw_37xx_wdt_disable(struct ivpu_device *vdev) u32 val; /* Enable writing and set non-zero WDT value */ - REGV_WR32(MTL_VPU_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); - REGV_WR32(MTL_VPU_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE); + REGV_WR32(VPU_37XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); + REGV_WR32(VPU_37XX_CPU_SS_TIM_WATCHDOG, TIM_WATCHDOG_RESET_VALUE); /* Enable writing and disable watchdog timer */ - REGV_WR32(MTL_VPU_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); - REGV_WR32(MTL_VPU_CPU_SS_TIM_WDOG_EN, 0); + REGV_WR32(VPU_37XX_CPU_SS_TIM_SAFE, TIM_SAFE_ENABLE); + REGV_WR32(VPU_37XX_CPU_SS_TIM_WDOG_EN, 0); /* Now clear the timeout interrupt */ - val = REGV_RD32(MTL_VPU_CPU_SS_TIM_GEN_CONFIG); - val = REG_CLR_FLD(MTL_VPU_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val); - REGV_WR32(MTL_VPU_CPU_SS_TIM_GEN_CONFIG, val); + val = REGV_RD32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG); + val = REG_CLR_FLD(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, WDOG_TO_INT_CLR, val); + REGV_WR32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, val); } static u32 ivpu_hw_37xx_pll_to_freq(u32 ratio, u32 config) @@ -843,10 +804,10 @@ static u32 ivpu_hw_37xx_reg_telemetry_enable_get(struct ivpu_device *vdev) static void ivpu_hw_37xx_reg_db_set(struct ivpu_device *vdev, u32 db_id) { - u32 reg_stride = MTL_VPU_CPU_SS_DOORBELL_1 - MTL_VPU_CPU_SS_DOORBELL_0; - u32 val = REG_FLD(MTL_VPU_CPU_SS_DOORBELL_0, SET); + u32 reg_stride = VPU_37XX_CPU_SS_DOORBELL_1 - VPU_37XX_CPU_SS_DOORBELL_0; + u32 val = REG_FLD(VPU_37XX_CPU_SS_DOORBELL_0, SET); - REGV_WR32I(MTL_VPU_CPU_SS_DOORBELL_0, reg_stride, db_id, val); + REGV_WR32I(VPU_37XX_CPU_SS_DOORBELL_0, reg_stride, db_id, val); } static u32 ivpu_hw_37xx_reg_ipc_rx_addr_get(struct ivpu_device *vdev) @@ -863,7 +824,7 @@ static u32 ivpu_hw_37xx_reg_ipc_rx_count_get(struct ivpu_device *vdev) static void ivpu_hw_37xx_reg_ipc_tx_set(struct ivpu_device *vdev, u32 vpu_addr) { - REGV_WR32(MTL_VPU_CPU_SS_TIM_IPC_FIFO, vpu_addr); + REGV_WR32(VPU_37XX_CPU_SS_TIM_IPC_FIFO, vpu_addr); } static void ivpu_hw_37xx_irq_clear(struct ivpu_device *vdev) diff --git a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h index 6e4e915948..4083beb5e9 100644 --- a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h +++ b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h @@ -3,70 +3,70 @@ * Copyright (C) 2020-2023 Intel Corporation */ -#ifndef __IVPU_HW_MTL_REG_H__ -#define __IVPU_HW_MTL_REG_H__ +#ifndef __IVPU_HW_37XX_REG_H__ +#define __IVPU_HW_37XX_REG_H__ #include <linux/bits.h> -#define VPU_37XX_BUTTRESS_INTERRUPT_TYPE 0x00000000u +#define VPU_37XX_BUTTRESS_INTERRUPT_TYPE 0x00000000u -#define VPU_37XX_BUTTRESS_INTERRUPT_STAT 0x00000004u -#define VPU_37XX_BUTTRESS_INTERRUPT_STAT_FREQ_CHANGE_MASK BIT_MASK(0) +#define VPU_37XX_BUTTRESS_INTERRUPT_STAT 0x00000004u +#define VPU_37XX_BUTTRESS_INTERRUPT_STAT_FREQ_CHANGE_MASK BIT_MASK(0) #define VPU_37XX_BUTTRESS_INTERRUPT_STAT_ATS_ERR_MASK BIT_MASK(1) #define VPU_37XX_BUTTRESS_INTERRUPT_STAT_UFI_ERR_MASK BIT_MASK(2) -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0 0x00000008u -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0_MIN_RATIO_MASK GENMASK(15, 0) -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0_MAX_RATIO_MASK GENMASK(31, 16) +#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0 0x00000008u +#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0_MIN_RATIO_MASK GENMASK(15, 0) +#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD0_MAX_RATIO_MASK GENMASK(31, 16) -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1 0x0000000cu -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK GENMASK(15, 0) -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1_EPP_MASK GENMASK(31, 16) +#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1 0x0000000cu +#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1_TARGET_RATIO_MASK GENMASK(15, 0) +#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD1_EPP_MASK GENMASK(31, 16) -#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2 0x00000010u +#define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2 0x00000010u #define VPU_37XX_BUTTRESS_WP_REQ_PAYLOAD2_CONFIG_MASK GENMASK(15, 0) -#define VPU_37XX_BUTTRESS_WP_REQ_CMD 0x00000014u +#define VPU_37XX_BUTTRESS_WP_REQ_CMD 0x00000014u #define VPU_37XX_BUTTRESS_WP_REQ_CMD_SEND_MASK BIT_MASK(0) #define VPU_37XX_BUTTRESS_WP_DOWNLOAD 0x00000018u #define VPU_37XX_BUTTRESS_WP_DOWNLOAD_TARGET_RATIO_MASK GENMASK(15, 0) #define VPU_37XX_BUTTRESS_CURRENT_PLL 0x0000001cu -#define VPU_37XX_BUTTRESS_CURRENT_PLL_RATIO_MASK GENMASK(15, 0) +#define VPU_37XX_BUTTRESS_CURRENT_PLL_RATIO_MASK GENMASK(15, 0) -#define VPU_37XX_BUTTRESS_PLL_ENABLE 0x00000020u +#define VPU_37XX_BUTTRESS_PLL_ENABLE 0x00000020u -#define VPU_37XX_BUTTRESS_FMIN_FUSE 0x00000024u -#define VPU_37XX_BUTTRESS_FMIN_FUSE_MIN_RATIO_MASK GENMASK(7, 0) -#define VPU_37XX_BUTTRESS_FMIN_FUSE_PN_RATIO_MASK GENMASK(15, 8) +#define VPU_37XX_BUTTRESS_FMIN_FUSE 0x00000024u +#define VPU_37XX_BUTTRESS_FMIN_FUSE_MIN_RATIO_MASK GENMASK(7, 0) +#define VPU_37XX_BUTTRESS_FMIN_FUSE_PN_RATIO_MASK GENMASK(15, 8) -#define VPU_37XX_BUTTRESS_FMAX_FUSE 0x00000028u -#define VPU_37XX_BUTTRESS_FMAX_FUSE_MAX_RATIO_MASK GENMASK(7, 0) +#define VPU_37XX_BUTTRESS_FMAX_FUSE 0x00000028u +#define VPU_37XX_BUTTRESS_FMAX_FUSE_MAX_RATIO_MASK GENMASK(7, 0) -#define VPU_37XX_BUTTRESS_TILE_FUSE 0x0000002cu +#define VPU_37XX_BUTTRESS_TILE_FUSE 0x0000002cu #define VPU_37XX_BUTTRESS_TILE_FUSE_VALID_MASK BIT_MASK(0) -#define VPU_37XX_BUTTRESS_TILE_FUSE_SKU_MASK GENMASK(3, 2) +#define VPU_37XX_BUTTRESS_TILE_FUSE_SKU_MASK GENMASK(3, 2) -#define VPU_37XX_BUTTRESS_LOCAL_INT_MASK 0x00000030u -#define VPU_37XX_BUTTRESS_GLOBAL_INT_MASK 0x00000034u +#define VPU_37XX_BUTTRESS_LOCAL_INT_MASK 0x00000030u +#define VPU_37XX_BUTTRESS_GLOBAL_INT_MASK 0x00000034u -#define VPU_37XX_BUTTRESS_PLL_STATUS 0x00000040u +#define VPU_37XX_BUTTRESS_PLL_STATUS 0x00000040u #define VPU_37XX_BUTTRESS_PLL_STATUS_LOCK_MASK BIT_MASK(1) -#define VPU_37XX_BUTTRESS_VPU_STATUS 0x00000044u +#define VPU_37XX_BUTTRESS_VPU_STATUS 0x00000044u #define VPU_37XX_BUTTRESS_VPU_STATUS_READY_MASK BIT_MASK(0) #define VPU_37XX_BUTTRESS_VPU_STATUS_IDLE_MASK BIT_MASK(1) -#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL 0x00000060u -#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL_INPROGRESS_MASK BIT_MASK(0) -#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL_I3_MASK BIT_MASK(2) +#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL 0x00000060u +#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL_INPROGRESS_MASK BIT_MASK(0) +#define VPU_37XX_BUTTRESS_VPU_D0I3_CONTROL_I3_MASK BIT_MASK(2) #define VPU_37XX_BUTTRESS_VPU_IP_RESET 0x00000050u -#define VPU_37XX_BUTTRESS_VPU_IP_RESET_TRIGGER_MASK BIT_MASK(0) +#define VPU_37XX_BUTTRESS_VPU_IP_RESET_TRIGGER_MASK BIT_MASK(0) #define VPU_37XX_BUTTRESS_VPU_TELEMETRY_OFFSET 0x00000080u -#define VPU_37XX_BUTTRESS_VPU_TELEMETRY_SIZE 0x00000084u +#define VPU_37XX_BUTTRESS_VPU_TELEMETRY_SIZE 0x00000084u #define VPU_37XX_BUTTRESS_VPU_TELEMETRY_ENABLE 0x00000088u #define VPU_37XX_BUTTRESS_ATS_ERR_LOG_0 0x000000a0u @@ -74,9 +74,9 @@ #define VPU_37XX_BUTTRESS_ATS_ERR_CLEAR 0x000000a8u #define VPU_37XX_BUTTRESS_UFI_ERR_LOG 0x000000b0u -#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_CQ_ID_MASK GENMASK(11, 0) -#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_AXI_ID_MASK GENMASK(19, 12) -#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_OPCODE_MASK GENMASK(24, 20) +#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_CQ_ID_MASK GENMASK(11, 0) +#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_AXI_ID_MASK GENMASK(19, 12) +#define VPU_37XX_BUTTRESS_UFI_ERR_LOG_OPCODE_MASK GENMASK(24, 20) #define VPU_37XX_BUTTRESS_UFI_ERR_CLEAR 0x000000b4u @@ -113,17 +113,17 @@ #define VPU_37XX_HOST_SS_NOC_QDENY 0x0000015cu #define VPU_37XX_HOST_SS_NOC_QDENY_TOP_SOCMMIO_MASK BIT_MASK(0) -#define MTL_VPU_TOP_NOC_QREQN 0x00000160u -#define MTL_VPU_TOP_NOC_QREQN_CPU_CTRL_MASK BIT_MASK(0) -#define MTL_VPU_TOP_NOC_QREQN_HOSTIF_L2CACHE_MASK BIT_MASK(1) +#define VPU_37XX_TOP_NOC_QREQN 0x00000160u +#define VPU_37XX_TOP_NOC_QREQN_CPU_CTRL_MASK BIT_MASK(0) +#define VPU_37XX_TOP_NOC_QREQN_HOSTIF_L2CACHE_MASK BIT_MASK(1) -#define MTL_VPU_TOP_NOC_QACCEPTN 0x00000164u -#define MTL_VPU_TOP_NOC_QACCEPTN_CPU_CTRL_MASK BIT_MASK(0) -#define MTL_VPU_TOP_NOC_QACCEPTN_HOSTIF_L2CACHE_MASK BIT_MASK(1) +#define VPU_37XX_TOP_NOC_QACCEPTN 0x00000164u +#define VPU_37XX_TOP_NOC_QACCEPTN_CPU_CTRL_MASK BIT_MASK(0) +#define VPU_37XX_TOP_NOC_QACCEPTN_HOSTIF_L2CACHE_MASK BIT_MASK(1) -#define MTL_VPU_TOP_NOC_QDENY 0x00000168u -#define MTL_VPU_TOP_NOC_QDENY_CPU_CTRL_MASK BIT_MASK(0) -#define MTL_VPU_TOP_NOC_QDENY_HOSTIF_L2CACHE_MASK BIT_MASK(1) +#define VPU_37XX_TOP_NOC_QDENY 0x00000168u +#define VPU_37XX_TOP_NOC_QDENY_CPU_CTRL_MASK BIT_MASK(0) +#define VPU_37XX_TOP_NOC_QDENY_HOSTIF_L2CACHE_MASK BIT_MASK(1) #define VPU_37XX_HOST_SS_FW_SOC_IRQ_EN 0x00000170u #define VPU_37XX_HOST_SS_FW_SOC_IRQ_EN_CSS_ROM_CMX_MASK BIT_MASK(0) @@ -140,9 +140,9 @@ #define VPU_37XX_HOST_SS_ICB_STATUS_0_TIMER_2_INT_MASK BIT_MASK(2) #define VPU_37XX_HOST_SS_ICB_STATUS_0_TIMER_3_INT_MASK BIT_MASK(3) #define VPU_37XX_HOST_SS_ICB_STATUS_0_HOST_IPC_FIFO_INT_MASK BIT_MASK(4) -#define VPU_37XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_0_INT_MASK BIT_MASK(5) -#define VPU_37XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_1_INT_MASK BIT_MASK(6) -#define VPU_37XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_2_INT_MASK BIT_MASK(7) +#define VPU_37XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_0_INT_MASK BIT_MASK(5) +#define VPU_37XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_1_INT_MASK BIT_MASK(6) +#define VPU_37XX_HOST_SS_ICB_STATUS_0_MMU_IRQ_2_INT_MASK BIT_MASK(7) #define VPU_37XX_HOST_SS_ICB_STATUS_0_NOC_FIREWALL_INT_MASK BIT_MASK(8) #define VPU_37XX_HOST_SS_ICB_STATUS_0_CPU_INT_REDIRECT_0_INT_MASK BIT_MASK(30) #define VPU_37XX_HOST_SS_ICB_STATUS_0_CPU_INT_REDIRECT_1_INT_MASK BIT_MASK(31) @@ -164,14 +164,14 @@ #define VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT_FILL_LEVEL_MASK GENMASK(23, 16) #define VPU_37XX_HOST_SS_TIM_IPC_FIFO_STAT_RSVD0_MASK GENMASK(31, 24) -#define VPU_37XX_HOST_SS_AON_PWR_ISO_EN0 0x00030020u +#define VPU_37XX_HOST_SS_AON_PWR_ISO_EN0 0x00030020u #define VPU_37XX_HOST_SS_AON_PWR_ISO_EN0_MSS_CPU_MASK BIT_MASK(3) #define VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0 0x00030024u -#define VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0_MSS_CPU_MASK BIT_MASK(3) +#define VPU_37XX_HOST_SS_AON_PWR_ISLAND_EN0_MSS_CPU_MASK BIT_MASK(3) #define VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0 0x00030028u -#define VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0_MSS_CPU_MASK BIT_MASK(3) +#define VPU_37XX_HOST_SS_AON_PWR_ISLAND_TRICKLE_EN0_MSS_CPU_MASK BIT_MASK(3) #define VPU_37XX_HOST_SS_AON_PWR_ISLAND_STATUS0 0x0003002cu #define VPU_37XX_HOST_SS_AON_PWR_ISLAND_STATUS0_MSS_CPU_MASK BIT_MASK(3) @@ -187,47 +187,14 @@ #define VPU_37XX_HOST_SS_LOADING_ADDRESS_LO_IOSF_RS_ID_MASK GENMASK(2, 1) #define VPU_37XX_HOST_SS_LOADING_ADDRESS_LO_IMAGE_LOCATION_MASK GENMASK(31, 3) -#define VPU_37XX_HOST_SS_WORKPOINT_CONFIG_MIRROR 0x00082020u +#define VPU_37XX_HOST_SS_WORKPOINT_CONFIG_MIRROR 0x00082020u #define VPU_37XX_HOST_SS_WORKPOINT_CONFIG_MIRROR_FINAL_PLL_FREQ_MASK GENMASK(15, 0) #define VPU_37XX_HOST_SS_WORKPOINT_CONFIG_MIRROR_CONFIG_ID_MASK GENMASK(31, 16) -#define VPU_37XX_HOST_MMU_IDR0 0x00200000u -#define VPU_37XX_HOST_MMU_IDR1 0x00200004u -#define VPU_37XX_HOST_MMU_IDR3 0x0020000cu -#define VPU_37XX_HOST_MMU_IDR5 0x00200014u -#define VPU_37XX_HOST_MMU_CR0 0x00200020u -#define VPU_37XX_HOST_MMU_CR0ACK 0x00200024u -#define VPU_37XX_HOST_MMU_CR1 0x00200028u -#define VPU_37XX_HOST_MMU_CR2 0x0020002cu -#define VPU_37XX_HOST_MMU_IRQ_CTRL 0x00200050u -#define VPU_37XX_HOST_MMU_IRQ_CTRLACK 0x00200054u - -#define VPU_37XX_HOST_MMU_GERROR 0x00200060u -#define VPU_37XX_HOST_MMU_GERROR_CMDQ_MASK BIT_MASK(0) -#define VPU_37XX_HOST_MMU_GERROR_EVTQ_ABT_MASK BIT_MASK(2) -#define VPU_37XX_HOST_MMU_GERROR_PRIQ_ABT_MASK BIT_MASK(3) -#define VPU_37XX_HOST_MMU_GERROR_MSI_CMDQ_ABT_MASK BIT_MASK(4) -#define VPU_37XX_HOST_MMU_GERROR_MSI_EVTQ_ABT_MASK BIT_MASK(5) -#define VPU_37XX_HOST_MMU_GERROR_MSI_PRIQ_ABT_MASK BIT_MASK(6) -#define VPU_37XX_HOST_MMU_GERROR_MSI_ABT_MASK BIT_MASK(7) - -#define VPU_37XX_HOST_MMU_GERRORN 0x00200064u - -#define VPU_37XX_HOST_MMU_STRTAB_BASE 0x00200080u -#define VPU_37XX_HOST_MMU_STRTAB_BASE_CFG 0x00200088u -#define VPU_37XX_HOST_MMU_CMDQ_BASE 0x00200090u -#define VPU_37XX_HOST_MMU_CMDQ_PROD 0x00200098u -#define VPU_37XX_HOST_MMU_CMDQ_CONS 0x0020009cu -#define VPU_37XX_HOST_MMU_EVTQ_BASE 0x002000a0u -#define VPU_37XX_HOST_MMU_EVTQ_PROD 0x002000a8u -#define VPU_37XX_HOST_MMU_EVTQ_CONS 0x002000acu -#define VPU_37XX_HOST_MMU_EVTQ_PROD_SEC (0x002000a8u + SZ_64K) -#define VPU_37XX_HOST_MMU_EVTQ_CONS_SEC (0x002000acu + SZ_64K) - #define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES 0x00360000u #define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_CACHE_OVERRIDE_EN_MASK BIT_MASK(0) -#define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_AWCACHE_OVERRIDE_MASK BIT_MASK(1) -#define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_ARCACHE_OVERRIDE_MASK BIT_MASK(2) +#define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_AWCACHE_OVERRIDE_MASK BIT_MASK(1) +#define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_ARCACHE_OVERRIDE_MASK BIT_MASK(2) #define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_NOSNOOP_OVERRIDE_EN_MASK BIT_MASK(3) #define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_AW_NOSNOOP_OVERRIDE_MASK BIT_MASK(4) #define VPU_37XX_HOST_IF_TCU_PTW_OVERRIDES_AR_NOSNOOP_OVERRIDE_MASK BIT_MASK(5) @@ -246,36 +213,36 @@ #define VPU_37XX_HOST_IF_TBU_MMUSSIDV_TBU4_AWMMUSSIDV_MASK BIT_MASK(8) #define VPU_37XX_HOST_IF_TBU_MMUSSIDV_TBU4_ARMMUSSIDV_MASK BIT_MASK(9) -#define MTL_VPU_CPU_SS_DSU_LEON_RT_BASE 0x04000000u -#define MTL_VPU_CPU_SS_DSU_LEON_RT_DSU_CTRL 0x04000000u -#define MTL_VPU_CPU_SS_DSU_LEON_RT_PC_REG 0x04400010u -#define MTL_VPU_CPU_SS_DSU_LEON_RT_NPC_REG 0x04400014u -#define MTL_VPU_CPU_SS_DSU_LEON_RT_DSU_TRAP_REG 0x04400020u +#define VPU_37XX_CPU_SS_DSU_LEON_RT_BASE 0x04000000u +#define VPU_37XX_CPU_SS_DSU_LEON_RT_DSU_CTRL 0x04000000u +#define VPU_37XX_CPU_SS_DSU_LEON_RT_PC_REG 0x04400010u +#define VPU_37XX_CPU_SS_DSU_LEON_RT_NPC_REG 0x04400014u +#define VPU_37XX_CPU_SS_DSU_LEON_RT_DSU_TRAP_REG 0x04400020u -#define MTL_VPU_CPU_SS_MSSCPU_CPR_CLK_SET 0x06010004u -#define MTL_VPU_CPU_SS_MSSCPU_CPR_CLK_SET_CPU_DSU_MASK BIT_MASK(1) +#define VPU_37XX_CPU_SS_MSSCPU_CPR_CLK_SET 0x06010004u +#define VPU_37XX_CPU_SS_MSSCPU_CPR_CLK_SET_CPU_DSU_MASK BIT_MASK(1) -#define MTL_VPU_CPU_SS_MSSCPU_CPR_RST_CLR 0x06010018u -#define MTL_VPU_CPU_SS_MSSCPU_CPR_RST_CLR_CPU_DSU_MASK BIT_MASK(1) +#define VPU_37XX_CPU_SS_MSSCPU_CPR_RST_CLR 0x06010018u +#define VPU_37XX_CPU_SS_MSSCPU_CPR_RST_CLR_CPU_DSU_MASK BIT_MASK(1) -#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC 0x06010040u -#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN0_MASK BIT_MASK(0) -#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME0_MASK BIT_MASK(1) -#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN1_MASK BIT_MASK(2) -#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME1_MASK BIT_MASK(3) -#define MTL_VPU_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTVEC_MASK GENMASK(31, 4) +#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC 0x06010040u +#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN0_MASK BIT_MASK(0) +#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME0_MASK BIT_MASK(1) +#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTRUN1_MASK BIT_MASK(2) +#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RESUME1_MASK BIT_MASK(3) +#define VPU_37XX_CPU_SS_MSSCPU_CPR_LEON_RT_VEC_IRQI_RSTVEC_MASK GENMASK(31, 4) -#define MTL_VPU_CPU_SS_TIM_WATCHDOG 0x0602009cu -#define MTL_VPU_CPU_SS_TIM_WDOG_EN 0x060200a4u -#define MTL_VPU_CPU_SS_TIM_SAFE 0x060200a8u -#define MTL_VPU_CPU_SS_TIM_IPC_FIFO 0x060200f0u +#define VPU_37XX_CPU_SS_TIM_WATCHDOG 0x0602009cu +#define VPU_37XX_CPU_SS_TIM_WDOG_EN 0x060200a4u +#define VPU_37XX_CPU_SS_TIM_SAFE 0x060200a8u +#define VPU_37XX_CPU_SS_TIM_IPC_FIFO 0x060200f0u -#define MTL_VPU_CPU_SS_TIM_GEN_CONFIG 0x06021008u -#define MTL_VPU_CPU_SS_TIM_GEN_CONFIG_WDOG_TO_INT_CLR_MASK BIT_MASK(9) +#define VPU_37XX_CPU_SS_TIM_GEN_CONFIG 0x06021008u +#define VPU_37XX_CPU_SS_TIM_GEN_CONFIG_WDOG_TO_INT_CLR_MASK BIT_MASK(9) -#define MTL_VPU_CPU_SS_DOORBELL_0 0x06300000u -#define MTL_VPU_CPU_SS_DOORBELL_0_SET_MASK BIT_MASK(0) +#define VPU_37XX_CPU_SS_DOORBELL_0 0x06300000u +#define VPU_37XX_CPU_SS_DOORBELL_0_SET_MASK BIT_MASK(0) -#define MTL_VPU_CPU_SS_DOORBELL_1 0x06301000u +#define VPU_37XX_CPU_SS_DOORBELL_1 0x06301000u -#endif /* __IVPU_HW_MTL_REG_H__ */ +#endif /* __IVPU_HW_37XX_REG_H__ */ diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c index 03600a7a5a..40f9ee99ec 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ b/drivers/accel/ivpu/ivpu_hw_40xx.c @@ -24,7 +24,7 @@ #define SKU_HW_ID_SHIFT 16u #define SKU_HW_ID_MASK 0xffff0000u -#define PLL_CONFIG_DEFAULT 0x1 +#define PLL_CONFIG_DEFAULT 0x0 #define PLL_CDYN_DEFAULT 0x80 #define PLL_EPP_DEFAULT 0x80 #define PLL_REF_CLK_FREQ (50 * 1000000) @@ -138,16 +138,19 @@ static void ivpu_hw_timeouts_init(struct ivpu_device *vdev) vdev->timeout.jsm = 50000; vdev->timeout.tdr = 2000000; vdev->timeout.reschedule_suspend = 1000; + vdev->timeout.autosuspend = -1; } else if (ivpu_is_simics(vdev)) { vdev->timeout.boot = 50; vdev->timeout.jsm = 500; vdev->timeout.tdr = 10000; vdev->timeout.reschedule_suspend = 10; + vdev->timeout.autosuspend = -1; } else { vdev->timeout.boot = 1000; vdev->timeout.jsm = 500; vdev->timeout.tdr = 2000; vdev->timeout.reschedule_suspend = 10; + vdev->timeout.autosuspend = 10; } } @@ -523,7 +526,7 @@ static void ivpu_boot_no_snoop_enable(struct ivpu_device *vdev) u32 val = REGV_RD32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES); val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, SNOOP_OVERRIDE_EN, val); - val = REG_CLR_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AW_SNOOP_OVERRIDE, val); + val = REG_SET_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AW_SNOOP_OVERRIDE, val); val = REG_CLR_FLD(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, AR_SNOOP_OVERRIDE, val); REGV_WR32(VPU_40XX_HOST_IF_TCU_PTW_OVERRIDES, val); @@ -697,7 +700,6 @@ static int ivpu_hw_40xx_info_init(struct ivpu_device *vdev) { struct ivpu_hw_info *hw = vdev->hw; u32 tile_disable; - u32 tile_enable; u32 fuse; fuse = REGB_RD32(VPU_40XX_BUTTRESS_TILE_FUSE); @@ -718,10 +720,6 @@ static int ivpu_hw_40xx_info_init(struct ivpu_device *vdev) else ivpu_dbg(vdev, MISC, "Fuse: All %d tiles enabled\n", TILE_MAX_NUM); - tile_enable = (~tile_disable) & TILE_MAX_MASK; - - hw->sku = REG_SET_FLD_NUM(SKU, HW_ID, LNL_HW_ID, hw->sku); - hw->sku = REG_SET_FLD_NUM(SKU, TILE, tile_enable, hw->sku); hw->tile_fuse = tile_disable; hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT; diff --git a/drivers/accel/ivpu/ivpu_hw_reg_io.h b/drivers/accel/ivpu/ivpu_hw_reg_io.h index 43c2c0c2d0..79b3f441ea 100644 --- a/drivers/accel/ivpu/ivpu_hw_reg_io.h +++ b/drivers/accel/ivpu/ivpu_hw_reg_io.h @@ -47,22 +47,30 @@ #define REG_TEST_FLD_NUM(REG, FLD, num, val) \ ((num) == FIELD_GET(REG##_##FLD##_MASK, val)) -#define REGB_POLL(reg, var, cond, timeout_us) \ - read_poll_timeout(REGB_RD32_SILENT, var, cond, REG_POLL_SLEEP_US, timeout_us, false, reg) - -#define REGV_POLL(reg, var, cond, timeout_us) \ - read_poll_timeout(REGV_RD32_SILENT, var, cond, REG_POLL_SLEEP_US, timeout_us, false, reg) - #define REGB_POLL_FLD(reg, fld, val, timeout_us) \ ({ \ u32 var; \ - REGB_POLL(reg, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)), timeout_us); \ + int r; \ + ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s started (expected 0x%x)\n", \ + __func__, #reg, reg, #fld, val); \ + r = read_poll_timeout(REGB_RD32_SILENT, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)),\ + REG_POLL_SLEEP_US, timeout_us, false, (reg)); \ + ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s %s (reg val 0x%08x)\n", \ + __func__, #reg, reg, #fld, r ? "ETIMEDOUT" : "OK", var); \ + r; \ }) #define REGV_POLL_FLD(reg, fld, val, timeout_us) \ ({ \ u32 var; \ - REGV_POLL(reg, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)), timeout_us); \ + int r; \ + ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s started (expected 0x%x)\n", \ + __func__, #reg, reg, #fld, val); \ + r = read_poll_timeout(REGV_RD32_SILENT, var, (FIELD_GET(reg##_##fld##_MASK, var) == (val)),\ + REG_POLL_SLEEP_US, timeout_us, false, (reg)); \ + ivpu_dbg(vdev, REG, "%s : %s (0x%08x) Polling field %s %s (reg val 0x%08x)\n", \ + __func__, #reg, reg, #fld, r ? "ETIMEDOUT" : "OK", var); \ + r; \ }) static inline u32 @@ -71,7 +79,7 @@ ivpu_hw_reg_rd32(struct ivpu_device *vdev, void __iomem *base, u32 reg, { u32 val = readl(base + reg); - ivpu_dbg(vdev, REG, "%s RD: %s (0x%08x) => 0x%08x\n", func, name, reg, val); + ivpu_dbg(vdev, REG, "%s : %s (0x%08x) RD: 0x%08x\n", func, name, reg, val); return val; } @@ -81,7 +89,7 @@ ivpu_hw_reg_rd64(struct ivpu_device *vdev, void __iomem *base, u32 reg, { u64 val = readq(base + reg); - ivpu_dbg(vdev, REG, "%s RD: %s (0x%08x) => 0x%016llx\n", func, name, reg, val); + ivpu_dbg(vdev, REG, "%s : %s (0x%08x) RD: 0x%016llx\n", func, name, reg, val); return val; } @@ -89,7 +97,7 @@ static inline void ivpu_hw_reg_wr32(struct ivpu_device *vdev, void __iomem *base, u32 reg, u32 val, const char *name, const char *func) { - ivpu_dbg(vdev, REG, "%s WR: %s (0x%08x) <= 0x%08x\n", func, name, reg, val); + ivpu_dbg(vdev, REG, "%s : %s (0x%08x) WR: 0x%08x\n", func, name, reg, val); writel(val, base + reg); } @@ -97,7 +105,7 @@ static inline void ivpu_hw_reg_wr64(struct ivpu_device *vdev, void __iomem *base, u32 reg, u64 val, const char *name, const char *func) { - ivpu_dbg(vdev, REG, "%s WR: %s (0x%08x) <= 0x%016llx\n", func, name, reg, val); + ivpu_dbg(vdev, REG, "%s : %s (0x%08x) WR: 0x%016llx\n", func, name, reg, val); writeq(val, base + reg); } diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c index 295c0d7b50..a4ca40b184 100644 --- a/drivers/accel/ivpu/ivpu_ipc.c +++ b/drivers/accel/ivpu/ivpu_ipc.c @@ -45,8 +45,9 @@ static void ivpu_jsm_msg_dump(struct ivpu_device *vdev, char *c, u32 *payload = (u32 *)&jsm_msg->payload; ivpu_dbg(vdev, JSM, - "%s: vpu:0x%08x (type:0x%x, status:0x%x, id: 0x%x, result: 0x%x, payload:0x%x 0x%x 0x%x 0x%x 0x%x)\n", - c, vpu_addr, jsm_msg->type, jsm_msg->status, jsm_msg->request_id, jsm_msg->result, + "%s: vpu:0x%08x (type:%s, status:0x%x, id: 0x%x, result: 0x%x, payload:0x%x 0x%x 0x%x 0x%x 0x%x)\n", + c, vpu_addr, ivpu_jsm_msg_type_to_str(jsm_msg->type), + jsm_msg->status, jsm_msg->request_id, jsm_msg->result, payload[0], payload[1], payload[2], payload[3], payload[4]); } @@ -79,8 +80,8 @@ ivpu_ipc_tx_prepare(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, tx_buf_vpu_addr = gen_pool_alloc(ipc->mm_tx, sizeof(*tx_buf)); if (!tx_buf_vpu_addr) { - ivpu_err(vdev, "Failed to reserve IPC buffer, size %ld\n", - sizeof(*tx_buf)); + ivpu_err_ratelimited(vdev, "Failed to reserve IPC buffer, size %ld\n", + sizeof(*tx_buf)); return -ENOMEM; } @@ -93,12 +94,12 @@ ivpu_ipc_tx_prepare(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, jsm_vpu_addr = tx_buf_vpu_addr + offsetof(struct ivpu_ipc_tx_buf, jsm); if (tx_buf->ipc.status != IVPU_IPC_HDR_FREE) - ivpu_warn(vdev, "IPC message vpu:0x%x not released by firmware\n", - tx_buf_vpu_addr); + ivpu_warn_ratelimited(vdev, "IPC message vpu:0x%x not released by firmware\n", + tx_buf_vpu_addr); if (tx_buf->jsm.status != VPU_JSM_MSG_FREE) - ivpu_warn(vdev, "JSM message vpu:0x%x not released by firmware\n", - jsm_vpu_addr); + ivpu_warn_ratelimited(vdev, "JSM message vpu:0x%x not released by firmware\n", + jsm_vpu_addr); memset(tx_buf, 0, sizeof(*tx_buf)); tx_buf->ipc.data_addr = jsm_vpu_addr; @@ -263,18 +264,19 @@ ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req ret = ivpu_ipc_send(vdev, &cons, req); if (ret) { - ivpu_warn(vdev, "IPC send failed: %d\n", ret); + ivpu_warn_ratelimited(vdev, "IPC send failed: %d\n", ret); goto consumer_del; } ret = ivpu_ipc_receive(vdev, &cons, NULL, resp, timeout_ms); if (ret) { - ivpu_warn(vdev, "IPC receive failed: type 0x%x, ret %d\n", req->type, ret); + ivpu_warn_ratelimited(vdev, "IPC receive failed: type %s, ret %d\n", + ivpu_jsm_msg_type_to_str(req->type), ret); goto consumer_del; } if (resp->type != expected_resp_type) { - ivpu_warn(vdev, "Invalid JSM response type: 0x%x\n", resp->type); + ivpu_warn_ratelimited(vdev, "Invalid JSM response type: 0x%x\n", resp->type); ret = -EBADE; } @@ -372,13 +374,13 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev) while (ivpu_hw_reg_ipc_rx_count_get(vdev)) { vpu_addr = ivpu_hw_reg_ipc_rx_addr_get(vdev); if (vpu_addr == REG_IO_ERROR) { - ivpu_err(vdev, "Failed to read IPC rx addr register\n"); + ivpu_err_ratelimited(vdev, "Failed to read IPC rx addr register\n"); return -EIO; } ipc_hdr = ivpu_to_cpu_addr(ipc->mem_rx, vpu_addr); if (!ipc_hdr) { - ivpu_warn(vdev, "IPC msg 0x%x out of range\n", vpu_addr); + ivpu_warn_ratelimited(vdev, "IPC msg 0x%x out of range\n", vpu_addr); continue; } ivpu_ipc_msg_dump(vdev, "RX", ipc_hdr, vpu_addr); @@ -387,7 +389,8 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev) if (ipc_hdr->channel != IVPU_IPC_CHAN_BOOT_MSG) { jsm_msg = ivpu_to_cpu_addr(ipc->mem_rx, ipc_hdr->data_addr); if (!jsm_msg) { - ivpu_warn(vdev, "JSM msg 0x%x out of range\n", ipc_hdr->data_addr); + ivpu_warn_ratelimited(vdev, "JSM msg 0x%x out of range\n", + ipc_hdr->data_addr); ivpu_ipc_rx_mark_free(vdev, ipc_hdr, NULL); continue; } @@ -395,7 +398,8 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev) } if (atomic_read(&ipc->rx_msg_count) > IPC_MAX_RX_MSG) { - ivpu_warn(vdev, "IPC RX msg dropped, msg count %d\n", IPC_MAX_RX_MSG); + ivpu_warn_ratelimited(vdev, "IPC RX msg dropped, msg count %d\n", + IPC_MAX_RX_MSG); ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg); continue; } @@ -423,15 +427,20 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev) int ivpu_ipc_init(struct ivpu_device *vdev) { struct ivpu_ipc_info *ipc = vdev->ipc; - int ret = -ENOMEM; + int ret; ipc->mem_tx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC); - if (!ipc->mem_tx) - return ret; + if (!ipc->mem_tx) { + ivpu_err(vdev, "Failed to allocate mem_tx\n"); + return -ENOMEM; + } ipc->mem_rx = ivpu_bo_alloc_internal(vdev, 0, SZ_16K, DRM_IVPU_BO_WC); - if (!ipc->mem_rx) + if (!ipc->mem_rx) { + ivpu_err(vdev, "Failed to allocate mem_rx\n"); + ret = -ENOMEM; goto err_free_tx; + } ipc->mm_tx = devm_gen_pool_create(vdev->drm.dev, __ffs(IVPU_IPC_ALIGNMENT), -1, "TX_IPC_JSM"); @@ -441,7 +450,7 @@ int ivpu_ipc_init(struct ivpu_device *vdev) goto err_free_rx; } - ret = gen_pool_add(ipc->mm_tx, ipc->mem_tx->vpu_addr, ipc->mem_tx->base.size, -1); + ret = gen_pool_add(ipc->mm_tx, ipc->mem_tx->vpu_addr, ivpu_bo_size(ipc->mem_tx), -1); if (ret) { ivpu_err(vdev, "gen_pool_add failed, ret %d\n", ret); goto err_free_rx; @@ -497,8 +506,8 @@ void ivpu_ipc_reset(struct ivpu_device *vdev) mutex_lock(&ipc->lock); - memset(ipc->mem_tx->kvaddr, 0, ipc->mem_tx->base.size); - memset(ipc->mem_rx->kvaddr, 0, ipc->mem_rx->base.size); + memset(ivpu_bo_vaddr(ipc->mem_tx), 0, ivpu_bo_size(ipc->mem_tx)); + memset(ivpu_bo_vaddr(ipc->mem_rx), 0, ivpu_bo_size(ipc->mem_rx)); wmb(); /* Flush WC buffers for TX and RX rings */ mutex_unlock(&ipc->lock); diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index de9e69f70a..8983e3a4fd 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -48,10 +48,10 @@ static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv, u16 e goto cmdq_free; cmdq->db_id = file_priv->ctx.id + engine * ivpu_get_context_count(vdev); - cmdq->entry_count = (u32)((cmdq->mem->base.size - sizeof(struct vpu_job_queue_header)) / + cmdq->entry_count = (u32)((ivpu_bo_size(cmdq->mem) - sizeof(struct vpu_job_queue_header)) / sizeof(struct vpu_job_queue_entry)); - cmdq->jobq = (struct vpu_job_queue *)cmdq->mem->kvaddr; + cmdq->jobq = (struct vpu_job_queue *)ivpu_bo_vaddr(cmdq->mem); jobq_header = &cmdq->jobq->header; jobq_header->engine_idx = engine; jobq_header->head = 0; @@ -93,7 +93,7 @@ static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u16 return cmdq; ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id, - cmdq->mem->vpu_addr, cmdq->mem->base.size); + cmdq->mem->vpu_addr, ivpu_bo_size(cmdq->mem)); if (ret) return NULL; @@ -453,7 +453,7 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 return -EBUSY; } - if (commands_offset >= bo->base.size) { + if (commands_offset >= ivpu_bo_size(bo)) { ivpu_warn(vdev, "Invalid command buffer offset %u\n", commands_offset); return -EINVAL; } @@ -618,6 +618,5 @@ int ivpu_job_done_thread_init(struct ivpu_device *vdev) void ivpu_job_done_thread_fini(struct ivpu_device *vdev) { - kthread_stop(vdev->job_done_thread); - put_task_struct(vdev->job_done_thread); + kthread_stop_put(vdev->job_done_thread); } diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h index aa1f0b9479..5514c2d8a6 100644 --- a/drivers/accel/ivpu/ivpu_job.h +++ b/drivers/accel/ivpu/ivpu_job.h @@ -51,7 +51,7 @@ struct ivpu_job { u32 job_id; u32 engine_idx; size_t bo_count; - struct ivpu_bo *bos[]; + struct ivpu_bo *bos[] __counted_by(bo_count); }; int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file); diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c index bdddef2c59..0c2fe71420 100644 --- a/drivers/accel/ivpu/ivpu_jsm_msg.c +++ b/drivers/accel/ivpu/ivpu_jsm_msg.c @@ -7,6 +7,70 @@ #include "ivpu_ipc.h" #include "ivpu_jsm_msg.h" +const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type) +{ + #define IVPU_CASE_TO_STR(x) case x: return #x + switch (type) { + IVPU_CASE_TO_STR(VPU_JSM_MSG_UNKNOWN); + IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_RESET); + IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_PREEMPT); + IVPU_CASE_TO_STR(VPU_JSM_MSG_REGISTER_DB); + IVPU_CASE_TO_STR(VPU_JSM_MSG_UNREGISTER_DB); + IVPU_CASE_TO_STR(VPU_JSM_MSG_QUERY_ENGINE_HB); + IVPU_CASE_TO_STR(VPU_JSM_MSG_GET_POWER_LEVEL_COUNT); + IVPU_CASE_TO_STR(VPU_JSM_MSG_GET_POWER_LEVEL); + IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_POWER_LEVEL); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_OPEN); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_CLOSE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_TRACE_SET_CONFIG); + IVPU_CASE_TO_STR(VPU_JSM_MSG_TRACE_GET_CONFIG); + IVPU_CASE_TO_STR(VPU_JSM_MSG_TRACE_GET_CAPABILITY); + IVPU_CASE_TO_STR(VPU_JSM_MSG_TRACE_GET_NAME); + IVPU_CASE_TO_STR(VPU_JSM_MSG_SSID_RELEASE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_START); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_STOP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_UPDATE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_INFO); + IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_CREATE_CMD_QUEUE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_DESTROY_CMD_QUEUE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES); + IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_REGISTER_DB); + IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT); + IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL); + IVPU_CASE_TO_STR(VPU_JSM_MSG_JOB_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_RESET_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_ENGINE_PREEMPT_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_REGISTER_DB_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_UNREGISTER_DB_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_QUERY_ENGINE_HB_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_GET_POWER_LEVEL_COUNT_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_GET_POWER_LEVEL_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_POWER_LEVEL_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_OPEN_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_CLOSE_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_TRACE_SET_CONFIG_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_TRACE_GET_CONFIG_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_TRACE_GET_CAPABILITY_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_TRACE_GET_NAME_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_SSID_RELEASE_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_START_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_STOP_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_UPDATE_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_INFO_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_METRIC_STREAMER_NOTIFICATION); + IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_PRIORITY_BAND_SETUP_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_DESTROY_CMD_QUEUE_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL_RSP); + } + #undef IVPU_CASE_TO_STR + + return "Unknown JSM message type"; +} + int ivpu_jsm_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 db_id, u64 jobq_base, u32 jobq_size) { @@ -22,7 +86,7 @@ int ivpu_jsm_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 db_id, ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_REGISTER_DB_DONE, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); if (ret) { - ivpu_err(vdev, "Failed to register doorbell %d: %d\n", db_id, ret); + ivpu_err_ratelimited(vdev, "Failed to register doorbell %d: %d\n", db_id, ret); return ret; } @@ -42,7 +106,7 @@ int ivpu_jsm_unregister_db(struct ivpu_device *vdev, u32 db_id) ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_UNREGISTER_DB_DONE, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); if (ret) { - ivpu_warn(vdev, "Failed to unregister doorbell %d: %d\n", db_id, ret); + ivpu_warn_ratelimited(vdev, "Failed to unregister doorbell %d: %d\n", db_id, ret); return ret; } @@ -65,7 +129,8 @@ int ivpu_jsm_get_heartbeat(struct ivpu_device *vdev, u32 engine, u64 *heartbeat) ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); if (ret) { - ivpu_err(vdev, "Failed to get heartbeat from engine %d: %d\n", engine, ret); + ivpu_err_ratelimited(vdev, "Failed to get heartbeat from engine %d: %d\n", + engine, ret); return ret; } @@ -87,7 +152,7 @@ int ivpu_jsm_reset_engine(struct ivpu_device *vdev, u32 engine) ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_ENGINE_RESET_DONE, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); if (ret) - ivpu_err(vdev, "Failed to reset engine %d: %d\n", engine, ret); + ivpu_err_ratelimited(vdev, "Failed to reset engine %d: %d\n", engine, ret); return ret; } @@ -107,7 +172,7 @@ int ivpu_jsm_preempt_engine(struct ivpu_device *vdev, u32 engine, u32 preempt_id ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_ENGINE_PREEMPT_DONE, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); if (ret) - ivpu_err(vdev, "Failed to preempt engine %d: %d\n", engine, ret); + ivpu_err_ratelimited(vdev, "Failed to preempt engine %d: %d\n", engine, ret); return ret; } @@ -123,7 +188,8 @@ int ivpu_jsm_dyndbg_control(struct ivpu_device *vdev, char *command, size_t size ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_DYNDBG_CONTROL_RSP, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); if (ret) - ivpu_warn(vdev, "Failed to send command \"%s\": ret %d\n", command, ret); + ivpu_warn_ratelimited(vdev, "Failed to send command \"%s\": ret %d\n", + command, ret); return ret; } @@ -138,7 +204,7 @@ int ivpu_jsm_trace_get_capability(struct ivpu_device *vdev, u32 *trace_destinati ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_TRACE_GET_CAPABILITY_RSP, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); if (ret) { - ivpu_warn(vdev, "Failed to get trace capability: %d\n", ret); + ivpu_warn_ratelimited(vdev, "Failed to get trace capability: %d\n", ret); return ret; } @@ -162,7 +228,7 @@ int ivpu_jsm_trace_set_config(struct ivpu_device *vdev, u32 trace_level, u32 tra ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_TRACE_SET_CONFIG_RSP, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); if (ret) - ivpu_warn(vdev, "Failed to set config: %d\n", ret); + ivpu_warn_ratelimited(vdev, "Failed to set config: %d\n", ret); return ret; } diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.h b/drivers/accel/ivpu/ivpu_jsm_msg.h index ab50d7b017..66979a948c 100644 --- a/drivers/accel/ivpu/ivpu_jsm_msg.h +++ b/drivers/accel/ivpu/ivpu_jsm_msg.h @@ -8,6 +8,8 @@ #include "vpu_jsm_api.h" +const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type); + int ivpu_jsm_register_db(struct ivpu_device *vdev, u32 ctx_id, u32 db_id, u64 jobq_base, u32 jobq_size); int ivpu_jsm_unregister_db(struct ivpu_device *vdev, u32 db_id); diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c index baefaf7bb3..9898946174 100644 --- a/drivers/accel/ivpu/ivpu_mmu.c +++ b/drivers/accel/ivpu/ivpu_mmu.c @@ -7,12 +7,48 @@ #include <linux/highmem.h> #include "ivpu_drv.h" -#include "ivpu_hw_37xx_reg.h" #include "ivpu_hw_reg_io.h" #include "ivpu_mmu.h" #include "ivpu_mmu_context.h" #include "ivpu_pm.h" +#define IVPU_MMU_REG_IDR0 0x00200000u +#define IVPU_MMU_REG_IDR1 0x00200004u +#define IVPU_MMU_REG_IDR3 0x0020000cu +#define IVPU_MMU_REG_IDR5 0x00200014u +#define IVPU_MMU_REG_CR0 0x00200020u +#define IVPU_MMU_REG_CR0ACK 0x00200024u +#define IVPU_MMU_REG_CR0ACK_VAL_MASK GENMASK(31, 0) +#define IVPU_MMU_REG_CR1 0x00200028u +#define IVPU_MMU_REG_CR2 0x0020002cu +#define IVPU_MMU_REG_IRQ_CTRL 0x00200050u +#define IVPU_MMU_REG_IRQ_CTRLACK 0x00200054u +#define IVPU_MMU_REG_IRQ_CTRLACK_VAL_MASK GENMASK(31, 0) + +#define IVPU_MMU_REG_GERROR 0x00200060u +#define IVPU_MMU_REG_GERROR_CMDQ_MASK BIT_MASK(0) +#define IVPU_MMU_REG_GERROR_EVTQ_ABT_MASK BIT_MASK(2) +#define IVPU_MMU_REG_GERROR_PRIQ_ABT_MASK BIT_MASK(3) +#define IVPU_MMU_REG_GERROR_MSI_CMDQ_ABT_MASK BIT_MASK(4) +#define IVPU_MMU_REG_GERROR_MSI_EVTQ_ABT_MASK BIT_MASK(5) +#define IVPU_MMU_REG_GERROR_MSI_PRIQ_ABT_MASK BIT_MASK(6) +#define IVPU_MMU_REG_GERROR_MSI_ABT_MASK BIT_MASK(7) + +#define IVPU_MMU_REG_GERRORN 0x00200064u + +#define IVPU_MMU_REG_STRTAB_BASE 0x00200080u +#define IVPU_MMU_REG_STRTAB_BASE_CFG 0x00200088u +#define IVPU_MMU_REG_CMDQ_BASE 0x00200090u +#define IVPU_MMU_REG_CMDQ_PROD 0x00200098u +#define IVPU_MMU_REG_CMDQ_CONS 0x0020009cu +#define IVPU_MMU_REG_CMDQ_CONS_VAL_MASK GENMASK(23, 0) +#define IVPU_MMU_REG_CMDQ_CONS_ERR_MASK GENMASK(30, 24) +#define IVPU_MMU_REG_EVTQ_BASE 0x002000a0u +#define IVPU_MMU_REG_EVTQ_PROD 0x002000a8u +#define IVPU_MMU_REG_EVTQ_CONS 0x002000acu +#define IVPU_MMU_REG_EVTQ_PROD_SEC (0x002000a8u + SZ_64K) +#define IVPU_MMU_REG_EVTQ_CONS_SEC (0x002000acu + SZ_64K) + #define IVPU_MMU_IDR0_REF 0x080f3e0f #define IVPU_MMU_IDR0_REF_SIMICS 0x080f3e1f #define IVPU_MMU_IDR1_REF 0x0e739d18 @@ -186,13 +222,13 @@ #define IVPU_MMU_REG_TIMEOUT_US (10 * USEC_PER_MSEC) #define IVPU_MMU_QUEUE_TIMEOUT_US (100 * USEC_PER_MSEC) -#define IVPU_MMU_GERROR_ERR_MASK ((REG_FLD(VPU_37XX_HOST_MMU_GERROR, CMDQ)) | \ - (REG_FLD(VPU_37XX_HOST_MMU_GERROR, EVTQ_ABT)) | \ - (REG_FLD(VPU_37XX_HOST_MMU_GERROR, PRIQ_ABT)) | \ - (REG_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_CMDQ_ABT)) | \ - (REG_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_EVTQ_ABT)) | \ - (REG_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_PRIQ_ABT)) | \ - (REG_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_ABT))) +#define IVPU_MMU_GERROR_ERR_MASK ((REG_FLD(IVPU_MMU_REG_GERROR, CMDQ)) | \ + (REG_FLD(IVPU_MMU_REG_GERROR, EVTQ_ABT)) | \ + (REG_FLD(IVPU_MMU_REG_GERROR, PRIQ_ABT)) | \ + (REG_FLD(IVPU_MMU_REG_GERROR, MSI_CMDQ_ABT)) | \ + (REG_FLD(IVPU_MMU_REG_GERROR, MSI_EVTQ_ABT)) | \ + (REG_FLD(IVPU_MMU_REG_GERROR, MSI_PRIQ_ABT)) | \ + (REG_FLD(IVPU_MMU_REG_GERROR, MSI_ABT))) static char *ivpu_mmu_event_to_str(u32 cmd) { @@ -250,15 +286,15 @@ static void ivpu_mmu_config_check(struct ivpu_device *vdev) else val_ref = IVPU_MMU_IDR0_REF; - val = REGV_RD32(VPU_37XX_HOST_MMU_IDR0); + val = REGV_RD32(IVPU_MMU_REG_IDR0); if (val != val_ref) ivpu_dbg(vdev, MMU, "IDR0 0x%x != IDR0_REF 0x%x\n", val, val_ref); - val = REGV_RD32(VPU_37XX_HOST_MMU_IDR1); + val = REGV_RD32(IVPU_MMU_REG_IDR1); if (val != IVPU_MMU_IDR1_REF) ivpu_dbg(vdev, MMU, "IDR1 0x%x != IDR1_REF 0x%x\n", val, IVPU_MMU_IDR1_REF); - val = REGV_RD32(VPU_37XX_HOST_MMU_IDR3); + val = REGV_RD32(IVPU_MMU_REG_IDR3); if (val != IVPU_MMU_IDR3_REF) ivpu_dbg(vdev, MMU, "IDR3 0x%x != IDR3_REF 0x%x\n", val, IVPU_MMU_IDR3_REF); @@ -269,7 +305,7 @@ static void ivpu_mmu_config_check(struct ivpu_device *vdev) else val_ref = IVPU_MMU_IDR5_REF; - val = REGV_RD32(VPU_37XX_HOST_MMU_IDR5); + val = REGV_RD32(IVPU_MMU_REG_IDR5); if (val != val_ref) ivpu_dbg(vdev, MMU, "IDR5 0x%x != IDR5_REF 0x%x\n", val, val_ref); } @@ -376,19 +412,18 @@ static int ivpu_mmu_structs_alloc(struct ivpu_device *vdev) return ret; } -static int ivpu_mmu_reg_write(struct ivpu_device *vdev, u32 reg, u32 val) +static int ivpu_mmu_reg_write_cr0(struct ivpu_device *vdev, u32 val) { - u32 reg_ack = reg + 4; /* ACK register is 4B after base register */ - u32 val_ack; - int ret; + REGV_WR32(IVPU_MMU_REG_CR0, val); - REGV_WR32(reg, val); + return REGV_POLL_FLD(IVPU_MMU_REG_CR0ACK, VAL, val, IVPU_MMU_REG_TIMEOUT_US); +} - ret = REGV_POLL(reg_ack, val_ack, (val == val_ack), IVPU_MMU_REG_TIMEOUT_US); - if (ret) - ivpu_err(vdev, "Failed to write register 0x%x\n", reg); +static int ivpu_mmu_reg_write_irq_ctrl(struct ivpu_device *vdev, u32 val) +{ + REGV_WR32(IVPU_MMU_REG_IRQ_CTRL, val); - return ret; + return REGV_POLL_FLD(IVPU_MMU_REG_IRQ_CTRLACK, VAL, val, IVPU_MMU_REG_TIMEOUT_US); } static int ivpu_mmu_irqs_setup(struct ivpu_device *vdev) @@ -396,19 +431,26 @@ static int ivpu_mmu_irqs_setup(struct ivpu_device *vdev) u32 irq_ctrl = IVPU_MMU_IRQ_EVTQ_EN | IVPU_MMU_IRQ_GERROR_EN; int ret; - ret = ivpu_mmu_reg_write(vdev, VPU_37XX_HOST_MMU_IRQ_CTRL, 0); + ret = ivpu_mmu_reg_write_irq_ctrl(vdev, 0); if (ret) return ret; - return ivpu_mmu_reg_write(vdev, VPU_37XX_HOST_MMU_IRQ_CTRL, irq_ctrl); + return ivpu_mmu_reg_write_irq_ctrl(vdev, irq_ctrl); } static int ivpu_mmu_cmdq_wait_for_cons(struct ivpu_device *vdev) { struct ivpu_mmu_queue *cmdq = &vdev->mmu->cmdq; + int ret; - return REGV_POLL(VPU_37XX_HOST_MMU_CMDQ_CONS, cmdq->cons, (cmdq->prod == cmdq->cons), - IVPU_MMU_QUEUE_TIMEOUT_US); + ret = REGV_POLL_FLD(IVPU_MMU_REG_CMDQ_CONS, VAL, cmdq->prod, + IVPU_MMU_QUEUE_TIMEOUT_US); + if (ret) + return ret; + + cmdq->cons = cmdq->prod; + + return 0; } static int ivpu_mmu_cmdq_cmd_write(struct ivpu_device *vdev, const char *name, u64 data0, u64 data1) @@ -447,7 +489,7 @@ static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev) return ret; clflush_cache_range(q->base, IVPU_MMU_CMDQ_SIZE); - REGV_WR32(VPU_37XX_HOST_MMU_CMDQ_PROD, q->prod); + REGV_WR32(IVPU_MMU_REG_CMDQ_PROD, q->prod); ret = ivpu_mmu_cmdq_wait_for_cons(vdev); if (ret) @@ -491,11 +533,10 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev) mmu->cmdq.cons = 0; memset(mmu->evtq.base, 0, IVPU_MMU_EVTQ_SIZE); - clflush_cache_range(mmu->evtq.base, IVPU_MMU_EVTQ_SIZE); mmu->evtq.prod = 0; mmu->evtq.cons = 0; - ret = ivpu_mmu_reg_write(vdev, VPU_37XX_HOST_MMU_CR0, 0); + ret = ivpu_mmu_reg_write_cr0(vdev, 0); if (ret) return ret; @@ -505,17 +546,17 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev) FIELD_PREP(IVPU_MMU_CR1_QUEUE_SH, IVPU_MMU_SH_ISH) | FIELD_PREP(IVPU_MMU_CR1_QUEUE_OC, IVPU_MMU_CACHE_WB) | FIELD_PREP(IVPU_MMU_CR1_QUEUE_IC, IVPU_MMU_CACHE_WB); - REGV_WR32(VPU_37XX_HOST_MMU_CR1, val); + REGV_WR32(IVPU_MMU_REG_CR1, val); - REGV_WR64(VPU_37XX_HOST_MMU_STRTAB_BASE, mmu->strtab.dma_q); - REGV_WR32(VPU_37XX_HOST_MMU_STRTAB_BASE_CFG, mmu->strtab.base_cfg); + REGV_WR64(IVPU_MMU_REG_STRTAB_BASE, mmu->strtab.dma_q); + REGV_WR32(IVPU_MMU_REG_STRTAB_BASE_CFG, mmu->strtab.base_cfg); - REGV_WR64(VPU_37XX_HOST_MMU_CMDQ_BASE, mmu->cmdq.dma_q); - REGV_WR32(VPU_37XX_HOST_MMU_CMDQ_PROD, 0); - REGV_WR32(VPU_37XX_HOST_MMU_CMDQ_CONS, 0); + REGV_WR64(IVPU_MMU_REG_CMDQ_BASE, mmu->cmdq.dma_q); + REGV_WR32(IVPU_MMU_REG_CMDQ_PROD, 0); + REGV_WR32(IVPU_MMU_REG_CMDQ_CONS, 0); val = IVPU_MMU_CR0_CMDQEN; - ret = ivpu_mmu_reg_write(vdev, VPU_37XX_HOST_MMU_CR0, val); + ret = ivpu_mmu_reg_write_cr0(vdev, val); if (ret) return ret; @@ -531,17 +572,17 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev) if (ret) return ret; - REGV_WR64(VPU_37XX_HOST_MMU_EVTQ_BASE, mmu->evtq.dma_q); - REGV_WR32(VPU_37XX_HOST_MMU_EVTQ_PROD_SEC, 0); - REGV_WR32(VPU_37XX_HOST_MMU_EVTQ_CONS_SEC, 0); + REGV_WR64(IVPU_MMU_REG_EVTQ_BASE, mmu->evtq.dma_q); + REGV_WR32(IVPU_MMU_REG_EVTQ_PROD_SEC, 0); + REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, 0); val |= IVPU_MMU_CR0_EVTQEN; - ret = ivpu_mmu_reg_write(vdev, VPU_37XX_HOST_MMU_CR0, val); + ret = ivpu_mmu_reg_write_cr0(vdev, val); if (ret) return ret; val |= IVPU_MMU_CR0_ATSCHK; - ret = ivpu_mmu_reg_write(vdev, VPU_37XX_HOST_MMU_CR0, val); + ret = ivpu_mmu_reg_write_cr0(vdev, val); if (ret) return ret; @@ -550,7 +591,7 @@ static int ivpu_mmu_reset(struct ivpu_device *vdev) return ret; val |= IVPU_MMU_CR0_SMMUEN; - return ivpu_mmu_reg_write(vdev, VPU_37XX_HOST_MMU_CR0, val); + return ivpu_mmu_reg_write_cr0(vdev, val); } static void ivpu_mmu_strtab_link_cd(struct ivpu_device *vdev, u32 sid) @@ -801,14 +842,12 @@ static u32 *ivpu_mmu_get_event(struct ivpu_device *vdev) u32 idx = IVPU_MMU_Q_IDX(evtq->cons); u32 *evt = evtq->base + (idx * IVPU_MMU_EVTQ_CMD_SIZE); - evtq->prod = REGV_RD32(VPU_37XX_HOST_MMU_EVTQ_PROD_SEC); + evtq->prod = REGV_RD32(IVPU_MMU_REG_EVTQ_PROD_SEC); if (!CIRC_CNT(IVPU_MMU_Q_IDX(evtq->prod), IVPU_MMU_Q_IDX(evtq->cons), IVPU_MMU_Q_COUNT)) return NULL; - clflush_cache_range(evt, IVPU_MMU_EVTQ_CMD_SIZE); - evtq->cons = (evtq->cons + 1) & IVPU_MMU_Q_WRAP_MASK; - REGV_WR32(VPU_37XX_HOST_MMU_EVTQ_CONS_SEC, evtq->cons); + REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, evtq->cons); return evt; } @@ -841,35 +880,35 @@ void ivpu_mmu_irq_gerr_handler(struct ivpu_device *vdev) ivpu_dbg(vdev, IRQ, "MMU error\n"); - gerror_val = REGV_RD32(VPU_37XX_HOST_MMU_GERROR); - gerrorn_val = REGV_RD32(VPU_37XX_HOST_MMU_GERRORN); + gerror_val = REGV_RD32(IVPU_MMU_REG_GERROR); + gerrorn_val = REGV_RD32(IVPU_MMU_REG_GERRORN); active = gerror_val ^ gerrorn_val; if (!(active & IVPU_MMU_GERROR_ERR_MASK)) return; - if (REG_TEST_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_ABT, active)) + if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, MSI_ABT, active)) ivpu_warn_ratelimited(vdev, "MMU MSI ABT write aborted\n"); - if (REG_TEST_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_PRIQ_ABT, active)) + if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, MSI_PRIQ_ABT, active)) ivpu_warn_ratelimited(vdev, "MMU PRIQ MSI ABT write aborted\n"); - if (REG_TEST_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_EVTQ_ABT, active)) + if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, MSI_EVTQ_ABT, active)) ivpu_warn_ratelimited(vdev, "MMU EVTQ MSI ABT write aborted\n"); - if (REG_TEST_FLD(VPU_37XX_HOST_MMU_GERROR, MSI_CMDQ_ABT, active)) + if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, MSI_CMDQ_ABT, active)) ivpu_warn_ratelimited(vdev, "MMU CMDQ MSI ABT write aborted\n"); - if (REG_TEST_FLD(VPU_37XX_HOST_MMU_GERROR, PRIQ_ABT, active)) + if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, PRIQ_ABT, active)) ivpu_err_ratelimited(vdev, "MMU PRIQ write aborted\n"); - if (REG_TEST_FLD(VPU_37XX_HOST_MMU_GERROR, EVTQ_ABT, active)) + if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, EVTQ_ABT, active)) ivpu_err_ratelimited(vdev, "MMU EVTQ write aborted\n"); - if (REG_TEST_FLD(VPU_37XX_HOST_MMU_GERROR, CMDQ, active)) + if (REG_TEST_FLD(IVPU_MMU_REG_GERROR, CMDQ, active)) ivpu_err_ratelimited(vdev, "MMU CMDQ write aborted\n"); - REGV_WR32(VPU_37XX_HOST_MMU_GERRORN, gerror_val); + REGV_WR32(IVPU_MMU_REG_GERRORN, gerror_val); } int ivpu_mmu_set_pgtable(struct ivpu_device *vdev, int ssid, struct ivpu_mmu_pgtable *pgtable) diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c index ce94f40291..c1050a2df9 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.c +++ b/drivers/accel/ivpu/ivpu_mmu_context.c @@ -424,8 +424,10 @@ ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u3 INIT_LIST_HEAD(&ctx->bo_list); ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable); - if (ret) + if (ret) { + ivpu_err(vdev, "Failed to initialize pgtable for ctx %u: %d\n", context_id, ret); return ret; + } if (!context_id) { start = vdev->hw->ranges.global.start; @@ -464,6 +466,16 @@ void ivpu_mmu_global_context_fini(struct ivpu_device *vdev) return ivpu_mmu_context_fini(vdev, &vdev->gctx); } +int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev) +{ + return ivpu_mmu_user_context_init(vdev, &vdev->rctx, IVPU_RESERVED_CONTEXT_MMU_SSID); +} + +void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev) +{ + return ivpu_mmu_user_context_fini(vdev, &vdev->rctx); +} + void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid) { struct ivpu_file_priv *file_priv; @@ -485,13 +497,13 @@ int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context ret = ivpu_mmu_context_init(vdev, ctx, ctx_id); if (ret) { - ivpu_err(vdev, "Failed to initialize context: %d\n", ret); + ivpu_err(vdev, "Failed to initialize context %u: %d\n", ctx_id, ret); return ret; } ret = ivpu_mmu_set_pgtable(vdev, ctx_id, &ctx->pgtable); if (ret) { - ivpu_err(vdev, "Failed to set page table: %d\n", ret); + ivpu_err(vdev, "Failed to set page table for context %u: %d\n", ctx_id, ret); goto err_context_fini; } diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h index 961a0d6a6c..f15d8c630d 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.h +++ b/drivers/accel/ivpu/ivpu_mmu_context.h @@ -32,6 +32,8 @@ struct ivpu_mmu_context { int ivpu_mmu_global_context_init(struct ivpu_device *vdev); void ivpu_mmu_global_context_fini(struct ivpu_device *vdev); +int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev); +void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev); int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id); void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx); diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index ffff2496e8..e9b16cbc26 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -37,7 +37,7 @@ static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev) static void ivpu_pm_prepare_warm_boot(struct ivpu_device *vdev) { struct ivpu_fw_info *fw = vdev->fw; - struct vpu_boot_params *bp = fw->mem->kvaddr; + struct vpu_boot_params *bp = ivpu_bo_vaddr(fw->mem); if (!bp->save_restore_ret_address) { ivpu_pm_prepare_cold_boot(vdev); @@ -246,6 +246,16 @@ int ivpu_rpm_get(struct ivpu_device *vdev) return ret; } +int ivpu_rpm_get_if_active(struct ivpu_device *vdev) +{ + int ret; + + ret = pm_runtime_get_if_active(vdev->drm.dev, false); + drm_WARN_ON(&vdev->drm, ret < 0); + + return ret; +} + void ivpu_rpm_put(struct ivpu_device *vdev) { pm_runtime_mark_last_busy(vdev->drm.dev); @@ -283,10 +293,11 @@ void ivpu_pm_reset_done_cb(struct pci_dev *pdev) pm_runtime_put_autosuspend(vdev->drm.dev); } -int ivpu_pm_init(struct ivpu_device *vdev) +void ivpu_pm_init(struct ivpu_device *vdev) { struct device *dev = vdev->drm.dev; struct ivpu_pm_info *pm = vdev->pm; + int delay; pm->vdev = vdev; pm->suspend_reschedule_counter = PM_RESCHEDULE_LIMIT; @@ -294,16 +305,15 @@ int ivpu_pm_init(struct ivpu_device *vdev) atomic_set(&pm->in_reset, 0); INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work); - pm_runtime_use_autosuspend(dev); - if (ivpu_disable_recovery) - pm_runtime_set_autosuspend_delay(dev, -1); - else if (ivpu_is_silicon(vdev)) - pm_runtime_set_autosuspend_delay(dev, 100); + delay = -1; else - pm_runtime_set_autosuspend_delay(dev, 60000); + delay = vdev->timeout.autosuspend; - return 0; + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, delay); + + ivpu_dbg(vdev, PM, "Autosuspend delay = %d\n", delay); } void ivpu_pm_cancel_recovery(struct ivpu_device *vdev) diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h index fd4eada129..044db150be 100644 --- a/drivers/accel/ivpu/ivpu_pm.h +++ b/drivers/accel/ivpu/ivpu_pm.h @@ -19,7 +19,7 @@ struct ivpu_pm_info { u32 suspend_reschedule_counter; }; -int ivpu_pm_init(struct ivpu_device *vdev); +void ivpu_pm_init(struct ivpu_device *vdev); void ivpu_pm_enable(struct ivpu_device *vdev); void ivpu_pm_disable(struct ivpu_device *vdev); void ivpu_pm_cancel_recovery(struct ivpu_device *vdev); @@ -33,6 +33,7 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev); void ivpu_pm_reset_done_cb(struct pci_dev *pdev); int __must_check ivpu_rpm_get(struct ivpu_device *vdev); +int __must_check ivpu_rpm_get_if_active(struct ivpu_device *vdev); void ivpu_rpm_put(struct ivpu_device *vdev); void ivpu_pm_schedule_recovery(struct ivpu_device *vdev); diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h index f2bd637a0d..e3f4c30f3f 100644 --- a/drivers/accel/qaic/qaic.h +++ b/drivers/accel/qaic/qaic.h @@ -27,6 +27,9 @@ #define QAIC_DBC_OFF(i) ((i) * QAIC_DBC_SIZE + QAIC_DBC_BASE) #define to_qaic_bo(obj) container_of(obj, struct qaic_bo, base) +#define to_qaic_drm_device(dev) container_of(dev, struct qaic_drm_device, drm) +#define to_drm(qddev) (&(qddev)->drm) +#define to_accel_kdev(qddev) (to_drm(qddev)->accel->kdev) /* Return Linux device of accel node */ extern bool datapath_polling; @@ -137,6 +140,8 @@ struct qaic_device { }; struct qaic_drm_device { + /* The drm device struct of this drm device */ + struct drm_device drm; /* Pointer to the root device struct driven by this driver */ struct qaic_device *qdev; /* @@ -146,8 +151,6 @@ struct qaic_drm_device { * device is the actual physical device */ s32 partition_id; - /* Pointer to the drm device struct of this drm device */ - struct drm_device *ddev; /* Head in list of users who have opened this drm device */ struct list_head users; /* Synchronizes access to users list */ @@ -158,8 +161,6 @@ struct qaic_bo { struct drm_gem_object base; /* Scatter/gather table for allocate/imported BO */ struct sg_table *sgt; - /* BO size requested by user. GEM object might be bigger in size. */ - u64 size; /* Head in list of slices of this BO */ struct list_head slices; /* Total nents, for all slices of this BO */ @@ -221,7 +222,8 @@ struct qaic_bo { */ u32 queue_level_before; } perf_stats; - + /* Synchronizes BO operations */ + struct mutex lock; }; struct bo_slice { @@ -277,6 +279,7 @@ int qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *f int qaic_partial_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int qaic_detach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); void irq_polling_work(struct work_struct *work); #endif /* _QAIC_H_ */ diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c index ed1a5af434..d42f002bc0 100644 --- a/drivers/accel/qaic/qaic_data.c +++ b/drivers/accel/qaic/qaic_data.c @@ -154,6 +154,7 @@ static void free_slice(struct kref *kref) { struct bo_slice *slice = container_of(kref, struct bo_slice, ref_count); + slice->bo->total_slice_nents -= slice->nents; list_del(&slice->slice); drm_gem_object_put(&slice->bo->base); sg_free_table(slice->sgt); @@ -579,7 +580,7 @@ static void qaic_gem_print_info(struct drm_printer *p, unsigned int indent, { struct qaic_bo *bo = to_qaic_bo(obj); - drm_printf_indent(p, indent, "user requested size=%llu\n", bo->size); + drm_printf_indent(p, indent, "BO DMA direction %d\n", bo->dir); } static const struct vm_operations_struct drm_vm_ops = { @@ -623,6 +624,7 @@ static void qaic_free_object(struct drm_gem_object *obj) qaic_free_sgt(bo->sgt); } + mutex_destroy(&bo->lock); drm_gem_object_release(obj); kfree(bo); } @@ -634,6 +636,19 @@ static const struct drm_gem_object_funcs qaic_gem_funcs = { .vm_ops = &drm_vm_ops, }; +static void qaic_init_bo(struct qaic_bo *bo, bool reinit) +{ + if (reinit) { + bo->sliced = false; + reinit_completion(&bo->xfer_done); + } else { + mutex_init(&bo->lock); + init_completion(&bo->xfer_done); + } + complete_all(&bo->xfer_done); + INIT_LIST_HEAD(&bo->slices); +} + static struct qaic_bo *qaic_alloc_init_bo(void) { struct qaic_bo *bo; @@ -642,9 +657,7 @@ static struct qaic_bo *qaic_alloc_init_bo(void) if (!bo) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&bo->slices); - init_completion(&bo->xfer_done); - complete_all(&bo->xfer_done); + qaic_init_bo(bo, false); return bo; } @@ -695,8 +708,6 @@ int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *fi if (ret) goto free_bo; - bo->size = args->size; - ret = drm_gem_handle_create(file_priv, obj, &args->handle); if (ret) goto free_sgt; @@ -826,7 +837,6 @@ static int qaic_prepare_import_bo(struct qaic_bo *bo, struct qaic_attach_slice_h } bo->sgt = sgt; - bo->size = hdr->size; return 0; } @@ -836,7 +846,7 @@ static int qaic_prepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo, { int ret; - if (bo->size != hdr->size) + if (bo->base.size < hdr->size) return -EINVAL; ret = dma_map_sgtable(&qdev->pdev->dev, bo->sgt, hdr->dir, 0); @@ -855,9 +865,9 @@ static int qaic_prepare_bo(struct qaic_device *qdev, struct qaic_bo *bo, ret = qaic_prepare_import_bo(bo, hdr); else ret = qaic_prepare_export_bo(qdev, bo, hdr); - - if (ret == 0) - bo->dir = hdr->dir; + bo->dir = hdr->dir; + bo->dbc = &qdev->dbc[hdr->dbc_id]; + bo->nr_slice = hdr->count; return ret; } @@ -866,7 +876,6 @@ static void qaic_unprepare_import_bo(struct qaic_bo *bo) { dma_buf_unmap_attachment(bo->base.import_attach, bo->sgt, bo->dir); bo->sgt = NULL; - bo->size = 0; } static void qaic_unprepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo) @@ -882,6 +891,8 @@ static void qaic_unprepare_bo(struct qaic_device *qdev, struct qaic_bo *bo) qaic_unprepare_export_bo(qdev, bo); bo->dir = 0; + bo->dbc = NULL; + bo->nr_slice = 0; } static void qaic_free_slices_bo(struct qaic_bo *bo) @@ -890,6 +901,9 @@ static void qaic_free_slices_bo(struct qaic_bo *bo) list_for_each_entry_safe(slice, temp, &bo->slices, slice) kref_put(&slice->ref_count, free_slice); + if (WARN_ON_ONCE(bo->total_slice_nents != 0)) + bo->total_slice_nents = 0; + bo->nr_slice = 0; } static int qaic_attach_slicing_bo(struct qaic_device *qdev, struct qaic_bo *bo, @@ -906,15 +920,11 @@ static int qaic_attach_slicing_bo(struct qaic_device *qdev, struct qaic_bo *bo, } } - if (bo->total_slice_nents > qdev->dbc[hdr->dbc_id].nelem) { + if (bo->total_slice_nents > bo->dbc->nelem) { qaic_free_slices_bo(bo); return -ENOSPC; } - bo->sliced = true; - bo->nr_slice = hdr->count; - list_add_tail(&bo->bo_list, &qdev->dbc[hdr->dbc_id].bo_lists); - return 0; } @@ -992,10 +1002,13 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi } bo = to_qaic_bo(obj); + ret = mutex_lock_interruptible(&bo->lock); + if (ret) + goto put_bo; if (bo->sliced) { ret = -EINVAL; - goto put_bo; + goto unlock_bo; } dbc = &qdev->dbc[args->hdr.dbc_id]; @@ -1016,9 +1029,10 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi if (args->hdr.dir == DMA_TO_DEVICE) dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, args->hdr.dir); - bo->dbc = dbc; + bo->sliced = true; + list_add_tail(&bo->bo_list, &bo->dbc->bo_lists); srcu_read_unlock(&dbc->ch_lock, rcu_id); - drm_gem_object_put(obj); + mutex_unlock(&bo->lock); kfree(slice_ent); srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); @@ -1029,6 +1043,8 @@ unprepare_bo: qaic_unprepare_bo(qdev, bo); unlock_ch_srcu: srcu_read_unlock(&dbc->ch_lock, rcu_id); +unlock_bo: + mutex_unlock(&bo->lock); put_bo: drm_gem_object_put(obj); free_slice_ent: @@ -1183,15 +1199,18 @@ static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *fil } bo = to_qaic_bo(obj); + ret = mutex_lock_interruptible(&bo->lock); + if (ret) + goto failed_to_send_bo; if (!bo->sliced) { ret = -EINVAL; - goto failed_to_send_bo; + goto unlock_bo; } - if (is_partial && pexec[i].resize > bo->size) { + if (is_partial && pexec[i].resize > bo->base.size) { ret = -EINVAL; - goto failed_to_send_bo; + goto unlock_bo; } spin_lock_irqsave(&dbc->xfer_lock, flags); @@ -1200,7 +1219,7 @@ static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *fil if (queued) { spin_unlock_irqrestore(&dbc->xfer_lock, flags); ret = -EINVAL; - goto failed_to_send_bo; + goto unlock_bo; } bo->req_id = dbc->next_req_id++; @@ -1231,17 +1250,20 @@ static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *fil if (ret) { bo->queued = false; spin_unlock_irqrestore(&dbc->xfer_lock, flags); - goto failed_to_send_bo; + goto unlock_bo; } } reinit_completion(&bo->xfer_done); list_add_tail(&bo->xfer_list, &dbc->xfer_list); spin_unlock_irqrestore(&dbc->xfer_lock, flags); dma_sync_sgtable_for_device(&qdev->pdev->dev, bo->sgt, bo->dir); + mutex_unlock(&bo->lock); } return 0; +unlock_bo: + mutex_unlock(&bo->lock); failed_to_send_bo: if (likely(obj)) drm_gem_object_put(obj); @@ -1797,6 +1819,91 @@ unlock_usr_srcu: return ret; } +static void detach_slice_bo(struct qaic_device *qdev, struct qaic_bo *bo) +{ + qaic_free_slices_bo(bo); + qaic_unprepare_bo(qdev, bo); + qaic_init_bo(bo, true); + list_del(&bo->bo_list); + drm_gem_object_put(&bo->base); +} + +int qaic_detach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) +{ + struct qaic_detach_slice *args = data; + int rcu_id, usr_rcu_id, qdev_rcu_id; + struct dma_bridge_chan *dbc; + struct drm_gem_object *obj; + struct qaic_device *qdev; + struct qaic_user *usr; + unsigned long flags; + struct qaic_bo *bo; + int ret; + + if (args->pad != 0) + return -EINVAL; + + usr = file_priv->driver_priv; + usr_rcu_id = srcu_read_lock(&usr->qddev_lock); + if (!usr->qddev) { + ret = -ENODEV; + goto unlock_usr_srcu; + } + + qdev = usr->qddev->qdev; + qdev_rcu_id = srcu_read_lock(&qdev->dev_lock); + if (qdev->in_reset) { + ret = -ENODEV; + goto unlock_dev_srcu; + } + + obj = drm_gem_object_lookup(file_priv, args->handle); + if (!obj) { + ret = -ENOENT; + goto unlock_dev_srcu; + } + + bo = to_qaic_bo(obj); + ret = mutex_lock_interruptible(&bo->lock); + if (ret) + goto put_bo; + + if (!bo->sliced) { + ret = -EINVAL; + goto unlock_bo; + } + + dbc = bo->dbc; + rcu_id = srcu_read_lock(&dbc->ch_lock); + if (dbc->usr != usr) { + ret = -EINVAL; + goto unlock_ch_srcu; + } + + /* Check if BO is committed to H/W for DMA */ + spin_lock_irqsave(&dbc->xfer_lock, flags); + if (bo->queued) { + spin_unlock_irqrestore(&dbc->xfer_lock, flags); + ret = -EBUSY; + goto unlock_ch_srcu; + } + spin_unlock_irqrestore(&dbc->xfer_lock, flags); + + detach_slice_bo(qdev, bo); + +unlock_ch_srcu: + srcu_read_unlock(&dbc->ch_lock, rcu_id); +unlock_bo: + mutex_unlock(&bo->lock); +put_bo: + drm_gem_object_put(obj); +unlock_dev_srcu: + srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id); +unlock_usr_srcu: + srcu_read_unlock(&usr->qddev_lock, usr_rcu_id); + return ret; +} + static void empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *dbc) { unsigned long flags; @@ -1808,6 +1915,12 @@ static void empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *db bo->queued = false; list_del(&bo->xfer_list); spin_unlock_irqrestore(&dbc->xfer_lock, flags); + bo->nr_slice_xfer_done = 0; + bo->req_id = 0; + bo->perf_stats.req_received_ts = 0; + bo->perf_stats.req_submit_ts = 0; + bo->perf_stats.req_processed_ts = 0; + bo->perf_stats.queue_level_before = 0; dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir); complete_all(&bo->xfer_done); drm_gem_object_put(&bo->base); @@ -1855,7 +1968,6 @@ void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id) void release_dbc(struct qaic_device *qdev, u32 dbc_id) { - struct bo_slice *slice, *slice_temp; struct qaic_bo *bo, *bo_temp; struct dma_bridge_chan *dbc; @@ -1873,24 +1985,11 @@ void release_dbc(struct qaic_device *qdev, u32 dbc_id) dbc->usr = NULL; list_for_each_entry_safe(bo, bo_temp, &dbc->bo_lists, bo_list) { - list_for_each_entry_safe(slice, slice_temp, &bo->slices, slice) - kref_put(&slice->ref_count, free_slice); - bo->sliced = false; - INIT_LIST_HEAD(&bo->slices); - bo->total_slice_nents = 0; - bo->dir = 0; - bo->dbc = NULL; - bo->nr_slice = 0; - bo->nr_slice_xfer_done = 0; - bo->queued = false; - bo->req_id = 0; - init_completion(&bo->xfer_done); - complete_all(&bo->xfer_done); - list_del(&bo->bo_list); - bo->perf_stats.req_received_ts = 0; - bo->perf_stats.req_submit_ts = 0; - bo->perf_stats.req_processed_ts = 0; - bo->perf_stats.queue_level_before = 0; + drm_gem_object_get(&bo->base); + mutex_lock(&bo->lock); + detach_slice_bo(qdev, bo); + mutex_unlock(&bo->lock); + drm_gem_object_put(&bo->base); } dbc->in_use = false; diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c index b5de82e6eb..6f58095767 100644 --- a/drivers/accel/qaic/qaic_drv.c +++ b/drivers/accel/qaic/qaic_drv.c @@ -22,6 +22,7 @@ #include <drm/drm_file.h> #include <drm/drm_gem.h> #include <drm/drm_ioctl.h> +#include <drm/drm_managed.h> #include <uapi/drm/qaic_accel.h> #include "mhi_controller.h" @@ -55,7 +56,7 @@ static void free_usr(struct kref *kref) static int qaic_open(struct drm_device *dev, struct drm_file *file) { - struct qaic_drm_device *qddev = dev->dev_private; + struct qaic_drm_device *qddev = to_qaic_drm_device(dev); struct qaic_device *qdev = qddev->qdev; struct qaic_user *usr; int rcu_id; @@ -150,6 +151,7 @@ static const struct drm_ioctl_desc qaic_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(QAIC_PARTIAL_EXECUTE_BO, qaic_partial_execute_bo_ioctl, 0), DRM_IOCTL_DEF_DRV(QAIC_WAIT_BO, qaic_wait_bo_ioctl, 0), DRM_IOCTL_DEF_DRV(QAIC_PERF_STATS_BO, qaic_perf_stats_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(QAIC_DETACH_SLICE_BO, qaic_detach_slice_bo_ioctl, 0), }; static const struct drm_driver qaic_accel_driver = { @@ -170,64 +172,39 @@ static const struct drm_driver qaic_accel_driver = { static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id) { - struct qaic_drm_device *qddev; - struct drm_device *ddev; - struct device *pdev; + struct qaic_drm_device *qddev = qdev->qddev; + struct drm_device *drm = to_drm(qddev); int ret; /* Hold off implementing partitions until the uapi is determined */ if (partition_id != QAIC_NO_PARTITION) return -EINVAL; - pdev = &qdev->pdev->dev; - - qddev = kzalloc(sizeof(*qddev), GFP_KERNEL); - if (!qddev) - return -ENOMEM; - - ddev = drm_dev_alloc(&qaic_accel_driver, pdev); - if (IS_ERR(ddev)) { - ret = PTR_ERR(ddev); - goto ddev_fail; - } - - ddev->dev_private = qddev; - qddev->ddev = ddev; - - qddev->qdev = qdev; qddev->partition_id = partition_id; - INIT_LIST_HEAD(&qddev->users); - mutex_init(&qddev->users_mutex); - - qdev->qddev = qddev; - - ret = drm_dev_register(ddev, 0); - if (ret) { - pci_dbg(qdev->pdev, "%s: drm_dev_register failed %d\n", __func__, ret); - goto drm_reg_fail; - } - return 0; + /* + * drm_dev_unregister() sets the driver data to NULL and + * drm_dev_register() does not update the driver data. During a SOC + * reset drm dev is unregistered and registered again leaving the + * driver data to NULL. + */ + dev_set_drvdata(to_accel_kdev(qddev), drm->accel); + ret = drm_dev_register(drm, 0); + if (ret) + pci_dbg(qdev->pdev, "drm_dev_register failed %d\n", ret); -drm_reg_fail: - mutex_destroy(&qddev->users_mutex); - qdev->qddev = NULL; - drm_dev_put(ddev); -ddev_fail: - kfree(qddev); return ret; } static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id) { - struct qaic_drm_device *qddev; + struct qaic_drm_device *qddev = qdev->qddev; + struct drm_device *drm = to_drm(qddev); struct qaic_user *usr; - qddev = qdev->qddev; - qdev->qddev = NULL; - if (!qddev) - return; - + drm_dev_get(drm); + drm_dev_unregister(drm); + qddev->partition_id = 0; /* * Existing users get unresolvable errors till they close FDs. * Need to sync carefully with users calling close(). The @@ -254,13 +231,7 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id) mutex_lock(&qddev->users_mutex); } mutex_unlock(&qddev->users_mutex); - - if (qddev->ddev) { - drm_dev_unregister(qddev->ddev); - drm_dev_put(qddev->ddev); - } - - kfree(qddev); + drm_dev_put(drm); } static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) @@ -344,8 +315,20 @@ void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset) qdev->in_reset = false; } +static void cleanup_qdev(struct qaic_device *qdev) +{ + int i; + + for (i = 0; i < qdev->num_dbc; ++i) + cleanup_srcu_struct(&qdev->dbc[i].ch_lock); + cleanup_srcu_struct(&qdev->dev_lock); + pci_set_drvdata(qdev->pdev, NULL); + destroy_workqueue(qdev->cntl_wq); +} + static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_device_id *id) { + struct qaic_drm_device *qddev; struct qaic_device *qdev; int i; @@ -381,18 +364,18 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_de INIT_LIST_HEAD(&qdev->dbc[i].bo_lists); } - return qdev; -} + qddev = devm_drm_dev_alloc(&pdev->dev, &qaic_accel_driver, struct qaic_drm_device, drm); + if (IS_ERR(qddev)) { + cleanup_qdev(qdev); + return NULL; + } -static void cleanup_qdev(struct qaic_device *qdev) -{ - int i; + drmm_mutex_init(to_drm(qddev), &qddev->users_mutex); + INIT_LIST_HEAD(&qddev->users); + qddev->qdev = qdev; + qdev->qddev = qddev; - for (i = 0; i < qdev->num_dbc; ++i) - cleanup_srcu_struct(&qdev->dbc[i].ch_lock); - cleanup_srcu_struct(&qdev->dev_lock); - pci_set_drvdata(qdev->pdev, NULL); - destroy_workqueue(qdev->cntl_wq); + return qdev; } static int init_pci(struct qaic_device *qdev, struct pci_dev *pdev) @@ -591,22 +574,22 @@ static int __init qaic_init(void) { int ret; - ret = mhi_driver_register(&qaic_mhi_driver); + ret = pci_register_driver(&qaic_pci_driver); if (ret) { - pr_debug("qaic: mhi_driver_register failed %d\n", ret); + pr_debug("qaic: pci_register_driver failed %d\n", ret); return ret; } - ret = pci_register_driver(&qaic_pci_driver); + ret = mhi_driver_register(&qaic_mhi_driver); if (ret) { - pr_debug("qaic: pci_register_driver failed %d\n", ret); - goto free_mhi; + pr_debug("qaic: mhi_driver_register failed %d\n", ret); + goto free_pci; } return 0; -free_mhi: - mhi_driver_unregister(&qaic_mhi_driver); +free_pci: + pci_unregister_driver(&qaic_pci_driver); return ret; } @@ -628,8 +611,8 @@ static void __exit qaic_exit(void) * reinitializing the link_up state after the cleanup is done. */ link_up = true; - pci_unregister_driver(&qaic_pci_driver); mhi_driver_unregister(&qaic_mhi_driver); + pci_unregister_driver(&qaic_pci_driver); } module_init(qaic_init); diff --git a/drivers/accel/habanalabs/include/common/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h index 33807b839c..86ea7c63a0 100644 --- a/drivers/accel/habanalabs/include/common/cpucp_if.h +++ b/include/linux/habanalabs/cpucp_if.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2020-2022 HabanaLabs, Ltd. + * Copyright 2020-2023 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -33,6 +33,17 @@ #define PLL_MAP_MAX_BITS 128 #define PLL_MAP_LEN (PLL_MAP_MAX_BITS / 8) +enum eq_event_id { + EQ_EVENT_NIC_STS_REQUEST = 0, + EQ_EVENT_PWR_MODE_0, + EQ_EVENT_PWR_MODE_1, + EQ_EVENT_PWR_MODE_2, + EQ_EVENT_PWR_MODE_3, + EQ_EVENT_PWR_BRK_ENTRY, + EQ_EVENT_PWR_BRK_EXIT, + EQ_EVENT_HEARTBEAT, +}; + /* * info of the pkt queue pointers in the first async occurrence */ @@ -69,7 +80,8 @@ struct hl_eq_ecc_data { __le64 ecc_syndrom; __u8 memory_wrapper_idx; __u8 is_critical; - __u8 pad[6]; + __le16 block_id; + __u8 pad[4]; }; enum hl_sm_sei_cause { @@ -656,18 +668,19 @@ enum pq_init_status { * Obsolete. * * CPUCP_PACKET_GENERIC_PASSTHROUGH - - * Generic opcode for all firmware info that is only passed to host - * through the LKD, without getting parsed there. + * Generic opcode for all firmware info that is only passed to host + * through the LKD, without getting parsed there. * * CPUCP_PACKET_ACTIVE_STATUS_SET - * LKD sends FW indication whether device is free or in use, this indication is reported * also to the BMC. * - * CPUCP_PACKET_REGISTER_INTERRUPTS - - * Packet to register interrupts indicating LKD is ready to receive events from FW. - * * CPUCP_PACKET_SOFT_RESET - - * Packet to perform soft-reset. + * Packet to perform soft-reset. + * + * CPUCP_PACKET_INTS_REGISTER - + * Packet to inform FW that queues have been established and LKD is ready to receive + * EQ events. */ enum cpucp_packet_id { @@ -733,8 +746,9 @@ enum cpucp_packet_id { CPUCP_PACKET_RESERVED10, /* not used */ CPUCP_PACKET_RESERVED11, /* not used */ CPUCP_PACKET_RESERVED12, /* internal */ - CPUCP_PACKET_REGISTER_INTERRUPTS, /* internal */ + CPUCP_PACKET_RESERVED13, /* internal */ CPUCP_PACKET_SOFT_RESET, /* internal */ + CPUCP_PACKET_INTS_REGISTER, /* internal */ CPUCP_PACKET_ID_MAX /* must be last */ }; @@ -987,6 +1001,7 @@ enum cpucp_msi_type { CPUCP_NIC_PORT5_MSI_TYPE, CPUCP_NIC_PORT7_MSI_TYPE, CPUCP_NIC_PORT9_MSI_TYPE, + CPUCP_EVENT_QUEUE_ERR_MSI_TYPE, CPUCP_NUM_OF_MSI_TYPES }; @@ -1137,6 +1152,7 @@ struct cpucp_security_info { * (0 = functional 1 = binned) * @interposer_version: Interposer version programmed in eFuse * @substrate_version: Substrate version programmed in eFuse + * @eq_health_check_supported: eq health check feature supported in FW. * @fw_hbm_region_size: Size in bytes of FW reserved region in HBM. * @fw_os_version: Firmware OS Version */ @@ -1163,7 +1179,7 @@ struct cpucp_info { __u8 xbar_binning_mask; __u8 interposer_version; __u8 substrate_version; - __u8 reserved2; + __u8 eq_health_check_supported; struct cpucp_security_info sec_info; __le32 fw_hbm_region_size; __u8 pll_map[PLL_MAP_LEN]; diff --git a/drivers/accel/habanalabs/include/common/hl_boot_if.h b/include/linux/habanalabs/hl_boot_if.h index cff79f7f9f..93366d5621 100644 --- a/drivers/accel/habanalabs/include/common/hl_boot_if.h +++ b/include/linux/habanalabs/hl_boot_if.h @@ -394,6 +394,8 @@ enum cpu_boot_status { CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT = 16, /* Internal Security has been initialized, device can be accessed */ CPU_BOOT_STATUS_SECURITY_READY = 17, + /* FW component is preparing to shutdown and communication with host is not available */ + CPU_BOOT_STATUS_FW_SHUTDOWN_PREP = 18, }; enum kmd_msg { @@ -570,6 +572,8 @@ struct lkd_fw_comms_desc { __le64 img_addr; /* address for next FW component load */ struct lkd_fw_binning_info binning_info; struct lkd_fw_ascii_msg ascii_msg[LKD_FW_ASCII_MSG_MAX]; + __le32 rsvd_mem_size_mb; /* reserved memory size [MB] for FW/SVE */ + char reserved1[4]; }; enum comms_reset_cause { @@ -596,6 +600,9 @@ struct lkd_fw_comms_msg { __le64 img_addr; struct lkd_fw_binning_info binning_info; struct lkd_fw_ascii_msg ascii_msg[LKD_FW_ASCII_MSG_MAX]; + /* reserved memory size [MB] for FW/SVE */ + __le32 rsvd_mem_size_mb; + char reserved1[4]; }; struct { __u8 reset_cause; |