author     Daniel Baumann <daniel.baumann@progress-linux.org>   2024-05-18 17:40:19 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>   2024-05-18 17:40:19 +0000
commit     9f0fc191371843c4fc000a226b0a26b6c059aacd (patch)
tree       35f8be3ef04506ac891ad001e8c41e535ae8d01d /drivers/accel/habanalabs/common/command_submission.c
parent     Releasing progress-linux version 6.6.15-2~progress7.99u1. (diff)
Merging upstream version 6.7.7.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/accel/habanalabs/common/command_submission.c')
-rw-r--r--   drivers/accel/habanalabs/common/command_submission.c | 488
1 file changed, 292 insertions(+), 196 deletions(-)
diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index c23829dab9..3aa6eeef44 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -31,6 +31,24 @@ enum hl_cs_wait_status {
 	CS_WAIT_STATUS_GONE
 };
 
+/*
+ * Data used while handling wait/timestamp nodes.
+ * The purpose of this struct is to store the needed data for both operations
+ * in one variable instead of passing large number of arguments to functions.
+ */
+struct wait_interrupt_data {
+	struct hl_user_interrupt *interrupt;
+	struct hl_mmap_mem_buf *buf;
+	struct hl_mem_mgr *mmg;
+	struct hl_cb *cq_cb;
+	u64 ts_handle;
+	u64 ts_offset;
+	u64 cq_handle;
+	u64 cq_offset;
+	u64 target_value;
+	u64 intr_timeout_us;
+};
+
 static void job_wq_completion(struct work_struct *work);
 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq,
 				enum hl_cs_wait_status *status, s64 *timestamp);
@@ -1079,19 +1097,22 @@ static void wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
 {
 	struct hl_user_pending_interrupt *pend, *temp;
+	unsigned long flags;
 
-	spin_lock(&interrupt->wait_list_lock);
-	list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) {
-		if (pend->ts_reg_info.buf) {
-			list_del(&pend->wait_list_node);
-			hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
-			hl_cb_put(pend->ts_reg_info.cq_cb);
-		} else {
-			pend->fence.error = -EIO;
-			complete_all(&pend->fence.completion);
-		}
+	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+	list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, list_node) {
+		pend->fence.error = -EIO;
+		complete_all(&pend->fence.completion);
 	}
-	spin_unlock(&interrupt->wait_list_lock);
+	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+
+	spin_lock_irqsave(&interrupt->ts_list_lock, flags);
+	list_for_each_entry_safe(pend, temp, &interrupt->ts_list_head, list_node) {
+		list_del(&pend->list_node);
+		hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
+		hl_cb_put(pend->ts_reg_info.cq_cb);
+	}
+	spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
 }
 
 void hl_release_pending_user_interrupts(struct hl_device *hdev)
@@ -1730,16 +1751,11 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
 		/* Need to wait for restore completion before execution phase */
 		if (num_chunks) {
 			enum hl_cs_wait_status status;
-wait_again:
+
 			ret = _hl_cs_wait_ioctl(hdev, ctx,
 					jiffies_to_usecs(hdev->timeout_jiffies),
 					*cs_seq, &status, NULL);
 			if (ret) {
-				if (ret == -ERESTARTSYS) {
-					usleep_range(100, 200);
-					goto wait_again;
-				}
-
 				dev_err(hdev->dev,
 					"Restore CS for context %d failed to complete %d\n",
 					ctx->asid, ret);
@@ -2539,8 +2555,9 @@ static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv)
 	return 0;
 }
 
-int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
+int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
 {
+	struct hl_fpriv *hpriv = file_priv->driver_priv;
 	union hl_cs_args *args = data;
 	enum hl_cs_type cs_type = 0;
 	u64 cs_seq = ULONG_MAX;
@@ -3197,166 +3214,241 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 	return 0;
 }
 
-static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
-					struct hl_cb *cq_cb,
-					u64 ts_offset, u64 cq_offset, u64 target_value,
-					spinlock_t *wait_list_lock,
-					struct hl_user_pending_interrupt **pend)
+static inline void set_record_cq_info(struct hl_user_pending_interrupt *record,
+					struct hl_cb *cq_cb, u32 cq_offset, u32 target_value)
 {
-	struct hl_ts_buff *ts_buff = buf->private;
-	struct hl_user_pending_interrupt *requested_offset_record =
-			(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
-			ts_offset;
-	struct hl_user_pending_interrupt *cb_last =
-			(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
+	record->ts_reg_info.cq_cb = cq_cb;
+	record->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_offset;
+	record->cq_target_value = target_value;
+}
+
+static int validate_and_get_ts_record(struct device *dev,
+					struct hl_ts_buff *ts_buff, u64 ts_offset,
+					struct hl_user_pending_interrupt **req_event_record)
+{
+	struct hl_user_pending_interrupt *ts_cb_last;
+
+	*req_event_record = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
+						ts_offset;
+	ts_cb_last = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
 			(ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
-	unsigned long iter_counter = 0;
-	u64 current_cq_counter;
-	ktime_t timestamp;
 
 	/* Validate ts_offset not exceeding last max */
-	if (requested_offset_record >= cb_last) {
-		dev_err(buf->mmg->dev, "Ts offset exceeds max CB offset(0x%llx)\n",
-								(u64)(uintptr_t)cb_last);
+	if (*req_event_record >= ts_cb_last) {
+		dev_err(dev, "Ts offset(%llu) exceeds max CB offset(0x%llx)\n",
+				ts_offset, (u64)(uintptr_t)ts_cb_last);
 		return -EINVAL;
 	}
 
-	timestamp = ktime_get();
+	return 0;
+}
 
-start_over:
-	spin_lock(wait_list_lock);
+static void unregister_timestamp_node(struct hl_device *hdev,
+			struct hl_user_pending_interrupt *record, bool need_lock)
+{
+	struct hl_user_interrupt *interrupt = record->ts_reg_info.interrupt;
+	bool ts_rec_found = false;
+	unsigned long flags;
 
-	/* Unregister only if we didn't reach the target value
-	 * since in this case there will be no handling in irq context
-	 * and then it's safe to delete the node out of the interrupt list
-	 * then re-use it on other interrupt
-	 */
-	if (requested_offset_record->ts_reg_info.in_use) {
-		current_cq_counter = *requested_offset_record->cq_kernel_addr;
-		if (current_cq_counter < requested_offset_record->cq_target_value) {
-			list_del(&requested_offset_record->wait_list_node);
-			spin_unlock(wait_list_lock);
+	if (need_lock)
+		spin_lock_irqsave(&interrupt->ts_list_lock, flags);
 
-			hl_mmap_mem_buf_put(requested_offset_record->ts_reg_info.buf);
-			hl_cb_put(requested_offset_record->ts_reg_info.cq_cb);
+	if (record->ts_reg_info.in_use) {
+		record->ts_reg_info.in_use = false;
+		list_del(&record->list_node);
+		ts_rec_found = true;
+	}
 
-			dev_dbg(buf->mmg->dev,
-				"ts node removed from interrupt list now can re-use\n");
-		} else {
-			dev_dbg(buf->mmg->dev,
-				"ts node in middle of irq handling\n");
-
-			/* irq thread handling in the middle give it time to finish */
-			spin_unlock(wait_list_lock);
-			usleep_range(100, 1000);
-			if (++iter_counter == MAX_TS_ITER_NUM) {
-				dev_err(buf->mmg->dev,
-					"Timestamp offset processing reached timeout of %lld ms\n",
-					ktime_ms_delta(ktime_get(), timestamp));
-				return -EAGAIN;
-			}
+	if (need_lock)
+		spin_unlock_irqrestore(&interrupt->ts_list_lock, flags);
 
-			goto start_over;
+	/* Put refcounts that were taken when we registered the event */
+	if (ts_rec_found) {
+		hl_mmap_mem_buf_put(record->ts_reg_info.buf);
+		hl_cb_put(record->ts_reg_info.cq_cb);
+	}
+}
+
+static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx *ctx,
+					struct wait_interrupt_data *data, unsigned long *flags,
+					struct hl_user_pending_interrupt **pend)
+{
+	struct hl_user_pending_interrupt *req_offset_record;
+	struct hl_ts_buff *ts_buff = data->buf->private;
+	bool need_lock = false;
+	int rc;
+
+	rc = validate_and_get_ts_record(data->buf->mmg->dev, ts_buff, data->ts_offset,
+									&req_offset_record);
+	if (rc)
+		return rc;
+
+	/* In case the node already registered, need to unregister first then re-use */
+	if (req_offset_record->ts_reg_info.in_use) {
+		dev_dbg(data->buf->mmg->dev,
+			"Requested record %p is in use on irq: %u ts addr: %p, unregister first then put on irq: %u\n",
+			req_offset_record,
+			req_offset_record->ts_reg_info.interrupt->interrupt_id,
+			req_offset_record->ts_reg_info.timestamp_kernel_addr,
+			data->interrupt->interrupt_id);
+		/*
+		 * Since interrupt here can be different than the one the node currently registered
+		 * on, and we don't want to lock two lists while we're doing unregister, so
+		 * unlock the new interrupt wait list here and acquire the lock again after you done
+		 */
+		if (data->interrupt->interrupt_id !=
+				req_offset_record->ts_reg_info.interrupt->interrupt_id) {
+
+			need_lock = true;
+			spin_unlock_irqrestore(&data->interrupt->ts_list_lock, *flags);
 		}
-	} else {
-		/* Fill up the new registration node info */
-		requested_offset_record->ts_reg_info.buf = buf;
-		requested_offset_record->ts_reg_info.cq_cb = cq_cb;
-		requested_offset_record->ts_reg_info.timestamp_kernel_addr =
-				(u64 *) ts_buff->user_buff_address + ts_offset;
-		requested_offset_record->cq_kernel_addr =
-				(u64 *) cq_cb->kernel_address + cq_offset;
-		requested_offset_record->cq_target_value = target_value;
-		spin_unlock(wait_list_lock);
+
+		unregister_timestamp_node(hdev, req_offset_record, need_lock);
+
+		if (need_lock)
+			spin_lock_irqsave(&data->interrupt->ts_list_lock, *flags);
 	}
 
-	*pend = requested_offset_record;
+	/* Fill up the new registration node info and add it to the list */
+	req_offset_record->ts_reg_info.in_use = true;
+	req_offset_record->ts_reg_info.buf = data->buf;
+	req_offset_record->ts_reg_info.timestamp_kernel_addr =
+			(u64 *) ts_buff->user_buff_address + data->ts_offset;
+	req_offset_record->ts_reg_info.interrupt = data->interrupt;
+	set_record_cq_info(req_offset_record, data->cq_cb, data->cq_offset,
+						data->target_value);
 
-	dev_dbg(buf->mmg->dev, "Found available node in TS kernel CB %p\n",
-						requested_offset_record);
-	return 0;
+	*pend = req_offset_record;
+
+	return rc;
+}
+
+static int _hl_interrupt_ts_reg_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
+				struct wait_interrupt_data *data,
+				u32 *status, u64 *timestamp)
+{
+	struct hl_user_pending_interrupt *pend;
+	unsigned long flags;
+	int rc = 0;
+
+	hl_ctx_get(ctx);
+
+	data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
+	if (!data->cq_cb) {
+		rc = -EINVAL;
+		goto put_ctx;
+	}
+
+	/* Validate the cq offset */
+	if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >=
+			((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) {
+		rc = -EINVAL;
+		goto put_cq_cb;
+	}
+
+	dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, handle: 0x%llx, ts offset: %llu, cq_offset: %llu\n",
+					data->interrupt->interrupt_id, data->ts_handle,
+					data->ts_offset, data->cq_offset);
+
+	data->buf = hl_mmap_mem_buf_get(data->mmg, data->ts_handle);
+	if (!data->buf) {
+		rc = -EINVAL;
+		goto put_cq_cb;
+	}
+
+	spin_lock_irqsave(&data->interrupt->ts_list_lock, flags);
+
+	/* get ts buffer record */
+	rc = ts_get_and_handle_kernel_record(hdev, ctx, data, &flags, &pend);
+	if (rc) {
+		spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
+		goto put_ts_buff;
+	}
+
+	/* We check for completion value as interrupt could have been received
+	 * before we add the timestamp node to the ts list.
+	 */
+	if (*pend->cq_kernel_addr >= data->target_value) {
+		spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
+
+		dev_dbg(hdev->dev, "Target value already reached release ts record: pend: %p, offset: %llu, interrupt: %u\n",
+				pend, data->ts_offset, data->interrupt->interrupt_id);
+
+		pend->ts_reg_info.in_use = 0;
+		*status = HL_WAIT_CS_STATUS_COMPLETED;
+		*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
+
+		goto put_ts_buff;
+	}
+
+	list_add_tail(&pend->list_node, &data->interrupt->ts_list_head);
+	spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags);
+
+	rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
+
+	hl_ctx_put(ctx);
+
+	return rc;
+
+put_ts_buff:
+	hl_mmap_mem_buf_put(data->buf);
+put_cq_cb:
+	hl_cb_put(data->cq_cb);
+put_ctx:
+	hl_ctx_put(ctx);
+
+	return rc;
 }
 
 static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
-				struct hl_mem_mgr *cb_mmg, struct hl_mem_mgr *mmg,
-				u64 timeout_us, u64 cq_counters_handle, u64 cq_counters_offset,
-				u64 target_value, struct hl_user_interrupt *interrupt,
-				bool register_ts_record, u64 ts_handle, u64 ts_offset,
+				struct wait_interrupt_data *data,
 				u32 *status, u64 *timestamp)
 {
 	struct hl_user_pending_interrupt *pend;
-	struct hl_mmap_mem_buf *buf;
-	struct hl_cb *cq_cb;
-	unsigned long timeout;
+	unsigned long timeout, flags;
 	long completion_rc;
 	int rc = 0;
 
-	timeout = hl_usecs64_to_jiffies(timeout_us);
+	timeout = hl_usecs64_to_jiffies(data->intr_timeout_us);
 
 	hl_ctx_get(ctx);
 
-	cq_cb = hl_cb_get(cb_mmg, cq_counters_handle);
-	if (!cq_cb) {
+	data->cq_cb = hl_cb_get(data->mmg, data->cq_handle);
+	if (!data->cq_cb) {
 		rc = -EINVAL;
 		goto put_ctx;
 	}
 
 	/* Validate the cq offset */
-	if (((u64 *) cq_cb->kernel_address + cq_counters_offset) >=
-			((u64 *) cq_cb->kernel_address + (cq_cb->size / sizeof(u64)))) {
+	if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >=
+			((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) {
 		rc = -EINVAL;
 		goto put_cq_cb;
 	}
 
-	if (register_ts_record) {
-		dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n",
-					interrupt->interrupt_id, ts_offset, cq_counters_offset);
-		buf = hl_mmap_mem_buf_get(mmg, ts_handle);
-		if (!buf) {
-			rc = -EINVAL;
-			goto put_cq_cb;
-		}
-
-		/* get ts buffer record */
-		rc = ts_buff_get_kernel_ts_record(buf, cq_cb, ts_offset,
-						cq_counters_offset, target_value,
-						&interrupt->wait_list_lock, &pend);
-		if (rc)
-			goto put_ts_buff;
-	} else {
-		pend = kzalloc(sizeof(*pend), GFP_KERNEL);
-		if (!pend) {
-			rc = -ENOMEM;
-			goto put_cq_cb;
-		}
-		hl_fence_init(&pend->fence, ULONG_MAX);
-		pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset;
-		pend->cq_target_value = target_value;
+	pend = kzalloc(sizeof(*pend), GFP_KERNEL);
+	if (!pend) {
+		rc = -ENOMEM;
+		goto put_cq_cb;
 	}
 
-	spin_lock(&interrupt->wait_list_lock);
+	hl_fence_init(&pend->fence, ULONG_MAX);
+	pend->cq_kernel_addr = (u64 *) data->cq_cb->kernel_address + data->cq_offset;
+	pend->cq_target_value = data->target_value;
+	spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
+
 	/* We check for completion value as interrupt could have been received
-	 * before we added the node to the wait list
+	 * before we add the wait node to the wait list.
 	 */
-	if (*pend->cq_kernel_addr >= target_value) {
-		if (register_ts_record)
-			pend->ts_reg_info.in_use = 0;
-		spin_unlock(&interrupt->wait_list_lock);
+	if (*pend->cq_kernel_addr >= data->target_value || (!data->intr_timeout_us)) {
+		spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
 
-		*status = HL_WAIT_CS_STATUS_COMPLETED;
+		if (*pend->cq_kernel_addr >= data->target_value)
+			*status = HL_WAIT_CS_STATUS_COMPLETED;
+		else
+			*status = HL_WAIT_CS_STATUS_BUSY;
 
-		if (register_ts_record) {
-			*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
-			goto put_ts_buff;
-		} else {
-			pend->fence.timestamp = ktime_get();
-			goto set_timestamp;
-		}
-	} else if (!timeout_us) {
-		spin_unlock(&interrupt->wait_list_lock);
-		*status = HL_WAIT_CS_STATUS_BUSY;
 		pend->fence.timestamp = ktime_get();
 		goto set_timestamp;
 	}
@@ -3366,55 +3458,38 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 	 * Note that we cannot have sorted list by target value,
 	 * in order to shorten the list pass loop, since
 	 * same list could have nodes for different cq counter handle.
-	 * Note:
-	 * Mark ts buff offset as in use here in the spinlock protection area
-	 * to avoid getting in the re-use section in ts_buff_get_kernel_ts_record
-	 * before adding the node to the list. this scenario might happen when
-	 * multiple threads are racing on same offset and one thread could
-	 * set the ts buff in ts_buff_get_kernel_ts_record then the other thread
-	 * takes over and get to ts_buff_get_kernel_ts_record and then we will try
-	 * to re-use the same ts buff offset, and will try to delete a non existing
-	 * node from the list.
 	 */
-	if (register_ts_record)
-		pend->ts_reg_info.in_use = 1;
-
-	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
-	spin_unlock(&interrupt->wait_list_lock);
-
-	if (register_ts_record) {
-		rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
-		goto ts_registration_exit;
-	}
+	list_add_tail(&pend->list_node, &data->interrupt->wait_list_head);
+	spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
 
 	/* Wait for interrupt handler to signal completion */
 	completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
								timeout);
 	if (completion_rc > 0) {
-		*status = HL_WAIT_CS_STATUS_COMPLETED;
+		if (pend->fence.error == -EIO) {
+			dev_err_ratelimited(hdev->dev,
+					"interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
+					pend->fence.error);
+			rc = -EIO;
+			*status = HL_WAIT_CS_STATUS_ABORTED;
+		} else {
+			*status = HL_WAIT_CS_STATUS_COMPLETED;
+		}
 	} else {
 		if (completion_rc == -ERESTARTSYS) {
 			dev_err_ratelimited(hdev->dev,
				"user process got signal while waiting for interrupt ID %d\n",
-				interrupt->interrupt_id);
+				data->interrupt->interrupt_id);
 			rc = -EINTR;
 			*status = HL_WAIT_CS_STATUS_ABORTED;
 		} else {
-			if (pend->fence.error == -EIO) {
-				dev_err_ratelimited(hdev->dev,
-						"interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
-						pend->fence.error);
-				rc = -EIO;
-				*status = HL_WAIT_CS_STATUS_ABORTED;
-			} else {
-				/* The wait has timed-out. We don't know anything beyond that
-				 * because the workload wasn't submitted through the driver.
-				 * Therefore, from driver's perspective, the workload is still
-				 * executing.
-				 */
-				rc = 0;
-				*status = HL_WAIT_CS_STATUS_BUSY;
-			}
+			/* The wait has timed-out. We don't know anything beyond that
+			 * because the workload was not submitted through the driver.
+			 * Therefore, from driver's perspective, the workload is still
+			 * executing.
+			 */
+			rc = 0;
+			*status = HL_WAIT_CS_STATUS_BUSY;
 		}
 	}
@@ -3424,23 +3499,20 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 	 * for ts record, the node will be deleted in the irq handler after
 	 * we reach the target value.
 	 */
-	spin_lock(&interrupt->wait_list_lock);
-	list_del(&pend->wait_list_node);
-	spin_unlock(&interrupt->wait_list_lock);
+	spin_lock_irqsave(&data->interrupt->wait_list_lock, flags);
+	list_del(&pend->list_node);
+	spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags);
 
 set_timestamp:
 	*timestamp = ktime_to_ns(pend->fence.timestamp);
 	kfree(pend);
-	hl_cb_put(cq_cb);
-ts_registration_exit:
+	hl_cb_put(data->cq_cb);
 	hl_ctx_put(ctx);
 
 	return rc;
 
-put_ts_buff:
-	hl_mmap_mem_buf_put(buf);
 put_cq_cb:
-	hl_cb_put(cq_cb);
+	hl_cb_put(data->cq_cb);
 put_ctx:
 	hl_ctx_put(ctx);
@@ -3454,7 +3526,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
 				u64 *timestamp)
 {
 	struct hl_user_pending_interrupt *pend;
-	unsigned long timeout;
+	unsigned long timeout, flags;
 	u64 completion_value;
 	long completion_rc;
 	int rc = 0;
@@ -3474,9 +3546,9 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
 	/* Add pending user interrupt to relevant list for the interrupt
 	 * handler to monitor
 	 */
-	spin_lock(&interrupt->wait_list_lock);
-	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
-	spin_unlock(&interrupt->wait_list_lock);
+	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+	list_add_tail(&pend->list_node, &interrupt->wait_list_head);
+	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
 	/* We check for completion value as interrupt could have been received
 	 * before we added the node to the wait list
@@ -3507,14 +3579,14 @@ wait_again:
 	 * If comparison fails, keep waiting until timeout expires
 	 */
 	if (completion_rc > 0) {
-		spin_lock(&interrupt->wait_list_lock);
+		spin_lock_irqsave(&interrupt->wait_list_lock, flags);
 		/* reinit_completion must be called before we check for user
 		 * completion value, otherwise, if interrupt is received after
 		 * the comparison and before the next wait_for_completion,
 		 * we will reach timeout and fail
 		 */
 		reinit_completion(&pend->fence.completion);
-		spin_unlock(&interrupt->wait_list_lock);
+		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
 		if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
 			dev_err(hdev->dev, "Failed to copy completion value from user\n");
@@ -3551,9 +3623,9 @@ wait_again:
 	}
 
 remove_pending_user_interrupt:
-	spin_lock(&interrupt->wait_list_lock);
-	list_del(&pend->wait_list_node);
-	spin_unlock(&interrupt->wait_list_lock);
+	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+	list_del(&pend->list_node);
+	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
 	*timestamp = ktime_to_ns(pend->fence.timestamp);
@@ -3611,19 +3683,42 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 		return -EINVAL;
 	}
 
-	if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
-		rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->mem_mgr, &hpriv->mem_mgr,
-				args->in.interrupt_timeout_us, args->in.cq_counters_handle,
-				args->in.cq_counters_offset,
-				args->in.target, interrupt,
-				!!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT),
-				args->in.timestamp_handle, args->in.timestamp_offset,
-				&status, &timestamp);
-	else
+	if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) {
+		struct wait_interrupt_data wait_intr_data = {0};
+
+		wait_intr_data.interrupt = interrupt;
+		wait_intr_data.mmg = &hpriv->mem_mgr;
+		wait_intr_data.cq_handle = args->in.cq_counters_handle;
+		wait_intr_data.cq_offset = args->in.cq_counters_offset;
+		wait_intr_data.ts_handle = args->in.timestamp_handle;
+		wait_intr_data.ts_offset = args->in.timestamp_offset;
+		wait_intr_data.target_value = args->in.target;
+		wait_intr_data.intr_timeout_us = args->in.interrupt_timeout_us;
+
+		if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT) {
+			/*
+			 * Allow only one registration at a time. this is needed in order to prevent
+			 * issues while handling the flow of re-use of the same offset.
+			 * Since the registration flow is protected only by the interrupt lock,
+			 * re-use flow might request to move ts node to another interrupt list,
+			 * and in such case we're not protected.
+			 */
+			mutex_lock(&hpriv->ctx->ts_reg_lock);
+
+			rc = _hl_interrupt_ts_reg_ioctl(hdev, hpriv->ctx, &wait_intr_data,
+					&status, &timestamp);
+
+			mutex_unlock(&hpriv->ctx->ts_reg_lock);
+		} else
+			rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &wait_intr_data,
+					&status, &timestamp);
+	} else {
 		rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
 				args->in.interrupt_timeout_us, args->in.addr,
 				args->in.target, interrupt, &status,
 				&timestamp);
+	}
+
 	if (rc)
 		return rc;
@@ -3638,8 +3733,9 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 	return 0;
 }
 
-int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data)
+int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
 {
+	struct hl_fpriv *hpriv = file_priv->driver_priv;
 	struct hl_device *hdev = hpriv->hdev;
 	union hl_wait_cs_args *args = data;
 	u32 flags = args->in.flags;
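
A recurring pattern in this diff is the move from plain spin_lock()/spin_unlock() to spin_lock_irqsave()/spin_unlock_irqrestore() on the per-interrupt wait and timestamp lists, which are manipulated from interrupt context as well as from the ioctl paths. The sketch below is illustrative only; it is not part of the patch and uses made-up names (demo_node, demo_queue_node, demo_irq_drain). It shows the general kernel idiom under that assumption: a lock that an interrupt handler can take must be taken with local interrupts disabled on the process-context side, otherwise the handler can spin forever on a lock held by the code it interrupted.

/* Illustrative sketch, not from the habanalabs driver: one list shared
 * between process context and an interrupt handler, protected by a single
 * spinlock taken with the irqsave/irqrestore variants.
 */
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/slab.h>

struct demo_node {
	struct list_head list_node;
	u64 value;
};

static LIST_HEAD(demo_list);
static DEFINE_SPINLOCK(demo_list_lock);

/* Process context: the allocation may sleep, but local interrupts must be
 * disabled while the lock is held so the handler cannot deadlock against
 * us on this CPU.
 */
static int demo_queue_node(u64 value)
{
	struct demo_node *node;
	unsigned long flags;

	node = kzalloc(sizeof(*node), GFP_KERNEL);
	if (!node)
		return -ENOMEM;

	node->value = value;

	spin_lock_irqsave(&demo_list_lock, flags);
	list_add_tail(&node->list_node, &demo_list);
	spin_unlock_irqrestore(&demo_list_lock, flags);

	return 0;
}

/* Interrupt context: drain the list under the same lock. */
static void demo_irq_drain(void)
{
	struct demo_node *node, *tmp;
	unsigned long flags;

	spin_lock_irqsave(&demo_list_lock, flags);
	list_for_each_entry_safe(node, tmp, &demo_list, list_node) {
		list_del(&node->list_node);
		kfree(node);
	}
	spin_unlock_irqrestore(&demo_list_lock, flags);
}

The same concern appears to motivate splitting each user interrupt into a wait_list and a ts_list with separate locks in the patch above: the plain wait flow and the timestamp-registration flow can then be serialized independently, without either path holding both locks at once.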