diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 10:05:51 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 10:05:51 +0000 |
commit | 5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 (patch) | |
tree | a94efe259b9009378be6d90eb30d2b019d95c194 /drivers/misc/habanalabs/common | |
parent | Initial commit. (diff) | |
download | linux-430c2fc249ea5c0536abd21c23382884005c9093.tar.xz linux-430c2fc249ea5c0536abd21c23382884005c9093.zip |
Adding upstream version 5.10.209.upstream/5.10.209upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/misc/habanalabs/common')
19 files changed, 15035 insertions, 0 deletions
diff --git a/drivers/misc/habanalabs/common/Makefile b/drivers/misc/habanalabs/common/Makefile new file mode 100644 index 000000000..eccd8c7dc --- /dev/null +++ b/drivers/misc/habanalabs/common/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \ + common/asid.o common/habanalabs_ioctl.o \ + common/command_buffer.o common/hw_queue.o common/irq.o \ + common/sysfs.o common/hwmon.o common/memory.o \ + common/command_submission.o common/mmu.o common/mmu_v1.o \ + common/firmware_if.o common/pci.o diff --git a/drivers/misc/habanalabs/common/asid.c b/drivers/misc/habanalabs/common/asid.c new file mode 100644 index 000000000..a2fdf31cf --- /dev/null +++ b/drivers/misc/habanalabs/common/asid.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" + +#include <linux/slab.h> + +int hl_asid_init(struct hl_device *hdev) +{ + hdev->asid_bitmap = kcalloc(BITS_TO_LONGS(hdev->asic_prop.max_asid), + sizeof(*hdev->asid_bitmap), GFP_KERNEL); + if (!hdev->asid_bitmap) + return -ENOMEM; + + mutex_init(&hdev->asid_mutex); + + /* ASID 0 is reserved for the kernel driver and device CPU */ + set_bit(0, hdev->asid_bitmap); + + return 0; +} + +void hl_asid_fini(struct hl_device *hdev) +{ + mutex_destroy(&hdev->asid_mutex); + kfree(hdev->asid_bitmap); +} + +unsigned long hl_asid_alloc(struct hl_device *hdev) +{ + unsigned long found; + + mutex_lock(&hdev->asid_mutex); + + found = find_first_zero_bit(hdev->asid_bitmap, + hdev->asic_prop.max_asid); + if (found == hdev->asic_prop.max_asid) + found = 0; + else + set_bit(found, hdev->asid_bitmap); + + mutex_unlock(&hdev->asid_mutex); + + return found; +} + +void hl_asid_free(struct hl_device *hdev, unsigned long asid) +{ + if (WARN((asid == 0 || asid >= hdev->asic_prop.max_asid), + "Invalid ASID %lu", asid)) + return; + clear_bit(asid, hdev->asid_bitmap); +} diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c new file mode 100644 index 000000000..ada570f35 --- /dev/null +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -0,0 +1,687 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include <uapi/misc/habanalabs.h> +#include "habanalabs.h" + +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/genalloc.h> + +static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_vm_va_block *va_block, *tmp; + dma_addr_t bus_addr; + u64 virt_addr; + u32 page_size = prop->pmmu.page_size; + s32 offset; + int rc; + + if (!hdev->supports_cb_mapping) { + dev_err_ratelimited(hdev->dev, + "Cannot map CB because no VA range is allocated for CB mapping\n"); + return -EINVAL; + } + + if (!hdev->mmu_enable) { + dev_err_ratelimited(hdev->dev, + "Cannot map CB because MMU is disabled\n"); + return -EINVAL; + } + + INIT_LIST_HEAD(&cb->va_block_list); + + for (bus_addr = cb->bus_address; + bus_addr < cb->bus_address + cb->size; + bus_addr += page_size) { + + virt_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, page_size); + if (!virt_addr) { + dev_err(hdev->dev, + "Failed to allocate device virtual address for CB\n"); + rc = -ENOMEM; + goto err_va_pool_free; + } + + va_block = kzalloc(sizeof(*va_block), GFP_KERNEL); + if (!va_block) { + rc = -ENOMEM; + gen_pool_free(ctx->cb_va_pool, virt_addr, page_size); + goto err_va_pool_free; + } + + va_block->start = virt_addr; + va_block->end = virt_addr + page_size; + va_block->size = page_size; + list_add_tail(&va_block->node, &cb->va_block_list); + } + + mutex_lock(&ctx->mmu_lock); + + bus_addr = cb->bus_address; + offset = 0; + list_for_each_entry(va_block, &cb->va_block_list, node) { + rc = hl_mmu_map(ctx, va_block->start, bus_addr, va_block->size, + list_is_last(&va_block->node, + &cb->va_block_list)); + if (rc) { + dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", + va_block->start); + goto err_va_umap; + } + + bus_addr += va_block->size; + offset += va_block->size; + } + + hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR); + + mutex_unlock(&ctx->mmu_lock); + + cb->is_mmu_mapped = true; + + return 0; + +err_va_umap: + list_for_each_entry(va_block, &cb->va_block_list, node) { + if (offset <= 0) + break; + hl_mmu_unmap(ctx, va_block->start, va_block->size, + offset <= va_block->size); + offset -= va_block->size; + } + + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR); + + mutex_unlock(&ctx->mmu_lock); + +err_va_pool_free: + list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) { + gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size); + list_del(&va_block->node); + kfree(va_block); + } + + return rc; +} + +static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb) +{ + struct hl_device *hdev = ctx->hdev; + struct hl_vm_va_block *va_block, *tmp; + + mutex_lock(&ctx->mmu_lock); + + list_for_each_entry(va_block, &cb->va_block_list, node) + if (hl_mmu_unmap(ctx, va_block->start, va_block->size, + list_is_last(&va_block->node, + &cb->va_block_list))) + dev_warn_ratelimited(hdev->dev, + "Failed to unmap CB's va 0x%llx\n", + va_block->start); + + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR); + + mutex_unlock(&ctx->mmu_lock); + + list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) { + gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size); + list_del(&va_block->node); + kfree(va_block); + } +} + +static void cb_fini(struct hl_device *hdev, struct hl_cb *cb) +{ + if (cb->is_internal) + gen_pool_free(hdev->internal_cb_pool, + (uintptr_t)cb->kernel_address, cb->size); + else + hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size, + cb->kernel_address, cb->bus_address); + + kfree(cb); +} + +static void cb_do_release(struct hl_device *hdev, struct hl_cb *cb) +{ + if (cb->is_pool) { + spin_lock(&hdev->cb_pool_lock); + list_add(&cb->pool_list, &hdev->cb_pool); + spin_unlock(&hdev->cb_pool_lock); + } else { + cb_fini(hdev, cb); + } +} + +static void cb_release(struct kref *ref) +{ + struct hl_device *hdev; + struct hl_cb *cb; + + cb = container_of(ref, struct hl_cb, refcount); + hdev = cb->hdev; + + hl_debugfs_remove_cb(cb); + + if (cb->is_mmu_mapped) + cb_unmap_mem(cb->ctx, cb); + + hl_ctx_put(cb->ctx); + + cb_do_release(hdev, cb); +} + +static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, + int ctx_id, bool internal_cb) +{ + struct hl_cb *cb; + u32 cb_offset; + void *p; + + /* + * We use of GFP_ATOMIC here because this function can be called from + * the latency-sensitive code path for command submission. Due to H/W + * limitations in some of the ASICs, the kernel must copy the user CB + * that is designated for an external queue and actually enqueue + * the kernel's copy. Hence, we must never sleep in this code section + * and must use GFP_ATOMIC for all memory allocations. + */ + if (ctx_id == HL_KERNEL_ASID_ID) + cb = kzalloc(sizeof(*cb), GFP_ATOMIC); + else + cb = kzalloc(sizeof(*cb), GFP_KERNEL); + + if (!cb) + return NULL; + + if (internal_cb) { + p = (void *) gen_pool_alloc(hdev->internal_cb_pool, cb_size); + if (!p) { + kfree(cb); + return NULL; + } + + cb_offset = p - hdev->internal_cb_pool_virt_addr; + cb->is_internal = true; + cb->bus_address = hdev->internal_cb_va_base + cb_offset; + } else if (ctx_id == HL_KERNEL_ASID_ID) { + p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size, + &cb->bus_address, GFP_ATOMIC); + } else { + p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size, + &cb->bus_address, + GFP_USER | __GFP_ZERO); + } + + if (!p) { + dev_err(hdev->dev, + "failed to allocate %d of dma memory for CB\n", + cb_size); + kfree(cb); + return NULL; + } + + cb->kernel_address = p; + cb->size = cb_size; + + return cb; +} + +int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, + struct hl_ctx *ctx, u32 cb_size, bool internal_cb, + bool map_cb, u64 *handle) +{ + struct hl_cb *cb; + bool alloc_new_cb = true; + int rc, ctx_id = ctx->asid; + + /* + * Can't use generic function to check this because of special case + * where we create a CB as part of the reset process + */ + if ((hdev->disabled) || ((atomic_read(&hdev->in_reset)) && + (ctx_id != HL_KERNEL_ASID_ID))) { + dev_warn_ratelimited(hdev->dev, + "Device is disabled or in reset. Can't create new CBs\n"); + rc = -EBUSY; + goto out_err; + } + + if (cb_size > SZ_2M) { + dev_err(hdev->dev, "CB size %d must be less than %d\n", + cb_size, SZ_2M); + rc = -EINVAL; + goto out_err; + } + + if (!internal_cb) { + /* Minimum allocation must be PAGE SIZE */ + if (cb_size < PAGE_SIZE) + cb_size = PAGE_SIZE; + + if (ctx_id == HL_KERNEL_ASID_ID && + cb_size <= hdev->asic_prop.cb_pool_cb_size) { + + spin_lock(&hdev->cb_pool_lock); + if (!list_empty(&hdev->cb_pool)) { + cb = list_first_entry(&hdev->cb_pool, + typeof(*cb), pool_list); + list_del(&cb->pool_list); + spin_unlock(&hdev->cb_pool_lock); + alloc_new_cb = false; + } else { + spin_unlock(&hdev->cb_pool_lock); + dev_dbg(hdev->dev, "CB pool is empty\n"); + } + } + } + + if (alloc_new_cb) { + cb = hl_cb_alloc(hdev, cb_size, ctx_id, internal_cb); + if (!cb) { + rc = -ENOMEM; + goto out_err; + } + } + + cb->hdev = hdev; + cb->ctx = ctx; + hl_ctx_get(hdev, cb->ctx); + + if (map_cb) { + if (ctx_id == HL_KERNEL_ASID_ID) { + dev_err(hdev->dev, + "CB mapping is not supported for kernel context\n"); + rc = -EINVAL; + goto release_cb; + } + + rc = cb_map_mem(ctx, cb); + if (rc) + goto release_cb; + } + + spin_lock(&mgr->cb_lock); + rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC); + spin_unlock(&mgr->cb_lock); + + if (rc < 0) { + dev_err(hdev->dev, "Failed to allocate IDR for a new CB\n"); + goto unmap_mem; + } + + cb->id = (u64) rc; + + kref_init(&cb->refcount); + spin_lock_init(&cb->lock); + + /* + * idr is 32-bit so we can safely OR it with a mask that is above + * 32 bit + */ + *handle = cb->id | HL_MMAP_TYPE_CB; + *handle <<= PAGE_SHIFT; + + hl_debugfs_add_cb(cb); + + return 0; + +unmap_mem: + if (cb->is_mmu_mapped) + cb_unmap_mem(cb->ctx, cb); +release_cb: + hl_ctx_put(cb->ctx); + cb_do_release(hdev, cb); +out_err: + *handle = 0; + + return rc; +} + +int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle) +{ + struct hl_cb *cb; + u32 handle; + int rc = 0; + + /* + * handle was given to user to do mmap, I need to shift it back to + * how the idr module gave it to me + */ + cb_handle >>= PAGE_SHIFT; + handle = (u32) cb_handle; + + spin_lock(&mgr->cb_lock); + + cb = idr_find(&mgr->cb_handles, handle); + if (cb) { + idr_remove(&mgr->cb_handles, handle); + spin_unlock(&mgr->cb_lock); + kref_put(&cb->refcount, cb_release); + } else { + spin_unlock(&mgr->cb_lock); + dev_err(hdev->dev, + "CB destroy failed, no match to handle 0x%x\n", handle); + rc = -EINVAL; + } + + return rc; +} + +int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) +{ + union hl_cb_args *args = data; + struct hl_device *hdev = hpriv->hdev; + u64 handle = 0; + int rc; + + if (hl_device_disabled_or_in_reset(hdev)) { + dev_warn_ratelimited(hdev->dev, + "Device is %s. Can't execute CB IOCTL\n", + atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); + return -EBUSY; + } + + switch (args->in.op) { + case HL_CB_OP_CREATE: + if (args->in.cb_size > HL_MAX_CB_SIZE) { + dev_err(hdev->dev, + "User requested CB size %d must be less than %d\n", + args->in.cb_size, HL_MAX_CB_SIZE); + rc = -EINVAL; + } else { + rc = hl_cb_create(hdev, &hpriv->cb_mgr, hpriv->ctx, + args->in.cb_size, false, + !!(args->in.flags & HL_CB_FLAGS_MAP), + &handle); + } + + memset(args, 0, sizeof(*args)); + args->out.cb_handle = handle; + break; + + case HL_CB_OP_DESTROY: + rc = hl_cb_destroy(hdev, &hpriv->cb_mgr, + args->in.cb_handle); + break; + + default: + rc = -ENOTTY; + break; + } + + return rc; +} + +static void cb_vm_close(struct vm_area_struct *vma) +{ + struct hl_cb *cb = (struct hl_cb *) vma->vm_private_data; + long new_mmap_size; + + new_mmap_size = cb->mmap_size - (vma->vm_end - vma->vm_start); + + if (new_mmap_size > 0) { + cb->mmap_size = new_mmap_size; + return; + } + + spin_lock(&cb->lock); + cb->mmap = false; + spin_unlock(&cb->lock); + + hl_cb_put(cb); + vma->vm_private_data = NULL; +} + +static const struct vm_operations_struct cb_vm_ops = { + .close = cb_vm_close +}; + +int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_cb *cb; + u32 handle, user_cb_size; + int rc; + + /* We use the page offset to hold the idr and thus we need to clear + * it before doing the mmap itself + */ + handle = vma->vm_pgoff; + vma->vm_pgoff = 0; + + /* reference was taken here */ + cb = hl_cb_get(hdev, &hpriv->cb_mgr, handle); + if (!cb) { + dev_err(hdev->dev, + "CB mmap failed, no match to handle 0x%x\n", handle); + return -EINVAL; + } + + /* Validation check */ + user_cb_size = vma->vm_end - vma->vm_start; + if (user_cb_size != ALIGN(cb->size, PAGE_SIZE)) { + dev_err(hdev->dev, + "CB mmap failed, mmap size 0x%lx != 0x%x cb size\n", + vma->vm_end - vma->vm_start, cb->size); + rc = -EINVAL; + goto put_cb; + } + + if (!access_ok((void __user *) (uintptr_t) vma->vm_start, + user_cb_size)) { + dev_err(hdev->dev, + "user pointer is invalid - 0x%lx\n", + vma->vm_start); + + rc = -EINVAL; + goto put_cb; + } + + spin_lock(&cb->lock); + + if (cb->mmap) { + dev_err(hdev->dev, + "CB mmap failed, CB already mmaped to user\n"); + rc = -EINVAL; + goto release_lock; + } + + cb->mmap = true; + + spin_unlock(&cb->lock); + + vma->vm_ops = &cb_vm_ops; + + /* + * Note: We're transferring the cb reference to + * vma->vm_private_data here. + */ + + vma->vm_private_data = cb; + + rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address, + cb->bus_address, cb->size); + if (rc) { + spin_lock(&cb->lock); + cb->mmap = false; + goto release_lock; + } + + cb->mmap_size = cb->size; + + return 0; + +release_lock: + spin_unlock(&cb->lock); +put_cb: + hl_cb_put(cb); + return rc; +} + +struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr, + u32 handle) +{ + struct hl_cb *cb; + + spin_lock(&mgr->cb_lock); + cb = idr_find(&mgr->cb_handles, handle); + + if (!cb) { + spin_unlock(&mgr->cb_lock); + dev_warn(hdev->dev, + "CB get failed, no match to handle 0x%x\n", handle); + return NULL; + } + + kref_get(&cb->refcount); + + spin_unlock(&mgr->cb_lock); + + return cb; + +} + +void hl_cb_put(struct hl_cb *cb) +{ + kref_put(&cb->refcount, cb_release); +} + +void hl_cb_mgr_init(struct hl_cb_mgr *mgr) +{ + spin_lock_init(&mgr->cb_lock); + idr_init(&mgr->cb_handles); +} + +void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr) +{ + struct hl_cb *cb; + struct idr *idp; + u32 id; + + idp = &mgr->cb_handles; + + idr_for_each_entry(idp, cb, id) { + if (kref_put(&cb->refcount, cb_release) != 1) + dev_err(hdev->dev, + "CB %d for CTX ID %d is still alive\n", + id, cb->ctx->asid); + } + + idr_destroy(&mgr->cb_handles); +} + +struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size, + bool internal_cb) +{ + u64 cb_handle; + struct hl_cb *cb; + int rc; + + rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, cb_size, + internal_cb, false, &cb_handle); + if (rc) { + dev_err(hdev->dev, + "Failed to allocate CB for the kernel driver %d\n", rc); + return NULL; + } + + cb_handle >>= PAGE_SHIFT; + cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, (u32) cb_handle); + /* hl_cb_get should never fail here so use kernel WARN */ + WARN(!cb, "Kernel CB handle invalid 0x%x\n", (u32) cb_handle); + if (!cb) + goto destroy_cb; + + return cb; + +destroy_cb: + hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb_handle << PAGE_SHIFT); + + return NULL; +} + +int hl_cb_pool_init(struct hl_device *hdev) +{ + struct hl_cb *cb; + int i; + + INIT_LIST_HEAD(&hdev->cb_pool); + spin_lock_init(&hdev->cb_pool_lock); + + for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) { + cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size, + HL_KERNEL_ASID_ID, false); + if (cb) { + cb->is_pool = true; + list_add(&cb->pool_list, &hdev->cb_pool); + } else { + hl_cb_pool_fini(hdev); + return -ENOMEM; + } + } + + return 0; +} + +int hl_cb_pool_fini(struct hl_device *hdev) +{ + struct hl_cb *cb, *tmp; + + list_for_each_entry_safe(cb, tmp, &hdev->cb_pool, pool_list) { + list_del(&cb->pool_list); + cb_fini(hdev, cb); + } + + return 0; +} + +int hl_cb_va_pool_init(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + int rc; + + if (!hdev->supports_cb_mapping) + return 0; + + ctx->cb_va_pool = gen_pool_create(__ffs(prop->pmmu.page_size), -1); + if (!ctx->cb_va_pool) { + dev_err(hdev->dev, + "Failed to create VA gen pool for CB mapping\n"); + return -ENOMEM; + } + + rc = gen_pool_add(ctx->cb_va_pool, prop->cb_va_start_addr, + prop->cb_va_end_addr - prop->cb_va_start_addr, -1); + if (rc) { + dev_err(hdev->dev, + "Failed to add memory to VA gen pool for CB mapping\n"); + goto err_pool_destroy; + } + + return 0; + +err_pool_destroy: + gen_pool_destroy(ctx->cb_va_pool); + + return rc; +} + +void hl_cb_va_pool_fini(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + + if (!hdev->supports_cb_mapping) + return; + + gen_pool_destroy(ctx->cb_va_pool); +} diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c new file mode 100644 index 000000000..b2b974ecc --- /dev/null +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -0,0 +1,1245 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include <uapi/misc/habanalabs.h> +#include "habanalabs.h" + +#include <linux/uaccess.h> +#include <linux/slab.h> + +#define HL_CS_FLAGS_SIG_WAIT (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT) + +static void job_wq_completion(struct work_struct *work); +static long _hl_cs_wait_ioctl(struct hl_device *hdev, + struct hl_ctx *ctx, u64 timeout_us, u64 seq); +static void cs_do_release(struct kref *ref); + +static void hl_sob_reset(struct kref *ref) +{ + struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob, + kref); + struct hl_device *hdev = hw_sob->hdev; + + hdev->asic_funcs->reset_sob(hdev, hw_sob); +} + +void hl_sob_reset_error(struct kref *ref) +{ + struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob, + kref); + struct hl_device *hdev = hw_sob->hdev; + + dev_crit(hdev->dev, + "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n", + hw_sob->q_idx, hw_sob->sob_id); +} + +static void hl_fence_release(struct kref *kref) +{ + struct hl_fence *fence = + container_of(kref, struct hl_fence, refcount); + struct hl_cs_compl *hl_cs_cmpl = + container_of(fence, struct hl_cs_compl, base_fence); + struct hl_device *hdev = hl_cs_cmpl->hdev; + + /* EBUSY means the CS was never submitted and hence we don't have + * an attached hw_sob object that we should handle here + */ + if (fence->error == -EBUSY) + goto free; + + if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) || + (hl_cs_cmpl->type == CS_TYPE_WAIT)) { + + dev_dbg(hdev->dev, + "CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n", + hl_cs_cmpl->cs_seq, + hl_cs_cmpl->type, + hl_cs_cmpl->hw_sob->sob_id, + hl_cs_cmpl->sob_val); + + /* + * A signal CS can get completion while the corresponding wait + * for signal CS is on its way to the PQ. The wait for signal CS + * will get stuck if the signal CS incremented the SOB to its + * max value and there are no pending (submitted) waits on this + * SOB. + * We do the following to void this situation: + * 1. The wait for signal CS must get a ref for the signal CS as + * soon as possible in cs_ioctl_signal_wait() and put it + * before being submitted to the PQ but after it incremented + * the SOB refcnt in init_signal_wait_cs(). + * 2. Signal/Wait for signal CS will decrement the SOB refcnt + * here. + * These two measures guarantee that the wait for signal CS will + * reset the SOB upon completion rather than the signal CS and + * hence the above scenario is avoided. + */ + kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset); + } + +free: + kfree(hl_cs_cmpl); +} + +void hl_fence_put(struct hl_fence *fence) +{ + if (fence) + kref_put(&fence->refcount, hl_fence_release); +} + +void hl_fence_get(struct hl_fence *fence) +{ + if (fence) + kref_get(&fence->refcount); +} + +static void hl_fence_init(struct hl_fence *fence) +{ + kref_init(&fence->refcount); + fence->error = 0; + init_completion(&fence->completion); +} + +static void cs_get(struct hl_cs *cs) +{ + kref_get(&cs->refcount); +} + +static int cs_get_unless_zero(struct hl_cs *cs) +{ + return kref_get_unless_zero(&cs->refcount); +} + +static void cs_put(struct hl_cs *cs) +{ + kref_put(&cs->refcount, cs_do_release); +} + +static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job) +{ + /* + * Patched CB is created for external queues jobs, and for H/W queues + * jobs if the user CB was allocated by driver and MMU is disabled. + */ + return (job->queue_type == QUEUE_TYPE_EXT || + (job->queue_type == QUEUE_TYPE_HW && + job->is_kernel_allocated_cb && + !hdev->mmu_enable)); +} + +/* + * cs_parser - parse the user command submission + * + * @hpriv : pointer to the private data of the fd + * @job : pointer to the job that holds the command submission info + * + * The function parses the command submission of the user. It calls the + * ASIC specific parser, which returns a list of memory blocks to send + * to the device as different command buffers + * + */ +static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_cs_parser parser; + int rc; + + parser.ctx_id = job->cs->ctx->asid; + parser.cs_sequence = job->cs->sequence; + parser.job_id = job->id; + + parser.hw_queue_id = job->hw_queue_id; + parser.job_userptr_list = &job->userptr_list; + parser.patched_cb = NULL; + parser.user_cb = job->user_cb; + parser.user_cb_size = job->user_cb_size; + parser.queue_type = job->queue_type; + parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb; + job->patched_cb = NULL; + + rc = hdev->asic_funcs->cs_parser(hdev, &parser); + + if (is_cb_patched(hdev, job)) { + if (!rc) { + job->patched_cb = parser.patched_cb; + job->job_cb_size = parser.patched_cb_size; + job->contains_dma_pkt = parser.contains_dma_pkt; + + spin_lock(&job->patched_cb->lock); + job->patched_cb->cs_cnt++; + spin_unlock(&job->patched_cb->lock); + } + + /* + * Whether the parsing worked or not, we don't need the + * original CB anymore because it was already parsed and + * won't be accessed again for this CS + */ + spin_lock(&job->user_cb->lock); + job->user_cb->cs_cnt--; + spin_unlock(&job->user_cb->lock); + hl_cb_put(job->user_cb); + job->user_cb = NULL; + } else if (!rc) { + job->job_cb_size = job->user_cb_size; + } + + return rc; +} + +static void free_job(struct hl_device *hdev, struct hl_cs_job *job) +{ + struct hl_cs *cs = job->cs; + + if (is_cb_patched(hdev, job)) { + hl_userptr_delete_list(hdev, &job->userptr_list); + + /* + * We might arrive here from rollback and patched CB wasn't + * created, so we need to check it's not NULL + */ + if (job->patched_cb) { + spin_lock(&job->patched_cb->lock); + job->patched_cb->cs_cnt--; + spin_unlock(&job->patched_cb->lock); + + hl_cb_put(job->patched_cb); + } + } + + /* For H/W queue jobs, if a user CB was allocated by driver and MMU is + * enabled, the user CB isn't released in cs_parser() and thus should be + * released here. + */ + if (job->queue_type == QUEUE_TYPE_HW && + job->is_kernel_allocated_cb && hdev->mmu_enable) { + spin_lock(&job->user_cb->lock); + job->user_cb->cs_cnt--; + spin_unlock(&job->user_cb->lock); + + hl_cb_put(job->user_cb); + } + + /* + * This is the only place where there can be multiple threads + * modifying the list at the same time + */ + spin_lock(&cs->job_lock); + list_del(&job->cs_node); + spin_unlock(&cs->job_lock); + + hl_debugfs_remove_job(hdev, job); + + if (job->queue_type == QUEUE_TYPE_EXT || + job->queue_type == QUEUE_TYPE_HW) + cs_put(cs); + + kfree(job); +} + +static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx) +{ + hdev->aggregated_cs_counters.device_in_reset_drop_cnt += + ctx->cs_counters.device_in_reset_drop_cnt; + hdev->aggregated_cs_counters.out_of_mem_drop_cnt += + ctx->cs_counters.out_of_mem_drop_cnt; + hdev->aggregated_cs_counters.parsing_drop_cnt += + ctx->cs_counters.parsing_drop_cnt; + hdev->aggregated_cs_counters.queue_full_drop_cnt += + ctx->cs_counters.queue_full_drop_cnt; + hdev->aggregated_cs_counters.max_cs_in_flight_drop_cnt += + ctx->cs_counters.max_cs_in_flight_drop_cnt; +} + +static void cs_do_release(struct kref *ref) +{ + struct hl_cs *cs = container_of(ref, struct hl_cs, + refcount); + struct hl_device *hdev = cs->ctx->hdev; + struct hl_cs_job *job, *tmp; + + cs->completed = true; + + /* + * Although if we reached here it means that all external jobs have + * finished, because each one of them took refcnt to CS, we still + * need to go over the internal jobs and free them. Otherwise, we + * will have leaked memory and what's worse, the CS object (and + * potentially the CTX object) could be released, while the JOB + * still holds a pointer to them (but no reference). + */ + list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) + free_job(hdev, job); + + /* We also need to update CI for internal queues */ + if (cs->submitted) { + hdev->asic_funcs->hw_queues_lock(hdev); + + hdev->cs_active_cnt--; + if (!hdev->cs_active_cnt) { + struct hl_device_idle_busy_ts *ts; + + ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++]; + ts->busy_to_idle_ts = ktime_get(); + + if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE) + hdev->idle_busy_ts_idx = 0; + } else if (hdev->cs_active_cnt < 0) { + dev_crit(hdev->dev, "CS active cnt %d is negative\n", + hdev->cs_active_cnt); + } + + hdev->asic_funcs->hw_queues_unlock(hdev); + + hl_int_hw_queue_update_ci(cs); + + spin_lock(&hdev->hw_queues_mirror_lock); + /* remove CS from hw_queues mirror list */ + list_del_init(&cs->mirror_node); + spin_unlock(&hdev->hw_queues_mirror_lock); + + /* + * Don't cancel TDR in case this CS was timedout because we + * might be running from the TDR context + */ + if ((!cs->timedout) && + (hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT)) { + struct hl_cs *next; + + if (cs->tdr_active) + cancel_delayed_work_sync(&cs->work_tdr); + + spin_lock(&hdev->hw_queues_mirror_lock); + + /* queue TDR for next CS */ + next = list_first_entry_or_null( + &hdev->hw_queues_mirror_list, + struct hl_cs, mirror_node); + + if ((next) && (!next->tdr_active)) { + next->tdr_active = true; + schedule_delayed_work(&next->work_tdr, + hdev->timeout_jiffies); + } + + spin_unlock(&hdev->hw_queues_mirror_lock); + } + } else if (cs->type == CS_TYPE_WAIT) { + /* + * In case the wait for signal CS was submitted, the put occurs + * in init_signal_wait_cs() right before hanging on the PQ. + */ + hl_fence_put(cs->signal_fence); + } + + /* + * Must be called before hl_ctx_put because inside we use ctx to get + * the device + */ + hl_debugfs_remove_cs(cs); + + hl_ctx_put(cs->ctx); + + /* We need to mark an error for not submitted because in that case + * the hl fence release flow is different. Mainly, we don't need + * to handle hw_sob for signal/wait + */ + if (cs->timedout) + cs->fence->error = -ETIMEDOUT; + else if (cs->aborted) + cs->fence->error = -EIO; + else if (!cs->submitted) + cs->fence->error = -EBUSY; + + complete_all(&cs->fence->completion); + hl_fence_put(cs->fence); + cs_counters_aggregate(hdev, cs->ctx); + + kfree(cs->jobs_in_queue_cnt); + kfree(cs); +} + +static void cs_timedout(struct work_struct *work) +{ + struct hl_device *hdev; + int rc; + struct hl_cs *cs = container_of(work, struct hl_cs, + work_tdr.work); + rc = cs_get_unless_zero(cs); + if (!rc) + return; + + if ((!cs->submitted) || (cs->completed)) { + cs_put(cs); + return; + } + + /* Mark the CS is timed out so we won't try to cancel its TDR */ + cs->timedout = true; + + hdev = cs->ctx->hdev; + + dev_err(hdev->dev, + "Command submission %llu has not finished in time!\n", + cs->sequence); + + cs_put(cs); + + if (hdev->reset_on_lockup) + hl_device_reset(hdev, false, false); +} + +static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, + enum hl_cs_type cs_type, struct hl_cs **cs_new) +{ + struct hl_cs_compl *cs_cmpl; + struct hl_fence *other = NULL; + struct hl_cs *cs; + int rc; + + cs = kzalloc(sizeof(*cs), GFP_ATOMIC); + if (!cs) + return -ENOMEM; + + cs->ctx = ctx; + cs->submitted = false; + cs->completed = false; + cs->type = cs_type; + INIT_LIST_HEAD(&cs->job_list); + INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout); + kref_init(&cs->refcount); + spin_lock_init(&cs->job_lock); + + cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC); + if (!cs_cmpl) { + rc = -ENOMEM; + goto free_cs; + } + + cs_cmpl->hdev = hdev; + cs_cmpl->type = cs->type; + spin_lock_init(&cs_cmpl->lock); + cs->fence = &cs_cmpl->base_fence; + + spin_lock(&ctx->cs_lock); + + cs_cmpl->cs_seq = ctx->cs_sequence; + other = ctx->cs_pending[cs_cmpl->cs_seq & + (hdev->asic_prop.max_pending_cs - 1)]; + + if (other && !completion_done(&other->completion)) { + dev_dbg_ratelimited(hdev->dev, + "Rejecting CS because of too many in-flights CS\n"); + ctx->cs_counters.max_cs_in_flight_drop_cnt++; + rc = -EAGAIN; + goto free_fence; + } + + cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, + sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); + if (!cs->jobs_in_queue_cnt) { + rc = -ENOMEM; + goto free_fence; + } + + /* init hl_fence */ + hl_fence_init(&cs_cmpl->base_fence); + + cs->sequence = cs_cmpl->cs_seq; + + ctx->cs_pending[cs_cmpl->cs_seq & + (hdev->asic_prop.max_pending_cs - 1)] = + &cs_cmpl->base_fence; + ctx->cs_sequence++; + + hl_fence_get(&cs_cmpl->base_fence); + + hl_fence_put(other); + + spin_unlock(&ctx->cs_lock); + + *cs_new = cs; + + return 0; + +free_fence: + spin_unlock(&ctx->cs_lock); + kfree(cs_cmpl); +free_cs: + kfree(cs); + return rc; +} + +static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs) +{ + struct hl_cs_job *job, *tmp; + + list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) + free_job(hdev, job); +} + +void hl_cs_rollback_all(struct hl_device *hdev) +{ + int i; + struct hl_cs *cs, *tmp; + + /* flush all completions */ + for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) + flush_workqueue(hdev->cq_wq[i]); + + /* Make sure we don't have leftovers in the H/W queues mirror list */ + list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list, + mirror_node) { + cs_get(cs); + cs->aborted = true; + dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n", + cs->ctx->asid, cs->sequence); + cs_rollback(hdev, cs); + cs_put(cs); + } +} + +static void job_wq_completion(struct work_struct *work) +{ + struct hl_cs_job *job = container_of(work, struct hl_cs_job, + finish_work); + struct hl_cs *cs = job->cs; + struct hl_device *hdev = cs->ctx->hdev; + + /* job is no longer needed */ + free_job(hdev, job); +} + +static int validate_queue_index(struct hl_device *hdev, + struct hl_cs_chunk *chunk, + enum hl_queue_type *queue_type, + bool *is_kernel_allocated_cb) +{ + struct asic_fixed_properties *asic = &hdev->asic_prop; + struct hw_queue_properties *hw_queue_prop; + + /* This must be checked here to prevent out-of-bounds access to + * hw_queues_props array + */ + if (chunk->queue_index >= asic->max_queues) { + dev_err(hdev->dev, "Queue index %d is invalid\n", + chunk->queue_index); + return -EINVAL; + } + + hw_queue_prop = &asic->hw_queues_props[chunk->queue_index]; + + if (hw_queue_prop->type == QUEUE_TYPE_NA) { + dev_err(hdev->dev, "Queue index %d is invalid\n", + chunk->queue_index); + return -EINVAL; + } + + if (hw_queue_prop->driver_only) { + dev_err(hdev->dev, + "Queue index %d is restricted for the kernel driver\n", + chunk->queue_index); + return -EINVAL; + } + + *queue_type = hw_queue_prop->type; + *is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb; + + return 0; +} + +static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev, + struct hl_cb_mgr *cb_mgr, + struct hl_cs_chunk *chunk) +{ + struct hl_cb *cb; + u32 cb_handle; + + cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT); + + cb = hl_cb_get(hdev, cb_mgr, cb_handle); + if (!cb) { + dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle); + return NULL; + } + + if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) { + dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size); + goto release_cb; + } + + spin_lock(&cb->lock); + cb->cs_cnt++; + spin_unlock(&cb->lock); + + return cb; + +release_cb: + hl_cb_put(cb); + return NULL; +} + +struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, + enum hl_queue_type queue_type, bool is_kernel_allocated_cb) +{ + struct hl_cs_job *job; + + job = kzalloc(sizeof(*job), GFP_ATOMIC); + if (!job) + return NULL; + + job->queue_type = queue_type; + job->is_kernel_allocated_cb = is_kernel_allocated_cb; + + if (is_cb_patched(hdev, job)) + INIT_LIST_HEAD(&job->userptr_list); + + if (job->queue_type == QUEUE_TYPE_EXT) + INIT_WORK(&job->finish_work, job_wq_completion); + + return job; +} + +static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, + u32 num_chunks, u64 *cs_seq) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_cs_chunk *cs_chunk_array; + struct hl_cs_job *job; + struct hl_cs *cs; + struct hl_cb *cb; + bool int_queues_only = true; + u32 size_to_copy; + int rc, i; + + *cs_seq = ULLONG_MAX; + + if (num_chunks > HL_MAX_JOBS_PER_CS) { + dev_err(hdev->dev, + "Number of chunks can NOT be larger than %d\n", + HL_MAX_JOBS_PER_CS); + rc = -EINVAL; + goto out; + } + + cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array), + GFP_ATOMIC); + if (!cs_chunk_array) { + rc = -ENOMEM; + goto out; + } + + size_to_copy = num_chunks * sizeof(struct hl_cs_chunk); + if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) { + dev_err(hdev->dev, "Failed to copy cs chunk array from user\n"); + rc = -EFAULT; + goto free_cs_chunk_array; + } + + /* increment refcnt for context */ + hl_ctx_get(hdev, hpriv->ctx); + + rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, &cs); + if (rc) { + hl_ctx_put(hpriv->ctx); + goto free_cs_chunk_array; + } + + *cs_seq = cs->sequence; + + hl_debugfs_add_cs(cs); + + /* Validate ALL the CS chunks before submitting the CS */ + for (i = 0 ; i < num_chunks ; i++) { + struct hl_cs_chunk *chunk = &cs_chunk_array[i]; + enum hl_queue_type queue_type; + bool is_kernel_allocated_cb; + + rc = validate_queue_index(hdev, chunk, &queue_type, + &is_kernel_allocated_cb); + if (rc) { + hpriv->ctx->cs_counters.parsing_drop_cnt++; + goto free_cs_object; + } + + if (is_kernel_allocated_cb) { + cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk); + if (!cb) { + hpriv->ctx->cs_counters.parsing_drop_cnt++; + rc = -EINVAL; + goto free_cs_object; + } + } else { + cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle; + } + + if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW) + int_queues_only = false; + + job = hl_cs_allocate_job(hdev, queue_type, + is_kernel_allocated_cb); + if (!job) { + hpriv->ctx->cs_counters.out_of_mem_drop_cnt++; + dev_err(hdev->dev, "Failed to allocate a new job\n"); + rc = -ENOMEM; + if (is_kernel_allocated_cb) + goto release_cb; + + goto free_cs_object; + } + + job->id = i + 1; + job->cs = cs; + job->user_cb = cb; + job->user_cb_size = chunk->cb_size; + job->hw_queue_id = chunk->queue_index; + + cs->jobs_in_queue_cnt[job->hw_queue_id]++; + + list_add_tail(&job->cs_node, &cs->job_list); + + /* + * Increment CS reference. When CS reference is 0, CS is + * done and can be signaled to user and free all its resources + * Only increment for JOB on external or H/W queues, because + * only for those JOBs we get completion + */ + if (job->queue_type == QUEUE_TYPE_EXT || + job->queue_type == QUEUE_TYPE_HW) + cs_get(cs); + + hl_debugfs_add_job(hdev, job); + + rc = cs_parser(hpriv, job); + if (rc) { + hpriv->ctx->cs_counters.parsing_drop_cnt++; + dev_err(hdev->dev, + "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n", + cs->ctx->asid, cs->sequence, job->id, rc); + goto free_cs_object; + } + } + + if (int_queues_only) { + hpriv->ctx->cs_counters.parsing_drop_cnt++; + dev_err(hdev->dev, + "Reject CS %d.%llu because only internal queues jobs are present\n", + cs->ctx->asid, cs->sequence); + rc = -EINVAL; + goto free_cs_object; + } + + rc = hl_hw_queue_schedule_cs(cs); + if (rc) { + if (rc != -EAGAIN) + dev_err(hdev->dev, + "Failed to submit CS %d.%llu to H/W queues, error %d\n", + cs->ctx->asid, cs->sequence, rc); + goto free_cs_object; + } + + rc = HL_CS_STATUS_SUCCESS; + goto put_cs; + +release_cb: + spin_lock(&cb->lock); + cb->cs_cnt--; + spin_unlock(&cb->lock); + hl_cb_put(cb); +free_cs_object: + cs_rollback(hdev, cs); + *cs_seq = ULLONG_MAX; + /* The path below is both for good and erroneous exits */ +put_cs: + /* We finished with the CS in this function, so put the ref */ + cs_put(cs); +free_cs_chunk_array: + kfree(cs_chunk_array); +out: + return rc; +} + +static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, + void __user *chunks, u32 num_chunks, + u64 *cs_seq) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_ctx *ctx = hpriv->ctx; + struct hl_cs_chunk *cs_chunk_array, *chunk; + struct hw_queue_properties *hw_queue_prop; + struct hl_fence *sig_fence = NULL; + struct hl_cs_job *job; + struct hl_cs *cs; + struct hl_cb *cb; + enum hl_queue_type q_type; + u64 *signal_seq_arr = NULL, signal_seq; + u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size; + int rc; + + *cs_seq = ULLONG_MAX; + + if (num_chunks > HL_MAX_JOBS_PER_CS) { + dev_err(hdev->dev, + "Number of chunks can NOT be larger than %d\n", + HL_MAX_JOBS_PER_CS); + rc = -EINVAL; + goto out; + } + + cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array), + GFP_ATOMIC); + if (!cs_chunk_array) { + rc = -ENOMEM; + goto out; + } + + size_to_copy = num_chunks * sizeof(struct hl_cs_chunk); + if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) { + dev_err(hdev->dev, "Failed to copy cs chunk array from user\n"); + rc = -EFAULT; + goto free_cs_chunk_array; + } + + /* currently it is guaranteed to have only one chunk */ + chunk = &cs_chunk_array[0]; + + if (chunk->queue_index >= hdev->asic_prop.max_queues) { + dev_err(hdev->dev, "Queue index %d is invalid\n", + chunk->queue_index); + rc = -EINVAL; + goto free_cs_chunk_array; + } + + q_idx = chunk->queue_index; + hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; + q_type = hw_queue_prop->type; + + if ((q_idx >= hdev->asic_prop.max_queues) || + (!hw_queue_prop->supports_sync_stream)) { + dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx); + rc = -EINVAL; + goto free_cs_chunk_array; + } + + if (cs_type == CS_TYPE_WAIT) { + struct hl_cs_compl *sig_waitcs_cmpl; + + signal_seq_arr_len = chunk->num_signal_seq_arr; + + /* currently only one signal seq is supported */ + if (signal_seq_arr_len != 1) { + dev_err(hdev->dev, + "Wait for signal CS supports only one signal CS seq\n"); + rc = -EINVAL; + goto free_cs_chunk_array; + } + + signal_seq_arr = kmalloc_array(signal_seq_arr_len, + sizeof(*signal_seq_arr), + GFP_ATOMIC); + if (!signal_seq_arr) { + rc = -ENOMEM; + goto free_cs_chunk_array; + } + + size_to_copy = chunk->num_signal_seq_arr * + sizeof(*signal_seq_arr); + if (copy_from_user(signal_seq_arr, + u64_to_user_ptr(chunk->signal_seq_arr), + size_to_copy)) { + dev_err(hdev->dev, + "Failed to copy signal seq array from user\n"); + rc = -EFAULT; + goto free_signal_seq_array; + } + + /* currently it is guaranteed to have only one signal seq */ + signal_seq = signal_seq_arr[0]; + sig_fence = hl_ctx_get_fence(ctx, signal_seq); + if (IS_ERR(sig_fence)) { + dev_err(hdev->dev, + "Failed to get signal CS with seq 0x%llx\n", + signal_seq); + rc = PTR_ERR(sig_fence); + goto free_signal_seq_array; + } + + if (!sig_fence) { + /* signal CS already finished */ + rc = 0; + goto free_signal_seq_array; + } + + sig_waitcs_cmpl = + container_of(sig_fence, struct hl_cs_compl, base_fence); + + if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) { + dev_err(hdev->dev, + "CS seq 0x%llx is not of a signal CS\n", + signal_seq); + hl_fence_put(sig_fence); + rc = -EINVAL; + goto free_signal_seq_array; + } + + if (completion_done(&sig_fence->completion)) { + /* signal CS already finished */ + hl_fence_put(sig_fence); + rc = 0; + goto free_signal_seq_array; + } + } + + /* increment refcnt for context */ + hl_ctx_get(hdev, ctx); + + rc = allocate_cs(hdev, ctx, cs_type, &cs); + if (rc) { + if (cs_type == CS_TYPE_WAIT) + hl_fence_put(sig_fence); + hl_ctx_put(ctx); + goto free_signal_seq_array; + } + + /* + * Save the signal CS fence for later initialization right before + * hanging the wait CS on the queue. + */ + if (cs->type == CS_TYPE_WAIT) + cs->signal_fence = sig_fence; + + hl_debugfs_add_cs(cs); + + *cs_seq = cs->sequence; + + job = hl_cs_allocate_job(hdev, q_type, true); + if (!job) { + ctx->cs_counters.out_of_mem_drop_cnt++; + dev_err(hdev->dev, "Failed to allocate a new job\n"); + rc = -ENOMEM; + goto put_cs; + } + + if (cs->type == CS_TYPE_WAIT) + cb_size = hdev->asic_funcs->get_wait_cb_size(hdev); + else + cb_size = hdev->asic_funcs->get_signal_cb_size(hdev); + + cb = hl_cb_kernel_create(hdev, cb_size, + q_type == QUEUE_TYPE_HW && hdev->mmu_enable); + if (!cb) { + ctx->cs_counters.out_of_mem_drop_cnt++; + kfree(job); + rc = -EFAULT; + goto put_cs; + } + + job->id = 0; + job->cs = cs; + job->user_cb = cb; + job->user_cb->cs_cnt++; + job->user_cb_size = cb_size; + job->hw_queue_id = q_idx; + + /* + * No need in parsing, user CB is the patched CB. + * We call hl_cb_destroy() out of two reasons - we don't need the CB in + * the CB idr anymore and to decrement its refcount as it was + * incremented inside hl_cb_kernel_create(). + */ + job->patched_cb = job->user_cb; + job->job_cb_size = job->user_cb_size; + hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); + + cs->jobs_in_queue_cnt[job->hw_queue_id]++; + + list_add_tail(&job->cs_node, &cs->job_list); + + /* increment refcount as for external queues we get completion */ + cs_get(cs); + + hl_debugfs_add_job(hdev, job); + + rc = hl_hw_queue_schedule_cs(cs); + if (rc) { + if (rc != -EAGAIN) + dev_err(hdev->dev, + "Failed to submit CS %d.%llu to H/W queues, error %d\n", + ctx->asid, cs->sequence, rc); + goto free_cs_object; + } + + rc = HL_CS_STATUS_SUCCESS; + goto put_cs; + +free_cs_object: + cs_rollback(hdev, cs); + *cs_seq = ULLONG_MAX; + /* The path below is both for good and erroneous exits */ +put_cs: + /* We finished with the CS in this function, so put the ref */ + cs_put(cs); +free_signal_seq_array: + if (cs_type == CS_TYPE_WAIT) + kfree(signal_seq_arr); +free_cs_chunk_array: + kfree(cs_chunk_array); +out: + return rc; +} + +int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) +{ + struct hl_device *hdev = hpriv->hdev; + union hl_cs_args *args = data; + struct hl_ctx *ctx = hpriv->ctx; + void __user *chunks_execute, *chunks_restore; + enum hl_cs_type cs_type; + u32 num_chunks_execute, num_chunks_restore, sig_wait_flags; + u64 cs_seq = ULONG_MAX; + int rc, do_ctx_switch; + bool need_soft_reset = false; + + if (hl_device_disabled_or_in_reset(hdev)) { + dev_warn_ratelimited(hdev->dev, + "Device is %s. Can't submit new CS\n", + atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); + rc = -EBUSY; + goto out; + } + + sig_wait_flags = args->in.cs_flags & HL_CS_FLAGS_SIG_WAIT; + + if (unlikely(sig_wait_flags == HL_CS_FLAGS_SIG_WAIT)) { + dev_err(hdev->dev, + "Signal and wait CS flags are mutually exclusive, context %d\n", + ctx->asid); + rc = -EINVAL; + goto out; + } + + if (unlikely((sig_wait_flags & HL_CS_FLAGS_SIG_WAIT) && + (!hdev->supports_sync_stream))) { + dev_err(hdev->dev, "Sync stream CS is not supported\n"); + rc = -EINVAL; + goto out; + } + + if (args->in.cs_flags & HL_CS_FLAGS_SIGNAL) + cs_type = CS_TYPE_SIGNAL; + else if (args->in.cs_flags & HL_CS_FLAGS_WAIT) + cs_type = CS_TYPE_WAIT; + else + cs_type = CS_TYPE_DEFAULT; + + chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute; + num_chunks_execute = args->in.num_chunks_execute; + + if (cs_type == CS_TYPE_DEFAULT) { + if (!num_chunks_execute) { + dev_err(hdev->dev, + "Got execute CS with 0 chunks, context %d\n", + ctx->asid); + rc = -EINVAL; + goto out; + } + } else if (num_chunks_execute != 1) { + dev_err(hdev->dev, + "Sync stream CS mandates one chunk only, context %d\n", + ctx->asid); + rc = -EINVAL; + goto out; + } + + do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); + + if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) { + long ret; + + chunks_restore = + (void __user *) (uintptr_t) args->in.chunks_restore; + num_chunks_restore = args->in.num_chunks_restore; + + mutex_lock(&hpriv->restore_phase_mutex); + + if (do_ctx_switch) { + rc = hdev->asic_funcs->context_switch(hdev, ctx->asid); + if (rc) { + dev_err_ratelimited(hdev->dev, + "Failed to switch to context %d, rejecting CS! %d\n", + ctx->asid, rc); + /* + * If we timedout, or if the device is not IDLE + * while we want to do context-switch (-EBUSY), + * we need to soft-reset because QMAN is + * probably stuck. However, we can't call to + * reset here directly because of deadlock, so + * need to do it at the very end of this + * function + */ + if ((rc == -ETIMEDOUT) || (rc == -EBUSY)) + need_soft_reset = true; + mutex_unlock(&hpriv->restore_phase_mutex); + goto out; + } + } + + hdev->asic_funcs->restore_phase_topology(hdev); + + if (!num_chunks_restore) { + dev_dbg(hdev->dev, + "Need to run restore phase but restore CS is empty\n"); + rc = 0; + } else { + rc = cs_ioctl_default(hpriv, chunks_restore, + num_chunks_restore, &cs_seq); + } + + mutex_unlock(&hpriv->restore_phase_mutex); + + if (rc) { + dev_err(hdev->dev, + "Failed to submit restore CS for context %d (%d)\n", + ctx->asid, rc); + goto out; + } + + /* Need to wait for restore completion before execution phase */ + if (num_chunks_restore) { + ret = _hl_cs_wait_ioctl(hdev, ctx, + jiffies_to_usecs(hdev->timeout_jiffies), + cs_seq); + if (ret <= 0) { + dev_err(hdev->dev, + "Restore CS for context %d failed to complete %ld\n", + ctx->asid, ret); + rc = -ENOEXEC; + goto out; + } + } + + ctx->thread_ctx_switch_wait_token = 1; + } else if (!ctx->thread_ctx_switch_wait_token) { + u32 tmp; + + rc = hl_poll_timeout_memory(hdev, + &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1), + 100, jiffies_to_usecs(hdev->timeout_jiffies), false); + + if (rc == -ETIMEDOUT) { + dev_err(hdev->dev, + "context switch phase timeout (%d)\n", tmp); + goto out; + } + } + + if (cs_type == CS_TYPE_DEFAULT) + rc = cs_ioctl_default(hpriv, chunks_execute, num_chunks_execute, + &cs_seq); + else + rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks_execute, + num_chunks_execute, &cs_seq); + +out: + if (rc != -EAGAIN) { + memset(args, 0, sizeof(*args)); + args->out.status = rc; + args->out.seq = cs_seq; + } + + if (((rc == -ETIMEDOUT) || (rc == -EBUSY)) && (need_soft_reset)) + hl_device_reset(hdev, false, false); + + return rc; +} + +static long _hl_cs_wait_ioctl(struct hl_device *hdev, + struct hl_ctx *ctx, u64 timeout_us, u64 seq) +{ + struct hl_fence *fence; + unsigned long timeout; + long rc; + + if (timeout_us == MAX_SCHEDULE_TIMEOUT) + timeout = timeout_us; + else + timeout = usecs_to_jiffies(timeout_us); + + hl_ctx_get(hdev, ctx); + + fence = hl_ctx_get_fence(ctx, seq); + if (IS_ERR(fence)) { + rc = PTR_ERR(fence); + if (rc == -EINVAL) + dev_notice_ratelimited(hdev->dev, + "Can't wait on CS %llu because current CS is at seq %llu\n", + seq, ctx->cs_sequence); + } else if (fence) { + if (!timeout_us) + rc = completion_done(&fence->completion); + else + rc = wait_for_completion_interruptible_timeout( + &fence->completion, timeout); + + if (fence->error == -ETIMEDOUT) + rc = -ETIMEDOUT; + else if (fence->error == -EIO) + rc = -EIO; + + hl_fence_put(fence); + } else { + dev_dbg(hdev->dev, + "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n", + seq, ctx->cs_sequence); + rc = 1; + } + + hl_ctx_put(ctx); + + return rc; +} + +int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) +{ + struct hl_device *hdev = hpriv->hdev; + union hl_wait_cs_args *args = data; + u64 seq = args->in.seq; + long rc; + + rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq); + + memset(args, 0, sizeof(*args)); + + if (rc < 0) { + if (rc == -ERESTARTSYS) { + dev_err_ratelimited(hdev->dev, + "user process got signal while waiting for CS handle %llu\n", + seq); + args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED; + rc = -EINTR; + } else if (rc == -ETIMEDOUT) { + dev_err_ratelimited(hdev->dev, + "CS %llu has timed-out while user process is waiting for it\n", + seq); + args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT; + } else if (rc == -EIO) { + dev_err_ratelimited(hdev->dev, + "CS %llu has been aborted while user process is waiting for it\n", + seq); + args->out.status = HL_WAIT_CS_STATUS_ABORTED; + } + return rc; + } + + if (rc == 0) + args->out.status = HL_WAIT_CS_STATUS_BUSY; + else + args->out.status = HL_WAIT_CS_STATUS_COMPLETED; + + return 0; +} diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c new file mode 100644 index 000000000..7a59dd7c6 --- /dev/null +++ b/drivers/misc/habanalabs/common/context.c @@ -0,0 +1,271 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" + +#include <linux/slab.h> + +static void hl_ctx_fini(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + u64 idle_mask = 0; + int i; + + /* + * If we arrived here, there are no jobs waiting for this context + * on its queues so we can safely remove it. + * This is because for each CS, we increment the ref count and for + * every CS that was finished we decrement it and we won't arrive + * to this function unless the ref count is 0 + */ + + for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++) + hl_fence_put(ctx->cs_pending[i]); + + kfree(ctx->cs_pending); + + if (ctx->asid != HL_KERNEL_ASID_ID) { + dev_dbg(hdev->dev, "closing user context %d\n", ctx->asid); + + /* The engines are stopped as there is no executing CS, but the + * Coresight might be still working by accessing addresses + * related to the stopped engines. Hence stop it explicitly. + * Stop only if this is the compute context, as there can be + * only one compute context + */ + if ((hdev->in_debug) && (hdev->compute_ctx == ctx)) + hl_device_set_debug_mode(hdev, false); + + hl_cb_va_pool_fini(ctx); + hl_vm_ctx_fini(ctx); + hl_asid_free(hdev, ctx->asid); + + if ((!hdev->pldm) && (hdev->pdev) && + (!hdev->asic_funcs->is_device_idle(hdev, + &idle_mask, NULL))) + dev_notice(hdev->dev, + "device not idle after user context is closed (0x%llx)\n", + idle_mask); + } else { + dev_dbg(hdev->dev, "closing kernel context\n"); + hl_mmu_ctx_fini(ctx); + } +} + +void hl_ctx_do_release(struct kref *ref) +{ + struct hl_ctx *ctx; + + ctx = container_of(ref, struct hl_ctx, refcount); + + hl_ctx_fini(ctx); + + if (ctx->hpriv) + hl_hpriv_put(ctx->hpriv); + + kfree(ctx); +} + +int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv) +{ + struct hl_ctx_mgr *mgr = &hpriv->ctx_mgr; + struct hl_ctx *ctx; + int rc; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) { + rc = -ENOMEM; + goto out_err; + } + + mutex_lock(&mgr->ctx_lock); + rc = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL); + mutex_unlock(&mgr->ctx_lock); + + if (rc < 0) { + dev_err(hdev->dev, "Failed to allocate IDR for a new CTX\n"); + goto free_ctx; + } + + ctx->handle = rc; + + rc = hl_ctx_init(hdev, ctx, false); + if (rc) + goto remove_from_idr; + + hl_hpriv_get(hpriv); + ctx->hpriv = hpriv; + + /* TODO: remove for multiple contexts per process */ + hpriv->ctx = ctx; + + /* TODO: remove the following line for multiple process support */ + hdev->compute_ctx = ctx; + + return 0; + +remove_from_idr: + mutex_lock(&mgr->ctx_lock); + idr_remove(&mgr->ctx_handles, ctx->handle); + mutex_unlock(&mgr->ctx_lock); +free_ctx: + kfree(ctx); +out_err: + return rc; +} + +void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx) +{ + if (kref_put(&ctx->refcount, hl_ctx_do_release) == 1) + return; + + dev_warn(hdev->dev, + "user process released device but its command submissions are still executing\n"); +} + +int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) +{ + int rc = 0; + + ctx->hdev = hdev; + + kref_init(&ctx->refcount); + + ctx->cs_sequence = 1; + spin_lock_init(&ctx->cs_lock); + atomic_set(&ctx->thread_ctx_switch_token, 1); + ctx->thread_ctx_switch_wait_token = 0; + ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs, + sizeof(struct hl_fence *), + GFP_KERNEL); + if (!ctx->cs_pending) + return -ENOMEM; + + if (is_kernel_ctx) { + ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */ + rc = hl_mmu_ctx_init(ctx); + if (rc) { + dev_err(hdev->dev, "Failed to init mmu ctx module\n"); + goto err_free_cs_pending; + } + } else { + ctx->asid = hl_asid_alloc(hdev); + if (!ctx->asid) { + dev_err(hdev->dev, "No free ASID, failed to create context\n"); + rc = -ENOMEM; + goto err_free_cs_pending; + } + + rc = hl_vm_ctx_init(ctx); + if (rc) { + dev_err(hdev->dev, "Failed to init mem ctx module\n"); + rc = -ENOMEM; + goto err_asid_free; + } + + rc = hl_cb_va_pool_init(ctx); + if (rc) { + dev_err(hdev->dev, + "Failed to init VA pool for mapped CB\n"); + goto err_vm_ctx_fini; + } + + rc = hdev->asic_funcs->ctx_init(ctx); + if (rc) { + dev_err(hdev->dev, "ctx_init failed\n"); + goto err_cb_va_pool_fini; + } + + dev_dbg(hdev->dev, "create user context %d\n", ctx->asid); + } + + return 0; + +err_cb_va_pool_fini: + hl_cb_va_pool_fini(ctx); +err_vm_ctx_fini: + hl_vm_ctx_fini(ctx); +err_asid_free: + hl_asid_free(hdev, ctx->asid); +err_free_cs_pending: + kfree(ctx->cs_pending); + + return rc; +} + +void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx) +{ + kref_get(&ctx->refcount); +} + +int hl_ctx_put(struct hl_ctx *ctx) +{ + return kref_put(&ctx->refcount, hl_ctx_do_release); +} + +struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq) +{ + struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop; + struct hl_fence *fence; + + spin_lock(&ctx->cs_lock); + + if (seq >= ctx->cs_sequence) { + spin_unlock(&ctx->cs_lock); + return ERR_PTR(-EINVAL); + } + + if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) { + spin_unlock(&ctx->cs_lock); + return NULL; + } + + fence = ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]; + hl_fence_get(fence); + + spin_unlock(&ctx->cs_lock); + + return fence; +} + +/* + * hl_ctx_mgr_init - initialize the context manager + * + * @mgr: pointer to context manager structure + * + * This manager is an object inside the hpriv object of the user process. + * The function is called when a user process opens the FD. + */ +void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr) +{ + mutex_init(&mgr->ctx_lock); + idr_init(&mgr->ctx_handles); +} + +/* + * hl_ctx_mgr_fini - finalize the context manager + * + * @hdev: pointer to device structure + * @mgr: pointer to context manager structure + * + * This function goes over all the contexts in the manager and frees them. + * It is called when a process closes the FD. + */ +void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr) +{ + struct hl_ctx *ctx; + struct idr *idp; + u32 id; + + idp = &mgr->ctx_handles; + + idr_for_each_entry(idp, ctx, id) + hl_ctx_free(hdev, ctx); + + idr_destroy(&mgr->ctx_handles); + mutex_destroy(&mgr->ctx_lock); +} diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c new file mode 100644 index 000000000..9716b0728 --- /dev/null +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -0,0 +1,1456 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" +#include "../include/hw_ip/mmu/mmu_general.h" + +#include <linux/pci.h> +#include <linux/debugfs.h> +#include <linux/uaccess.h> + +#define MMU_ADDR_BUF_SIZE 40 +#define MMU_ASID_BUF_SIZE 10 +#define MMU_KBUF_SIZE (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE) + +static struct dentry *hl_debug_root; + +static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, + u8 i2c_reg, long *val) +{ + struct cpucp_packet pkt; + int rc; + + if (hl_device_disabled_or_in_reset(hdev)) + return -EBUSY; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_RD << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.i2c_bus = i2c_bus; + pkt.i2c_addr = i2c_addr; + pkt.i2c_reg = i2c_reg; + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, val); + + if (rc) + dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc); + + return rc; +} + +static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, + u8 i2c_reg, u32 val) +{ + struct cpucp_packet pkt; + int rc; + + if (hl_device_disabled_or_in_reset(hdev)) + return -EBUSY; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_WR << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.i2c_bus = i2c_bus; + pkt.i2c_addr = i2c_addr; + pkt.i2c_reg = i2c_reg; + pkt.value = cpu_to_le64(val); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, NULL); + + if (rc) + dev_err(hdev->dev, "Failed to write to I2C, error %d\n", rc); + + return rc; +} + +static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state) +{ + struct cpucp_packet pkt; + int rc; + + if (hl_device_disabled_or_in_reset(hdev)) + return; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_LED_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.led_index = cpu_to_le32(led); + pkt.value = cpu_to_le64(state); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, NULL); + + if (rc) + dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, rc); +} + +static int command_buffers_show(struct seq_file *s, void *data) +{ + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_cb *cb; + bool first = true; + + spin_lock(&dev_entry->cb_spinlock); + + list_for_each_entry(cb, &dev_entry->cb_list, debugfs_list) { + if (first) { + first = false; + seq_puts(s, "\n"); + seq_puts(s, " CB ID CTX ID CB size CB RefCnt mmap? CS counter\n"); + seq_puts(s, "---------------------------------------------------------------\n"); + } + seq_printf(s, + " %03llu %d 0x%08x %d %d %d\n", + cb->id, cb->ctx->asid, cb->size, + kref_read(&cb->refcount), + cb->mmap, cb->cs_cnt); + } + + spin_unlock(&dev_entry->cb_spinlock); + + if (!first) + seq_puts(s, "\n"); + + return 0; +} + +static int command_submission_show(struct seq_file *s, void *data) +{ + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_cs *cs; + bool first = true; + + spin_lock(&dev_entry->cs_spinlock); + + list_for_each_entry(cs, &dev_entry->cs_list, debugfs_list) { + if (first) { + first = false; + seq_puts(s, "\n"); + seq_puts(s, " CS ID CTX ASID CS RefCnt Submitted Completed\n"); + seq_puts(s, "------------------------------------------------------\n"); + } + seq_printf(s, + " %llu %d %d %d %d\n", + cs->sequence, cs->ctx->asid, + kref_read(&cs->refcount), + cs->submitted, cs->completed); + } + + spin_unlock(&dev_entry->cs_spinlock); + + if (!first) + seq_puts(s, "\n"); + + return 0; +} + +static int command_submission_jobs_show(struct seq_file *s, void *data) +{ + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_cs_job *job; + bool first = true; + + spin_lock(&dev_entry->cs_job_spinlock); + + list_for_each_entry(job, &dev_entry->cs_job_list, debugfs_list) { + if (first) { + first = false; + seq_puts(s, "\n"); + seq_puts(s, " JOB ID CS ID CTX ASID H/W Queue\n"); + seq_puts(s, "---------------------------------------\n"); + } + if (job->cs) + seq_printf(s, + " %02d %llu %d %d\n", + job->id, job->cs->sequence, job->cs->ctx->asid, + job->hw_queue_id); + else + seq_printf(s, + " %02d 0 %d %d\n", + job->id, HL_KERNEL_ASID_ID, job->hw_queue_id); + } + + spin_unlock(&dev_entry->cs_job_spinlock); + + if (!first) + seq_puts(s, "\n"); + + return 0; +} + +static int userptr_show(struct seq_file *s, void *data) +{ + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_userptr *userptr; + char dma_dir[4][30] = {"DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", + "DMA_FROM_DEVICE", "DMA_NONE"}; + bool first = true; + + spin_lock(&dev_entry->userptr_spinlock); + + list_for_each_entry(userptr, &dev_entry->userptr_list, debugfs_list) { + if (first) { + first = false; + seq_puts(s, "\n"); + seq_puts(s, " user virtual address size dma dir\n"); + seq_puts(s, "----------------------------------------------------------\n"); + } + seq_printf(s, + " 0x%-14llx %-10u %-30s\n", + userptr->addr, userptr->size, dma_dir[userptr->dir]); + } + + spin_unlock(&dev_entry->userptr_spinlock); + + if (!first) + seq_puts(s, "\n"); + + return 0; +} + +static int vm_show(struct seq_file *s, void *data) +{ + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_ctx *ctx; + struct hl_vm *vm; + struct hl_vm_hash_node *hnode; + struct hl_userptr *userptr; + struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; + enum vm_type_t *vm_type; + bool once = true; + u64 j; + int i; + + if (!dev_entry->hdev->mmu_enable) + return 0; + + spin_lock(&dev_entry->ctx_mem_hash_spinlock); + + list_for_each_entry(ctx, &dev_entry->ctx_mem_hash_list, debugfs_list) { + once = false; + seq_puts(s, "\n\n----------------------------------------------------"); + seq_puts(s, "\n----------------------------------------------------\n\n"); + seq_printf(s, "ctx asid: %u\n", ctx->asid); + + seq_puts(s, "\nmappings:\n\n"); + seq_puts(s, " virtual address size handle\n"); + seq_puts(s, "----------------------------------------------------\n"); + mutex_lock(&ctx->mem_hash_lock); + hash_for_each(ctx->mem_hash, i, hnode, node) { + vm_type = hnode->ptr; + + if (*vm_type == VM_TYPE_USERPTR) { + userptr = hnode->ptr; + seq_printf(s, + " 0x%-14llx %-10u\n", + hnode->vaddr, userptr->size); + } else { + phys_pg_pack = hnode->ptr; + seq_printf(s, + " 0x%-14llx %-10llu %-4u\n", + hnode->vaddr, phys_pg_pack->total_size, + phys_pg_pack->handle); + } + } + mutex_unlock(&ctx->mem_hash_lock); + + vm = &ctx->hdev->vm; + spin_lock(&vm->idr_lock); + + if (!idr_is_empty(&vm->phys_pg_pack_handles)) + seq_puts(s, "\n\nallocations:\n"); + + idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_pack, i) { + if (phys_pg_pack->asid != ctx->asid) + continue; + + seq_printf(s, "\nhandle: %u\n", phys_pg_pack->handle); + seq_printf(s, "page size: %u\n\n", + phys_pg_pack->page_size); + seq_puts(s, " physical address\n"); + seq_puts(s, "---------------------\n"); + for (j = 0 ; j < phys_pg_pack->npages ; j++) { + seq_printf(s, " 0x%-14llx\n", + phys_pg_pack->pages[j]); + } + } + spin_unlock(&vm->idr_lock); + + } + + spin_unlock(&dev_entry->ctx_mem_hash_spinlock); + + if (!once) + seq_puts(s, "\n"); + + return 0; +} + +/* these inline functions are copied from mmu.c */ +static inline u64 get_hop0_addr(struct hl_ctx *ctx) +{ + return ctx->hdev->asic_prop.mmu_pgt_addr + + (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); +} + +static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr, + u64 virt_addr, u64 mask, u64 shift) +{ + return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * + ((virt_addr & mask) >> shift); +} + +static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop0_mask, + mmu_specs->hop0_shift); +} + +static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop1_mask, + mmu_specs->hop1_shift); +} + +static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop2_mask, + mmu_specs->hop2_shift); +} + +static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop3_mask, + mmu_specs->hop3_shift); +} + +static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop4_mask, + mmu_specs->hop4_shift); +} + +static inline u64 get_hop5_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_specs, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop5_mask, + mmu_specs->hop5_shift); +} + +static inline u64 get_next_hop_addr(u64 curr_pte) +{ + if (curr_pte & PAGE_PRESENT_MASK) + return curr_pte & HOP_PHYS_ADDR_MASK; + else + return ULLONG_MAX; +} + +static int mmu_show(struct seq_file *s, void *data) +{ + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_device *hdev = dev_entry->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; + struct hl_ctx *ctx; + bool is_dram_addr; + + u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0, + hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0, + hop2_addr = 0, hop2_pte_addr = 0, hop2_pte = 0, + hop3_addr = 0, hop3_pte_addr = 0, hop3_pte = 0, + hop4_addr = 0, hop4_pte_addr = 0, hop4_pte = 0, + hop5_addr = 0, hop5_pte_addr = 0, hop5_pte = 0, + virt_addr = dev_entry->mmu_addr; + + if (!hdev->mmu_enable) + return 0; + + if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID) + ctx = hdev->kernel_ctx; + else + ctx = hdev->compute_ctx; + + if (!ctx) { + dev_err(hdev->dev, "no ctx available\n"); + return 0; + } + + is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->dmmu.start_addr, + prop->dmmu.end_addr); + + /* shifts and masks are the same in PMMU and HPMMU, use one of them */ + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + + mutex_lock(&ctx->mmu_lock); + + /* the following lookup is copied from unmap() in mmu.c */ + + hop0_addr = get_hop0_addr(ctx); + hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); + hop0_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr); + hop1_addr = get_next_hop_addr(hop0_pte); + + if (hop1_addr == ULLONG_MAX) + goto not_mapped; + + hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); + hop1_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr); + hop2_addr = get_next_hop_addr(hop1_pte); + + if (hop2_addr == ULLONG_MAX) + goto not_mapped; + + hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); + hop2_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr); + hop3_addr = get_next_hop_addr(hop2_pte); + + if (hop3_addr == ULLONG_MAX) + goto not_mapped; + + hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); + hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr); + + if (mmu_prop->num_hops == MMU_ARCH_5_HOPS) { + if (!(hop3_pte & LAST_MASK)) { + hop4_addr = get_next_hop_addr(hop3_pte); + + if (hop4_addr == ULLONG_MAX) + goto not_mapped; + + hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, + hop4_addr, virt_addr); + hop4_pte = hdev->asic_funcs->read_pte(hdev, + hop4_pte_addr); + if (!(hop4_pte & PAGE_PRESENT_MASK)) + goto not_mapped; + } else { + if (!(hop3_pte & PAGE_PRESENT_MASK)) + goto not_mapped; + } + } else { + hop4_addr = get_next_hop_addr(hop3_pte); + + if (hop4_addr == ULLONG_MAX) + goto not_mapped; + + hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, + hop4_addr, virt_addr); + hop4_pte = hdev->asic_funcs->read_pte(hdev, + hop4_pte_addr); + if (!(hop4_pte & LAST_MASK)) { + hop5_addr = get_next_hop_addr(hop4_pte); + + if (hop5_addr == ULLONG_MAX) + goto not_mapped; + + hop5_pte_addr = get_hop5_pte_addr(ctx, mmu_prop, + hop5_addr, virt_addr); + hop5_pte = hdev->asic_funcs->read_pte(hdev, + hop5_pte_addr); + if (!(hop5_pte & PAGE_PRESENT_MASK)) + goto not_mapped; + } else { + if (!(hop4_pte & PAGE_PRESENT_MASK)) + goto not_mapped; + } + } + + seq_printf(s, "asid: %u, virt_addr: 0x%llx\n", + dev_entry->mmu_asid, dev_entry->mmu_addr); + + seq_printf(s, "hop0_addr: 0x%llx\n", hop0_addr); + seq_printf(s, "hop0_pte_addr: 0x%llx\n", hop0_pte_addr); + seq_printf(s, "hop0_pte: 0x%llx\n", hop0_pte); + + seq_printf(s, "hop1_addr: 0x%llx\n", hop1_addr); + seq_printf(s, "hop1_pte_addr: 0x%llx\n", hop1_pte_addr); + seq_printf(s, "hop1_pte: 0x%llx\n", hop1_pte); + + seq_printf(s, "hop2_addr: 0x%llx\n", hop2_addr); + seq_printf(s, "hop2_pte_addr: 0x%llx\n", hop2_pte_addr); + seq_printf(s, "hop2_pte: 0x%llx\n", hop2_pte); + + seq_printf(s, "hop3_addr: 0x%llx\n", hop3_addr); + seq_printf(s, "hop3_pte_addr: 0x%llx\n", hop3_pte_addr); + seq_printf(s, "hop3_pte: 0x%llx\n", hop3_pte); + + if (mmu_prop->num_hops == MMU_ARCH_5_HOPS) { + if (!(hop3_pte & LAST_MASK)) { + seq_printf(s, "hop4_addr: 0x%llx\n", hop4_addr); + seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr); + seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte); + } + } else { + seq_printf(s, "hop4_addr: 0x%llx\n", hop4_addr); + seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr); + seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte); + + if (!(hop4_pte & LAST_MASK)) { + seq_printf(s, "hop5_addr: 0x%llx\n", hop5_addr); + seq_printf(s, "hop5_pte_addr: 0x%llx\n", hop5_pte_addr); + seq_printf(s, "hop5_pte: 0x%llx\n", hop5_pte); + } + } + + goto out; + +not_mapped: + dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", + virt_addr); +out: + mutex_unlock(&ctx->mmu_lock); + + return 0; +} + +static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf, + size_t count, loff_t *f_pos) +{ + struct seq_file *s = file->private_data; + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_device *hdev = dev_entry->hdev; + char kbuf[MMU_KBUF_SIZE]; + char *c; + ssize_t rc; + + if (!hdev->mmu_enable) + return count; + + if (count > sizeof(kbuf) - 1) + goto err; + if (copy_from_user(kbuf, buf, count)) + goto err; + kbuf[count] = 0; + + c = strchr(kbuf, ' '); + if (!c) + goto err; + *c = '\0'; + + rc = kstrtouint(kbuf, 10, &dev_entry->mmu_asid); + if (rc) + goto err; + + if (strncmp(c+1, "0x", 2)) + goto err; + rc = kstrtoull(c+3, 16, &dev_entry->mmu_addr); + if (rc) + goto err; + + return count; + +err: + dev_err(hdev->dev, "usage: echo <asid> <0xaddr> > mmu\n"); + + return -EINVAL; +} + +static int engines_show(struct seq_file *s, void *data) +{ + struct hl_debugfs_entry *entry = s->private; + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_device *hdev = dev_entry->hdev; + + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, + "Can't check device idle during reset\n"); + return 0; + } + + hdev->asic_funcs->is_device_idle(hdev, NULL, s); + + return 0; +} + +static bool hl_is_device_va(struct hl_device *hdev, u64 addr) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + + if (!hdev->mmu_enable) + goto out; + + if (hdev->dram_supports_virtual_memory && + (addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr)) + return true; + + if (addr >= prop->pmmu.start_addr && + addr < prop->pmmu.end_addr) + return true; + + if (addr >= prop->pmmu_huge.start_addr && + addr < prop->pmmu_huge.end_addr) + return true; +out: + return false; +} + +static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, + u64 *phys_addr) +{ + struct hl_ctx *ctx = hdev->compute_ctx; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; + u64 hop_addr, hop_pte_addr, hop_pte; + u64 offset_mask = HOP4_MASK | FLAGS_MASK; + int rc = 0; + bool is_dram_addr; + + if (!ctx) { + dev_err(hdev->dev, "no ctx available\n"); + return -EINVAL; + } + + is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->dmmu.start_addr, + prop->dmmu.end_addr); + + /* shifts and masks are the same in PMMU and HPMMU, use one of them */ + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + + mutex_lock(&ctx->mmu_lock); + + /* hop 0 */ + hop_addr = get_hop0_addr(ctx); + hop_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); + hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); + + /* hop 1 */ + hop_addr = get_next_hop_addr(hop_pte); + if (hop_addr == ULLONG_MAX) + goto not_mapped; + hop_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); + hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); + + /* hop 2 */ + hop_addr = get_next_hop_addr(hop_pte); + if (hop_addr == ULLONG_MAX) + goto not_mapped; + hop_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); + hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); + + /* hop 3 */ + hop_addr = get_next_hop_addr(hop_pte); + if (hop_addr == ULLONG_MAX) + goto not_mapped; + hop_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); + hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); + + if (!(hop_pte & LAST_MASK)) { + /* hop 4 */ + hop_addr = get_next_hop_addr(hop_pte); + if (hop_addr == ULLONG_MAX) + goto not_mapped; + hop_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop_addr, + virt_addr); + hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); + + offset_mask = FLAGS_MASK; + } + + if (!(hop_pte & PAGE_PRESENT_MASK)) + goto not_mapped; + + *phys_addr = (hop_pte & ~offset_mask) | (virt_addr & offset_mask); + + goto out; + +not_mapped: + dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", + virt_addr); + rc = -EINVAL; +out: + mutex_unlock(&ctx->mmu_lock); + return rc; +} + +static ssize_t hl_data_read32(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + char tmp_buf[32]; + u64 addr = entry->addr; + u32 val; + ssize_t rc; + + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, "Can't read during reset\n"); + return 0; + } + + if (*ppos) + return 0; + + if (hl_is_device_va(hdev, addr)) { + rc = device_va_to_pa(hdev, addr, &addr); + if (rc) + return rc; + } + + rc = hdev->asic_funcs->debugfs_read32(hdev, addr, &val); + if (rc) { + dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr); + return rc; + } + + sprintf(tmp_buf, "0x%08x\n", val); + return simple_read_from_buffer(buf, count, ppos, tmp_buf, + strlen(tmp_buf)); +} + +static ssize_t hl_data_write32(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u64 addr = entry->addr; + u32 value; + ssize_t rc; + + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, "Can't write during reset\n"); + return 0; + } + + rc = kstrtouint_from_user(buf, count, 16, &value); + if (rc) + return rc; + + if (hl_is_device_va(hdev, addr)) { + rc = device_va_to_pa(hdev, addr, &addr); + if (rc) + return rc; + } + + rc = hdev->asic_funcs->debugfs_write32(hdev, addr, value); + if (rc) { + dev_err(hdev->dev, "Failed to write 0x%08x to 0x%010llx\n", + value, addr); + return rc; + } + + return count; +} + +static ssize_t hl_data_read64(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + char tmp_buf[32]; + u64 addr = entry->addr; + u64 val; + ssize_t rc; + + if (*ppos) + return 0; + + if (hl_is_device_va(hdev, addr)) { + rc = device_va_to_pa(hdev, addr, &addr); + if (rc) + return rc; + } + + rc = hdev->asic_funcs->debugfs_read64(hdev, addr, &val); + if (rc) { + dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr); + return rc; + } + + sprintf(tmp_buf, "0x%016llx\n", val); + return simple_read_from_buffer(buf, count, ppos, tmp_buf, + strlen(tmp_buf)); +} + +static ssize_t hl_data_write64(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u64 addr = entry->addr; + u64 value; + ssize_t rc; + + rc = kstrtoull_from_user(buf, count, 16, &value); + if (rc) + return rc; + + if (hl_is_device_va(hdev, addr)) { + rc = device_va_to_pa(hdev, addr, &addr); + if (rc) + return rc; + } + + rc = hdev->asic_funcs->debugfs_write64(hdev, addr, value); + if (rc) { + dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n", + value, addr); + return rc; + } + + return count; +} + +static ssize_t hl_get_power_state(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + char tmp_buf[200]; + int i; + + if (*ppos) + return 0; + + if (hdev->pdev->current_state == PCI_D0) + i = 1; + else if (hdev->pdev->current_state == PCI_D3hot) + i = 2; + else + i = 3; + + sprintf(tmp_buf, + "current power state: %d\n1 - D0\n2 - D3hot\n3 - Unknown\n", i); + return simple_read_from_buffer(buf, count, ppos, tmp_buf, + strlen(tmp_buf)); +} + +static ssize_t hl_set_power_state(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u32 value; + ssize_t rc; + + rc = kstrtouint_from_user(buf, count, 10, &value); + if (rc) + return rc; + + if (value == 1) { + pci_set_power_state(hdev->pdev, PCI_D0); + pci_restore_state(hdev->pdev); + rc = pci_enable_device(hdev->pdev); + if (rc < 0) + return rc; + } else if (value == 2) { + pci_save_state(hdev->pdev); + pci_disable_device(hdev->pdev); + pci_set_power_state(hdev->pdev, PCI_D3hot); + } else { + dev_dbg(hdev->dev, "invalid power state value %u\n", value); + return -EINVAL; + } + + return count; +} + +static ssize_t hl_i2c_data_read(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + char tmp_buf[32]; + long val; + ssize_t rc; + + if (*ppos) + return 0; + + rc = hl_debugfs_i2c_read(hdev, entry->i2c_bus, entry->i2c_addr, + entry->i2c_reg, &val); + if (rc) { + dev_err(hdev->dev, + "Failed to read from I2C bus %d, addr %d, reg %d\n", + entry->i2c_bus, entry->i2c_addr, entry->i2c_reg); + return rc; + } + + sprintf(tmp_buf, "0x%02lx\n", val); + rc = simple_read_from_buffer(buf, count, ppos, tmp_buf, + strlen(tmp_buf)); + + return rc; +} + +static ssize_t hl_i2c_data_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u32 value; + ssize_t rc; + + rc = kstrtouint_from_user(buf, count, 16, &value); + if (rc) + return rc; + + rc = hl_debugfs_i2c_write(hdev, entry->i2c_bus, entry->i2c_addr, + entry->i2c_reg, value); + if (rc) { + dev_err(hdev->dev, + "Failed to write 0x%02x to I2C bus %d, addr %d, reg %d\n", + value, entry->i2c_bus, entry->i2c_addr, entry->i2c_reg); + return rc; + } + + return count; +} + +static ssize_t hl_led0_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u32 value; + ssize_t rc; + + rc = kstrtouint_from_user(buf, count, 10, &value); + if (rc) + return rc; + + value = value ? 1 : 0; + + hl_debugfs_led_set(hdev, 0, value); + + return count; +} + +static ssize_t hl_led1_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u32 value; + ssize_t rc; + + rc = kstrtouint_from_user(buf, count, 10, &value); + if (rc) + return rc; + + value = value ? 1 : 0; + + hl_debugfs_led_set(hdev, 1, value); + + return count; +} + +static ssize_t hl_led2_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u32 value; + ssize_t rc; + + rc = kstrtouint_from_user(buf, count, 10, &value); + if (rc) + return rc; + + value = value ? 1 : 0; + + hl_debugfs_led_set(hdev, 2, value); + + return count; +} + +static ssize_t hl_device_read(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + static const char *help = + "Valid values: disable, enable, suspend, resume, cpu_timeout\n"; + return simple_read_from_buffer(buf, count, ppos, help, strlen(help)); +} + +static ssize_t hl_device_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + char data[30] = {0}; + + /* don't allow partial writes */ + if (*ppos != 0) + return 0; + + simple_write_to_buffer(data, 29, ppos, buf, count); + + if (strncmp("disable", data, strlen("disable")) == 0) { + hdev->disabled = true; + } else if (strncmp("enable", data, strlen("enable")) == 0) { + hdev->disabled = false; + } else if (strncmp("suspend", data, strlen("suspend")) == 0) { + hdev->asic_funcs->suspend(hdev); + } else if (strncmp("resume", data, strlen("resume")) == 0) { + hdev->asic_funcs->resume(hdev); + } else if (strncmp("cpu_timeout", data, strlen("cpu_timeout")) == 0) { + hdev->device_cpu_disabled = true; + } else { + dev_err(hdev->dev, + "Valid values: disable, enable, suspend, resume, cpu_timeout\n"); + count = -EINVAL; + } + + return count; +} + +static ssize_t hl_clk_gate_read(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + char tmp_buf[200]; + ssize_t rc; + + if (*ppos) + return 0; + + sprintf(tmp_buf, "0x%llx\n", hdev->clock_gating_mask); + rc = simple_read_from_buffer(buf, count, ppos, tmp_buf, + strlen(tmp_buf) + 1); + + return rc; +} + +static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u64 value; + ssize_t rc; + + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, + "Can't change clock gating during reset\n"); + return 0; + } + + rc = kstrtoull_from_user(buf, count, 16, &value); + if (rc) + return rc; + + hdev->clock_gating_mask = value; + hdev->asic_funcs->set_clock_gating(hdev); + + return count; +} + +static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + char tmp_buf[200]; + ssize_t rc; + + if (*ppos) + return 0; + + sprintf(tmp_buf, "%d\n", hdev->stop_on_err); + rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf, + strlen(tmp_buf) + 1); + + return rc; +} + +static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u32 value; + ssize_t rc; + + if (atomic_read(&hdev->in_reset)) { + dev_warn_ratelimited(hdev->dev, + "Can't change stop on error during reset\n"); + return 0; + } + + rc = kstrtouint_from_user(buf, count, 10, &value); + if (rc) + return rc; + + hdev->stop_on_err = value ? 1 : 0; + + hl_device_reset(hdev, false, false); + + return count; +} + +static const struct file_operations hl_data32b_fops = { + .owner = THIS_MODULE, + .read = hl_data_read32, + .write = hl_data_write32 +}; + +static const struct file_operations hl_data64b_fops = { + .owner = THIS_MODULE, + .read = hl_data_read64, + .write = hl_data_write64 +}; + +static const struct file_operations hl_i2c_data_fops = { + .owner = THIS_MODULE, + .read = hl_i2c_data_read, + .write = hl_i2c_data_write +}; + +static const struct file_operations hl_power_fops = { + .owner = THIS_MODULE, + .read = hl_get_power_state, + .write = hl_set_power_state +}; + +static const struct file_operations hl_led0_fops = { + .owner = THIS_MODULE, + .write = hl_led0_write +}; + +static const struct file_operations hl_led1_fops = { + .owner = THIS_MODULE, + .write = hl_led1_write +}; + +static const struct file_operations hl_led2_fops = { + .owner = THIS_MODULE, + .write = hl_led2_write +}; + +static const struct file_operations hl_device_fops = { + .owner = THIS_MODULE, + .read = hl_device_read, + .write = hl_device_write +}; + +static const struct file_operations hl_clk_gate_fops = { + .owner = THIS_MODULE, + .read = hl_clk_gate_read, + .write = hl_clk_gate_write +}; + +static const struct file_operations hl_stop_on_err_fops = { + .owner = THIS_MODULE, + .read = hl_stop_on_err_read, + .write = hl_stop_on_err_write +}; + +static const struct hl_info_list hl_debugfs_list[] = { + {"command_buffers", command_buffers_show, NULL}, + {"command_submission", command_submission_show, NULL}, + {"command_submission_jobs", command_submission_jobs_show, NULL}, + {"userptr", userptr_show, NULL}, + {"vm", vm_show, NULL}, + {"mmu", mmu_show, mmu_asid_va_write}, + {"engines", engines_show, NULL} +}; + +static int hl_debugfs_open(struct inode *inode, struct file *file) +{ + struct hl_debugfs_entry *node = inode->i_private; + + return single_open(file, node->info_ent->show, node); +} + +static ssize_t hl_debugfs_write(struct file *file, const char __user *buf, + size_t count, loff_t *f_pos) +{ + struct hl_debugfs_entry *node = file->f_inode->i_private; + + if (node->info_ent->write) + return node->info_ent->write(file, buf, count, f_pos); + else + return -EINVAL; + +} + +static const struct file_operations hl_debugfs_fops = { + .owner = THIS_MODULE, + .open = hl_debugfs_open, + .read = seq_read, + .write = hl_debugfs_write, + .llseek = seq_lseek, + .release = single_release, +}; + +void hl_debugfs_add_device(struct hl_device *hdev) +{ + struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; + int count = ARRAY_SIZE(hl_debugfs_list); + struct hl_debugfs_entry *entry; + struct dentry *ent; + int i; + + dev_entry->hdev = hdev; + dev_entry->entry_arr = kmalloc_array(count, + sizeof(struct hl_debugfs_entry), + GFP_KERNEL); + if (!dev_entry->entry_arr) + return; + + INIT_LIST_HEAD(&dev_entry->file_list); + INIT_LIST_HEAD(&dev_entry->cb_list); + INIT_LIST_HEAD(&dev_entry->cs_list); + INIT_LIST_HEAD(&dev_entry->cs_job_list); + INIT_LIST_HEAD(&dev_entry->userptr_list); + INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list); + mutex_init(&dev_entry->file_mutex); + spin_lock_init(&dev_entry->cb_spinlock); + spin_lock_init(&dev_entry->cs_spinlock); + spin_lock_init(&dev_entry->cs_job_spinlock); + spin_lock_init(&dev_entry->userptr_spinlock); + spin_lock_init(&dev_entry->ctx_mem_hash_spinlock); + + dev_entry->root = debugfs_create_dir(dev_name(hdev->dev), + hl_debug_root); + + debugfs_create_x64("addr", + 0644, + dev_entry->root, + &dev_entry->addr); + + debugfs_create_file("data32", + 0644, + dev_entry->root, + dev_entry, + &hl_data32b_fops); + + debugfs_create_file("data64", + 0644, + dev_entry->root, + dev_entry, + &hl_data64b_fops); + + debugfs_create_file("set_power_state", + 0200, + dev_entry->root, + dev_entry, + &hl_power_fops); + + debugfs_create_u8("i2c_bus", + 0644, + dev_entry->root, + &dev_entry->i2c_bus); + + debugfs_create_u8("i2c_addr", + 0644, + dev_entry->root, + &dev_entry->i2c_addr); + + debugfs_create_u8("i2c_reg", + 0644, + dev_entry->root, + &dev_entry->i2c_reg); + + debugfs_create_file("i2c_data", + 0644, + dev_entry->root, + dev_entry, + &hl_i2c_data_fops); + + debugfs_create_file("led0", + 0200, + dev_entry->root, + dev_entry, + &hl_led0_fops); + + debugfs_create_file("led1", + 0200, + dev_entry->root, + dev_entry, + &hl_led1_fops); + + debugfs_create_file("led2", + 0200, + dev_entry->root, + dev_entry, + &hl_led2_fops); + + debugfs_create_file("device", + 0200, + dev_entry->root, + dev_entry, + &hl_device_fops); + + debugfs_create_file("clk_gate", + 0200, + dev_entry->root, + dev_entry, + &hl_clk_gate_fops); + + debugfs_create_file("stop_on_err", + 0644, + dev_entry->root, + dev_entry, + &hl_stop_on_err_fops); + + for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) { + + ent = debugfs_create_file(hl_debugfs_list[i].name, + 0444, + dev_entry->root, + entry, + &hl_debugfs_fops); + entry->dent = ent; + entry->info_ent = &hl_debugfs_list[i]; + entry->dev_entry = dev_entry; + } +} + +void hl_debugfs_remove_device(struct hl_device *hdev) +{ + struct hl_dbg_device_entry *entry = &hdev->hl_debugfs; + + debugfs_remove_recursive(entry->root); + + mutex_destroy(&entry->file_mutex); + kfree(entry->entry_arr); +} + +void hl_debugfs_add_file(struct hl_fpriv *hpriv) +{ + struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs; + + mutex_lock(&dev_entry->file_mutex); + list_add(&hpriv->debugfs_list, &dev_entry->file_list); + mutex_unlock(&dev_entry->file_mutex); +} + +void hl_debugfs_remove_file(struct hl_fpriv *hpriv) +{ + struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs; + + mutex_lock(&dev_entry->file_mutex); + list_del(&hpriv->debugfs_list); + mutex_unlock(&dev_entry->file_mutex); +} + +void hl_debugfs_add_cb(struct hl_cb *cb) +{ + struct hl_dbg_device_entry *dev_entry = &cb->hdev->hl_debugfs; + + spin_lock(&dev_entry->cb_spinlock); + list_add(&cb->debugfs_list, &dev_entry->cb_list); + spin_unlock(&dev_entry->cb_spinlock); +} + +void hl_debugfs_remove_cb(struct hl_cb *cb) +{ + struct hl_dbg_device_entry *dev_entry = &cb->hdev->hl_debugfs; + + spin_lock(&dev_entry->cb_spinlock); + list_del(&cb->debugfs_list); + spin_unlock(&dev_entry->cb_spinlock); +} + +void hl_debugfs_add_cs(struct hl_cs *cs) +{ + struct hl_dbg_device_entry *dev_entry = &cs->ctx->hdev->hl_debugfs; + + spin_lock(&dev_entry->cs_spinlock); + list_add(&cs->debugfs_list, &dev_entry->cs_list); + spin_unlock(&dev_entry->cs_spinlock); +} + +void hl_debugfs_remove_cs(struct hl_cs *cs) +{ + struct hl_dbg_device_entry *dev_entry = &cs->ctx->hdev->hl_debugfs; + + spin_lock(&dev_entry->cs_spinlock); + list_del(&cs->debugfs_list); + spin_unlock(&dev_entry->cs_spinlock); +} + +void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job) +{ + struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; + + spin_lock(&dev_entry->cs_job_spinlock); + list_add(&job->debugfs_list, &dev_entry->cs_job_list); + spin_unlock(&dev_entry->cs_job_spinlock); +} + +void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job) +{ + struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; + + spin_lock(&dev_entry->cs_job_spinlock); + list_del(&job->debugfs_list); + spin_unlock(&dev_entry->cs_job_spinlock); +} + +void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr) +{ + struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; + + spin_lock(&dev_entry->userptr_spinlock); + list_add(&userptr->debugfs_list, &dev_entry->userptr_list); + spin_unlock(&dev_entry->userptr_spinlock); +} + +void hl_debugfs_remove_userptr(struct hl_device *hdev, + struct hl_userptr *userptr) +{ + struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; + + spin_lock(&dev_entry->userptr_spinlock); + list_del(&userptr->debugfs_list); + spin_unlock(&dev_entry->userptr_spinlock); +} + +void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx) +{ + struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; + + spin_lock(&dev_entry->ctx_mem_hash_spinlock); + list_add(&ctx->debugfs_list, &dev_entry->ctx_mem_hash_list); + spin_unlock(&dev_entry->ctx_mem_hash_spinlock); +} + +void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx) +{ + struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; + + spin_lock(&dev_entry->ctx_mem_hash_spinlock); + list_del(&ctx->debugfs_list); + spin_unlock(&dev_entry->ctx_mem_hash_spinlock); +} + +void __init hl_debugfs_init(void) +{ + hl_debug_root = debugfs_create_dir("habanalabs", NULL); +} + +void hl_debugfs_fini(void) +{ + debugfs_remove_recursive(hl_debug_root); +} diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c new file mode 100644 index 000000000..7b0bf4707 --- /dev/null +++ b/drivers/misc/habanalabs/common/device.c @@ -0,0 +1,1541 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#define pr_fmt(fmt) "habanalabs: " fmt + +#include "habanalabs.h" + +#include <linux/pci.h> +#include <linux/sched/signal.h> +#include <linux/hwmon.h> +#include <uapi/misc/habanalabs.h> + +#define HL_PLDM_PENDING_RESET_PER_SEC (HL_PENDING_RESET_PER_SEC * 10) + +bool hl_device_disabled_or_in_reset(struct hl_device *hdev) +{ + if ((hdev->disabled) || (atomic_read(&hdev->in_reset))) + return true; + else + return false; +} + +enum hl_device_status hl_device_status(struct hl_device *hdev) +{ + enum hl_device_status status; + + if (hdev->disabled) + status = HL_DEVICE_STATUS_MALFUNCTION; + else if (atomic_read(&hdev->in_reset)) + status = HL_DEVICE_STATUS_IN_RESET; + else + status = HL_DEVICE_STATUS_OPERATIONAL; + + return status; +} + +static void hpriv_release(struct kref *ref) +{ + struct hl_fpriv *hpriv; + struct hl_device *hdev; + + hpriv = container_of(ref, struct hl_fpriv, refcount); + + hdev = hpriv->hdev; + + put_pid(hpriv->taskpid); + + hl_debugfs_remove_file(hpriv); + + mutex_destroy(&hpriv->restore_phase_mutex); + + mutex_lock(&hdev->fpriv_list_lock); + list_del(&hpriv->dev_node); + hdev->compute_ctx = NULL; + mutex_unlock(&hdev->fpriv_list_lock); + + kfree(hpriv); +} + +void hl_hpriv_get(struct hl_fpriv *hpriv) +{ + kref_get(&hpriv->refcount); +} + +void hl_hpriv_put(struct hl_fpriv *hpriv) +{ + kref_put(&hpriv->refcount, hpriv_release); +} + +/* + * hl_device_release - release function for habanalabs device + * + * @inode: pointer to inode structure + * @filp: pointer to file structure + * + * Called when process closes an habanalabs device + */ +static int hl_device_release(struct inode *inode, struct file *filp) +{ + struct hl_fpriv *hpriv = filp->private_data; + + hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr); + hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); + + filp->private_data = NULL; + + hl_hpriv_put(hpriv); + + return 0; +} + +static int hl_device_release_ctrl(struct inode *inode, struct file *filp) +{ + struct hl_fpriv *hpriv = filp->private_data; + struct hl_device *hdev; + + filp->private_data = NULL; + + hdev = hpriv->hdev; + + mutex_lock(&hdev->fpriv_list_lock); + list_del(&hpriv->dev_node); + mutex_unlock(&hdev->fpriv_list_lock); + + put_pid(hpriv->taskpid); + + kfree(hpriv); + + return 0; +} + +/* + * hl_mmap - mmap function for habanalabs device + * + * @*filp: pointer to file structure + * @*vma: pointer to vm_area_struct of the process + * + * Called when process does an mmap on habanalabs device. Call the device's mmap + * function at the end of the common code. + */ +static int hl_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct hl_fpriv *hpriv = filp->private_data; + unsigned long vm_pgoff; + + vm_pgoff = vma->vm_pgoff; + vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff); + + switch (vm_pgoff & HL_MMAP_TYPE_MASK) { + case HL_MMAP_TYPE_CB: + return hl_cb_mmap(hpriv, vma); + } + + return -EINVAL; +} + +static const struct file_operations hl_ops = { + .owner = THIS_MODULE, + .open = hl_device_open, + .release = hl_device_release, + .mmap = hl_mmap, + .unlocked_ioctl = hl_ioctl, + .compat_ioctl = hl_ioctl +}; + +static const struct file_operations hl_ctrl_ops = { + .owner = THIS_MODULE, + .open = hl_device_open_ctrl, + .release = hl_device_release_ctrl, + .unlocked_ioctl = hl_ioctl_control, + .compat_ioctl = hl_ioctl_control +}; + +static void device_release_func(struct device *dev) +{ + kfree(dev); +} + +/* + * device_init_cdev - Initialize cdev and device for habanalabs device + * + * @hdev: pointer to habanalabs device structure + * @hclass: pointer to the class object of the device + * @minor: minor number of the specific device + * @fpos: file operations to install for this device + * @name: name of the device as it will appear in the filesystem + * @cdev: pointer to the char device object that will be initialized + * @dev: pointer to the device object that will be initialized + * + * Initialize a cdev and a Linux device for habanalabs's device. + */ +static int device_init_cdev(struct hl_device *hdev, struct class *hclass, + int minor, const struct file_operations *fops, + char *name, struct cdev *cdev, + struct device **dev) +{ + cdev_init(cdev, fops); + cdev->owner = THIS_MODULE; + + *dev = kzalloc(sizeof(**dev), GFP_KERNEL); + if (!*dev) + return -ENOMEM; + + device_initialize(*dev); + (*dev)->devt = MKDEV(hdev->major, minor); + (*dev)->class = hclass; + (*dev)->release = device_release_func; + dev_set_drvdata(*dev, hdev); + dev_set_name(*dev, "%s", name); + + return 0; +} + +static int device_cdev_sysfs_add(struct hl_device *hdev) +{ + int rc; + + rc = cdev_device_add(&hdev->cdev, hdev->dev); + if (rc) { + dev_err(hdev->dev, + "failed to add a char device to the system\n"); + return rc; + } + + rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl); + if (rc) { + dev_err(hdev->dev, + "failed to add a control char device to the system\n"); + goto delete_cdev_device; + } + + /* hl_sysfs_init() must be done after adding the device to the system */ + rc = hl_sysfs_init(hdev); + if (rc) { + dev_err(hdev->dev, "failed to initialize sysfs\n"); + goto delete_ctrl_cdev_device; + } + + hdev->cdev_sysfs_created = true; + + return 0; + +delete_ctrl_cdev_device: + cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); +delete_cdev_device: + cdev_device_del(&hdev->cdev, hdev->dev); + return rc; +} + +static void device_cdev_sysfs_del(struct hl_device *hdev) +{ + if (!hdev->cdev_sysfs_created) + goto put_devices; + + hl_sysfs_fini(hdev); + cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); + cdev_device_del(&hdev->cdev, hdev->dev); + +put_devices: + put_device(hdev->dev); + put_device(hdev->dev_ctrl); +} + +/* + * device_early_init - do some early initialization for the habanalabs device + * + * @hdev: pointer to habanalabs device structure + * + * Install the relevant function pointers and call the early_init function, + * if such a function exists + */ +static int device_early_init(struct hl_device *hdev) +{ + int i, rc; + char workq_name[32]; + + switch (hdev->asic_type) { + case ASIC_GOYA: + goya_set_asic_funcs(hdev); + strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name)); + break; + case ASIC_GAUDI: + gaudi_set_asic_funcs(hdev); + sprintf(hdev->asic_name, "GAUDI"); + break; + default: + dev_err(hdev->dev, "Unrecognized ASIC type %d\n", + hdev->asic_type); + return -EINVAL; + } + + rc = hdev->asic_funcs->early_init(hdev); + if (rc) + return rc; + + rc = hl_asid_init(hdev); + if (rc) + goto early_fini; + + if (hdev->asic_prop.completion_queues_count) { + hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count, + sizeof(*hdev->cq_wq), + GFP_ATOMIC); + if (!hdev->cq_wq) { + rc = -ENOMEM; + goto asid_fini; + } + } + + for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) { + snprintf(workq_name, 32, "hl-free-jobs-%u", (u32) i); + hdev->cq_wq[i] = create_singlethread_workqueue(workq_name); + if (hdev->cq_wq[i] == NULL) { + dev_err(hdev->dev, "Failed to allocate CQ workqueue\n"); + rc = -ENOMEM; + goto free_cq_wq; + } + } + + hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0); + if (hdev->eq_wq == NULL) { + dev_err(hdev->dev, "Failed to allocate EQ workqueue\n"); + rc = -ENOMEM; + goto free_cq_wq; + } + + hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info), + GFP_KERNEL); + if (!hdev->hl_chip_info) { + rc = -ENOMEM; + goto free_eq_wq; + } + + hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE, + sizeof(struct hl_device_idle_busy_ts), + (GFP_KERNEL | __GFP_ZERO)); + if (!hdev->idle_busy_ts_arr) { + rc = -ENOMEM; + goto free_chip_info; + } + + rc = hl_mmu_if_set_funcs(hdev); + if (rc) + goto free_idle_busy_ts_arr; + + hl_cb_mgr_init(&hdev->kernel_cb_mgr); + + mutex_init(&hdev->send_cpu_message_lock); + mutex_init(&hdev->debug_lock); + mutex_init(&hdev->mmu_cache_lock); + INIT_LIST_HEAD(&hdev->hw_queues_mirror_list); + spin_lock_init(&hdev->hw_queues_mirror_lock); + INIT_LIST_HEAD(&hdev->fpriv_list); + mutex_init(&hdev->fpriv_list_lock); + atomic_set(&hdev->in_reset, 0); + + return 0; + +free_idle_busy_ts_arr: + kfree(hdev->idle_busy_ts_arr); +free_chip_info: + kfree(hdev->hl_chip_info); +free_eq_wq: + destroy_workqueue(hdev->eq_wq); +free_cq_wq: + for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) + if (hdev->cq_wq[i]) + destroy_workqueue(hdev->cq_wq[i]); + kfree(hdev->cq_wq); +asid_fini: + hl_asid_fini(hdev); +early_fini: + if (hdev->asic_funcs->early_fini) + hdev->asic_funcs->early_fini(hdev); + + return rc; +} + +/* + * device_early_fini - finalize all that was done in device_early_init + * + * @hdev: pointer to habanalabs device structure + * + */ +static void device_early_fini(struct hl_device *hdev) +{ + int i; + + mutex_destroy(&hdev->mmu_cache_lock); + mutex_destroy(&hdev->debug_lock); + mutex_destroy(&hdev->send_cpu_message_lock); + + mutex_destroy(&hdev->fpriv_list_lock); + + hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); + + kfree(hdev->idle_busy_ts_arr); + kfree(hdev->hl_chip_info); + + destroy_workqueue(hdev->eq_wq); + + for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) + destroy_workqueue(hdev->cq_wq[i]); + kfree(hdev->cq_wq); + + hl_asid_fini(hdev); + + if (hdev->asic_funcs->early_fini) + hdev->asic_funcs->early_fini(hdev); +} + +static void set_freq_to_low_job(struct work_struct *work) +{ + struct hl_device *hdev = container_of(work, struct hl_device, + work_freq.work); + + mutex_lock(&hdev->fpriv_list_lock); + + if (!hdev->compute_ctx) + hl_device_set_frequency(hdev, PLL_LOW); + + mutex_unlock(&hdev->fpriv_list_lock); + + schedule_delayed_work(&hdev->work_freq, + usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC)); +} + +static void hl_device_heartbeat(struct work_struct *work) +{ + struct hl_device *hdev = container_of(work, struct hl_device, + work_heartbeat.work); + + if (hl_device_disabled_or_in_reset(hdev)) + goto reschedule; + + if (!hdev->asic_funcs->send_heartbeat(hdev)) + goto reschedule; + + dev_err(hdev->dev, "Device heartbeat failed!\n"); + hl_device_reset(hdev, true, false); + + return; + +reschedule: + schedule_delayed_work(&hdev->work_heartbeat, + usecs_to_jiffies(HL_HEARTBEAT_PER_USEC)); +} + +/* + * device_late_init - do late stuff initialization for the habanalabs device + * + * @hdev: pointer to habanalabs device structure + * + * Do stuff that either needs the device H/W queues to be active or needs + * to happen after all the rest of the initialization is finished + */ +static int device_late_init(struct hl_device *hdev) +{ + int rc; + + if (hdev->asic_funcs->late_init) { + rc = hdev->asic_funcs->late_init(hdev); + if (rc) { + dev_err(hdev->dev, + "failed late initialization for the H/W\n"); + return rc; + } + } + + hdev->high_pll = hdev->asic_prop.high_pll; + + /* force setting to low frequency */ + hdev->curr_pll_profile = PLL_LOW; + + if (hdev->pm_mng_profile == PM_AUTO) + hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW); + else + hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST); + + INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job); + schedule_delayed_work(&hdev->work_freq, + usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC)); + + if (hdev->heartbeat) { + INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat); + schedule_delayed_work(&hdev->work_heartbeat, + usecs_to_jiffies(HL_HEARTBEAT_PER_USEC)); + } + + hdev->late_init_done = true; + + return 0; +} + +/* + * device_late_fini - finalize all that was done in device_late_init + * + * @hdev: pointer to habanalabs device structure + * + */ +static void device_late_fini(struct hl_device *hdev) +{ + if (!hdev->late_init_done) + return; + + cancel_delayed_work_sync(&hdev->work_freq); + if (hdev->heartbeat) + cancel_delayed_work_sync(&hdev->work_heartbeat); + + if (hdev->asic_funcs->late_fini) + hdev->asic_funcs->late_fini(hdev); + + hdev->late_init_done = false; +} + +uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms) +{ + struct hl_device_idle_busy_ts *ts; + ktime_t zero_ktime, curr = ktime_get(); + u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx; + s64 period_us, last_start_us, last_end_us, last_busy_time_us, + total_busy_time_us = 0, total_busy_time_ms; + + zero_ktime = ktime_set(0, 0); + period_us = period_ms * USEC_PER_MSEC; + ts = &hdev->idle_busy_ts_arr[last_index]; + + /* check case that device is currently in idle */ + if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime) && + !ktime_compare(ts->idle_to_busy_ts, zero_ktime)) { + + last_index--; + /* Handle case idle_busy_ts_idx was 0 */ + if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE) + last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1; + + ts = &hdev->idle_busy_ts_arr[last_index]; + } + + while (overlap_cnt < HL_IDLE_BUSY_TS_ARR_SIZE) { + /* Check if we are in last sample case. i.e. if the sample + * begun before the sampling period. This could be a real + * sample or 0 so need to handle both cases + */ + last_start_us = ktime_to_us( + ktime_sub(curr, ts->idle_to_busy_ts)); + + if (last_start_us > period_us) { + + /* First check two cases: + * 1. If the device is currently busy + * 2. If the device was idle during the whole sampling + * period + */ + + if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime)) { + /* Check if the device is currently busy */ + if (ktime_compare(ts->idle_to_busy_ts, + zero_ktime)) + return 100; + + /* We either didn't have any activity or we + * reached an entry which is 0. Either way, + * exit and return what was accumulated so far + */ + break; + } + + /* If sample has finished, check it is relevant */ + last_end_us = ktime_to_us( + ktime_sub(curr, ts->busy_to_idle_ts)); + + if (last_end_us > period_us) + break; + + /* It is relevant so add it but with adjustment */ + last_busy_time_us = ktime_to_us( + ktime_sub(ts->busy_to_idle_ts, + ts->idle_to_busy_ts)); + total_busy_time_us += last_busy_time_us - + (last_start_us - period_us); + break; + } + + /* Check if the sample is finished or still open */ + if (ktime_compare(ts->busy_to_idle_ts, zero_ktime)) + last_busy_time_us = ktime_to_us( + ktime_sub(ts->busy_to_idle_ts, + ts->idle_to_busy_ts)); + else + last_busy_time_us = ktime_to_us( + ktime_sub(curr, ts->idle_to_busy_ts)); + + total_busy_time_us += last_busy_time_us; + + last_index--; + /* Handle case idle_busy_ts_idx was 0 */ + if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE) + last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1; + + ts = &hdev->idle_busy_ts_arr[last_index]; + + overlap_cnt++; + } + + total_busy_time_ms = DIV_ROUND_UP_ULL(total_busy_time_us, + USEC_PER_MSEC); + + return DIV_ROUND_UP_ULL(total_busy_time_ms * 100, period_ms); +} + +/* + * hl_device_set_frequency - set the frequency of the device + * + * @hdev: pointer to habanalabs device structure + * @freq: the new frequency value + * + * Change the frequency if needed. This function has no protection against + * concurrency, therefore it is assumed that the calling function has protected + * itself against the case of calling this function from multiple threads with + * different values + * + * Returns 0 if no change was done, otherwise returns 1 + */ +int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq) +{ + if ((hdev->pm_mng_profile == PM_MANUAL) || + (hdev->curr_pll_profile == freq)) + return 0; + + dev_dbg(hdev->dev, "Changing device frequency to %s\n", + freq == PLL_HIGH ? "high" : "low"); + + hdev->asic_funcs->set_pll_profile(hdev, freq); + + hdev->curr_pll_profile = freq; + + return 1; +} + +int hl_device_set_debug_mode(struct hl_device *hdev, bool enable) +{ + int rc = 0; + + mutex_lock(&hdev->debug_lock); + + if (!enable) { + if (!hdev->in_debug) { + dev_err(hdev->dev, + "Failed to disable debug mode because device was not in debug mode\n"); + rc = -EFAULT; + goto out; + } + + if (!hdev->hard_reset_pending) + hdev->asic_funcs->halt_coresight(hdev); + + hdev->in_debug = 0; + + if (!hdev->hard_reset_pending) + hdev->asic_funcs->set_clock_gating(hdev); + + goto out; + } + + if (hdev->in_debug) { + dev_err(hdev->dev, + "Failed to enable debug mode because device is already in debug mode\n"); + rc = -EFAULT; + goto out; + } + + hdev->asic_funcs->disable_clock_gating(hdev); + hdev->in_debug = 1; + +out: + mutex_unlock(&hdev->debug_lock); + + return rc; +} + +/* + * hl_device_suspend - initiate device suspend + * + * @hdev: pointer to habanalabs device structure + * + * Puts the hw in the suspend state (all asics). + * Returns 0 for success or an error on failure. + * Called at driver suspend. + */ +int hl_device_suspend(struct hl_device *hdev) +{ + int rc; + + pci_save_state(hdev->pdev); + + /* Block future CS/VM/JOB completion operations */ + rc = atomic_cmpxchg(&hdev->in_reset, 0, 1); + if (rc) { + dev_err(hdev->dev, "Can't suspend while in reset\n"); + return -EIO; + } + + /* This blocks all other stuff that is not blocked by in_reset */ + hdev->disabled = true; + + /* + * Flush anyone that is inside the critical section of enqueue + * jobs to the H/W + */ + hdev->asic_funcs->hw_queues_lock(hdev); + hdev->asic_funcs->hw_queues_unlock(hdev); + + /* Flush processes that are sending message to CPU */ + mutex_lock(&hdev->send_cpu_message_lock); + mutex_unlock(&hdev->send_cpu_message_lock); + + rc = hdev->asic_funcs->suspend(hdev); + if (rc) + dev_err(hdev->dev, + "Failed to disable PCI access of device CPU\n"); + + /* Shut down the device */ + pci_disable_device(hdev->pdev); + pci_set_power_state(hdev->pdev, PCI_D3hot); + + return 0; +} + +/* + * hl_device_resume - initiate device resume + * + * @hdev: pointer to habanalabs device structure + * + * Bring the hw back to operating state (all asics). + * Returns 0 for success or an error on failure. + * Called at driver resume. + */ +int hl_device_resume(struct hl_device *hdev) +{ + int rc; + + pci_set_power_state(hdev->pdev, PCI_D0); + pci_restore_state(hdev->pdev); + rc = pci_enable_device_mem(hdev->pdev); + if (rc) { + dev_err(hdev->dev, + "Failed to enable PCI device in resume\n"); + return rc; + } + + pci_set_master(hdev->pdev); + + rc = hdev->asic_funcs->resume(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to resume device after suspend\n"); + goto disable_device; + } + + + hdev->disabled = false; + atomic_set(&hdev->in_reset, 0); + + rc = hl_device_reset(hdev, true, false); + if (rc) { + dev_err(hdev->dev, "Failed to reset device during resume\n"); + goto disable_device; + } + + return 0; + +disable_device: + pci_clear_master(hdev->pdev); + pci_disable_device(hdev->pdev); + + return rc; +} + +static int device_kill_open_processes(struct hl_device *hdev) +{ + u16 pending_total, pending_cnt; + struct hl_fpriv *hpriv; + struct task_struct *task = NULL; + + if (hdev->pldm) + pending_total = HL_PLDM_PENDING_RESET_PER_SEC; + else + pending_total = HL_PENDING_RESET_PER_SEC; + + /* Giving time for user to close FD, and for processes that are inside + * hl_device_open to finish + */ + if (!list_empty(&hdev->fpriv_list)) + ssleep(1); + + mutex_lock(&hdev->fpriv_list_lock); + + /* This section must be protected because we are dereferencing + * pointers that are freed if the process exits + */ + list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) { + task = get_pid_task(hpriv->taskpid, PIDTYPE_PID); + if (task) { + dev_info(hdev->dev, "Killing user process pid=%d\n", + task_pid_nr(task)); + send_sig(SIGKILL, task, 1); + usleep_range(1000, 10000); + + put_task_struct(task); + } + } + + mutex_unlock(&hdev->fpriv_list_lock); + + /* We killed the open users, but because the driver cleans up after the + * user contexts are closed (e.g. mmu mappings), we need to wait again + * to make sure the cleaning phase is finished before continuing with + * the reset + */ + + pending_cnt = pending_total; + + while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) { + dev_info(hdev->dev, + "Waiting for all unmap operations to finish before hard reset\n"); + + pending_cnt--; + + ssleep(1); + } + + return list_empty(&hdev->fpriv_list) ? 0 : -EBUSY; +} + +static void device_hard_reset_pending(struct work_struct *work) +{ + struct hl_device_reset_work *device_reset_work = + container_of(work, struct hl_device_reset_work, reset_work); + struct hl_device *hdev = device_reset_work->hdev; + + hl_device_reset(hdev, true, true); + + kfree(device_reset_work); +} + +/* + * hl_device_reset - reset the device + * + * @hdev: pointer to habanalabs device structure + * @hard_reset: should we do hard reset to all engines or just reset the + * compute/dma engines + * @from_hard_reset_thread: is the caller the hard-reset thread + * + * Block future CS and wait for pending CS to be enqueued + * Call ASIC H/W fini + * Flush all completions + * Re-initialize all internal data structures + * Call ASIC H/W init, late_init + * Test queues + * Enable device + * + * Returns 0 for success or an error on failure. + */ +int hl_device_reset(struct hl_device *hdev, bool hard_reset, + bool from_hard_reset_thread) +{ + int i, rc; + + if (!hdev->init_done) { + dev_err(hdev->dev, + "Can't reset before initialization is done\n"); + return 0; + } + + if ((!hard_reset) && (!hdev->supports_soft_reset)) { + dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n"); + hard_reset = true; + } + + /* + * Prevent concurrency in this function - only one reset should be + * done at any given time. Only need to perform this if we didn't + * get from the dedicated hard reset thread + */ + if (!from_hard_reset_thread) { + /* Block future CS/VM/JOB completion operations */ + rc = atomic_cmpxchg(&hdev->in_reset, 0, 1); + if (rc) + return 0; + + if (hard_reset) { + /* Disable PCI access from device F/W so he won't send + * us additional interrupts. We disable MSI/MSI-X at + * the halt_engines function and we can't have the F/W + * sending us interrupts after that. We need to disable + * the access here because if the device is marked + * disable, the message won't be send. Also, in case + * of heartbeat, the device CPU is marked as disable + * so this message won't be sent + */ + if (hl_fw_send_pci_access_msg(hdev, + CPUCP_PACKET_DISABLE_PCI_ACCESS)) + dev_warn(hdev->dev, + "Failed to disable PCI access by F/W\n"); + } + + /* This also blocks future CS/VM/JOB completion operations */ + hdev->disabled = true; + + /* Flush anyone that is inside the critical section of enqueue + * jobs to the H/W + */ + hdev->asic_funcs->hw_queues_lock(hdev); + hdev->asic_funcs->hw_queues_unlock(hdev); + + /* Flush anyone that is inside device open */ + mutex_lock(&hdev->fpriv_list_lock); + mutex_unlock(&hdev->fpriv_list_lock); + + dev_err(hdev->dev, "Going to RESET device!\n"); + } + +again: + if ((hard_reset) && (!from_hard_reset_thread)) { + struct hl_device_reset_work *device_reset_work; + + hdev->hard_reset_pending = true; + + device_reset_work = kzalloc(sizeof(*device_reset_work), + GFP_ATOMIC); + if (!device_reset_work) { + rc = -ENOMEM; + goto out_err; + } + + /* + * Because the reset function can't run from interrupt or + * from heartbeat work, we need to call the reset function + * from a dedicated work + */ + INIT_WORK(&device_reset_work->reset_work, + device_hard_reset_pending); + device_reset_work->hdev = hdev; + schedule_work(&device_reset_work->reset_work); + + return 0; + } + + if (hard_reset) { + device_late_fini(hdev); + + /* + * Now that the heartbeat thread is closed, flush processes + * which are sending messages to CPU + */ + mutex_lock(&hdev->send_cpu_message_lock); + mutex_unlock(&hdev->send_cpu_message_lock); + } + + /* + * Halt the engines and disable interrupts so we won't get any more + * completions from H/W and we won't have any accesses from the + * H/W to the host machine + */ + hdev->asic_funcs->halt_engines(hdev, hard_reset); + + /* Go over all the queues, release all CS and their jobs */ + hl_cs_rollback_all(hdev); + + if (hard_reset) { + /* Kill processes here after CS rollback. This is because the + * process can't really exit until all its CSs are done, which + * is what we do in cs rollback + */ + rc = device_kill_open_processes(hdev); + if (rc) { + dev_crit(hdev->dev, + "Failed to kill all open processes, stopping hard reset\n"); + goto out_err; + } + + /* Flush the Event queue workers to make sure no other thread is + * reading or writing to registers during the reset + */ + flush_workqueue(hdev->eq_wq); + } + + /* Reset the H/W. It will be in idle state after this returns */ + hdev->asic_funcs->hw_fini(hdev, hard_reset); + + if (hard_reset) { + /* Release kernel context */ + if (hl_ctx_put(hdev->kernel_ctx) == 1) + hdev->kernel_ctx = NULL; + hl_vm_fini(hdev); + hl_mmu_fini(hdev); + hl_eq_reset(hdev, &hdev->event_queue); + } + + /* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */ + hl_hw_queue_reset(hdev, hard_reset); + for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) + hl_cq_reset(hdev, &hdev->completion_queue[i]); + + hdev->idle_busy_ts_idx = 0; + hdev->idle_busy_ts_arr[0].busy_to_idle_ts = ktime_set(0, 0); + hdev->idle_busy_ts_arr[0].idle_to_busy_ts = ktime_set(0, 0); + + if (hdev->cs_active_cnt) + dev_crit(hdev->dev, "CS active cnt %d is not 0 during reset\n", + hdev->cs_active_cnt); + + mutex_lock(&hdev->fpriv_list_lock); + + /* Make sure the context switch phase will run again */ + if (hdev->compute_ctx) { + atomic_set(&hdev->compute_ctx->thread_ctx_switch_token, 1); + hdev->compute_ctx->thread_ctx_switch_wait_token = 0; + } + + mutex_unlock(&hdev->fpriv_list_lock); + + /* Finished tear-down, starting to re-initialize */ + + if (hard_reset) { + hdev->device_cpu_disabled = false; + hdev->hard_reset_pending = false; + + if (hdev->kernel_ctx) { + dev_crit(hdev->dev, + "kernel ctx was alive during hard reset, something is terribly wrong\n"); + rc = -EBUSY; + goto out_err; + } + + rc = hl_mmu_init(hdev); + if (rc) { + dev_err(hdev->dev, + "Failed to initialize MMU S/W after hard reset\n"); + goto out_err; + } + + /* Allocate the kernel context */ + hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), + GFP_KERNEL); + if (!hdev->kernel_ctx) { + rc = -ENOMEM; + hl_mmu_fini(hdev); + goto out_err; + } + + hdev->compute_ctx = NULL; + + rc = hl_ctx_init(hdev, hdev->kernel_ctx, true); + if (rc) { + dev_err(hdev->dev, + "failed to init kernel ctx in hard reset\n"); + kfree(hdev->kernel_ctx); + hdev->kernel_ctx = NULL; + hl_mmu_fini(hdev); + goto out_err; + } + } + + /* Device is now enabled as part of the initialization requires + * communication with the device firmware to get information that + * is required for the initialization itself + */ + hdev->disabled = false; + + rc = hdev->asic_funcs->hw_init(hdev); + if (rc) { + dev_err(hdev->dev, + "failed to initialize the H/W after reset\n"); + goto out_err; + } + + /* Check that the communication with the device is working */ + rc = hdev->asic_funcs->test_queues(hdev); + if (rc) { + dev_err(hdev->dev, + "Failed to detect if device is alive after reset\n"); + goto out_err; + } + + if (hard_reset) { + rc = device_late_init(hdev); + if (rc) { + dev_err(hdev->dev, + "Failed late init after hard reset\n"); + goto out_err; + } + + rc = hl_vm_init(hdev); + if (rc) { + dev_err(hdev->dev, + "Failed to init memory module after hard reset\n"); + goto out_err; + } + + hl_set_max_power(hdev); + } else { + rc = hdev->asic_funcs->soft_reset_late_init(hdev); + if (rc) { + dev_err(hdev->dev, + "Failed late init after soft reset\n"); + goto out_err; + } + } + + atomic_set(&hdev->in_reset, 0); + + if (hard_reset) + hdev->hard_reset_cnt++; + else + hdev->soft_reset_cnt++; + + dev_warn(hdev->dev, "Successfully finished resetting the device\n"); + + return 0; + +out_err: + hdev->disabled = true; + + if (hard_reset) { + dev_err(hdev->dev, + "Failed to reset! Device is NOT usable\n"); + hdev->hard_reset_cnt++; + } else { + dev_err(hdev->dev, + "Failed to do soft-reset, trying hard reset\n"); + hdev->soft_reset_cnt++; + hard_reset = true; + goto again; + } + + atomic_set(&hdev->in_reset, 0); + + return rc; +} + +/* + * hl_device_init - main initialization function for habanalabs device + * + * @hdev: pointer to habanalabs device structure + * + * Allocate an id for the device, do early initialization and then call the + * ASIC specific initialization functions. Finally, create the cdev and the + * Linux device to expose it to the user + */ +int hl_device_init(struct hl_device *hdev, struct class *hclass) +{ + int i, rc, cq_cnt, cq_ready_cnt; + char *name; + bool add_cdev_sysfs_on_err = false; + + name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2); + if (!name) { + rc = -ENOMEM; + goto out_disabled; + } + + /* Initialize cdev and device structures */ + rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name, + &hdev->cdev, &hdev->dev); + + kfree(name); + + if (rc) + goto out_disabled; + + name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2); + if (!name) { + rc = -ENOMEM; + goto free_dev; + } + + /* Initialize cdev and device structures for control device */ + rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops, + name, &hdev->cdev_ctrl, &hdev->dev_ctrl); + + kfree(name); + + if (rc) + goto free_dev; + + /* Initialize ASIC function pointers and perform early init */ + rc = device_early_init(hdev); + if (rc) + goto free_dev_ctrl; + + /* + * Start calling ASIC initialization. First S/W then H/W and finally + * late init + */ + rc = hdev->asic_funcs->sw_init(hdev); + if (rc) + goto early_fini; + + /* + * Initialize the H/W queues. Must be done before hw_init, because + * there the addresses of the kernel queue are being written to the + * registers of the device + */ + rc = hl_hw_queues_create(hdev); + if (rc) { + dev_err(hdev->dev, "failed to initialize kernel queues\n"); + goto sw_fini; + } + + cq_cnt = hdev->asic_prop.completion_queues_count; + + /* + * Initialize the completion queues. Must be done before hw_init, + * because there the addresses of the completion queues are being + * passed as arguments to request_irq + */ + if (cq_cnt) { + hdev->completion_queue = kcalloc(cq_cnt, + sizeof(*hdev->completion_queue), + GFP_KERNEL); + + if (!hdev->completion_queue) { + dev_err(hdev->dev, + "failed to allocate completion queues\n"); + rc = -ENOMEM; + goto hw_queues_destroy; + } + } + + for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) { + rc = hl_cq_init(hdev, &hdev->completion_queue[i], + hdev->asic_funcs->get_queue_id_for_cq(hdev, i)); + if (rc) { + dev_err(hdev->dev, + "failed to initialize completion queue\n"); + goto cq_fini; + } + hdev->completion_queue[i].cq_idx = i; + } + + /* + * Initialize the event queue. Must be done before hw_init, + * because there the address of the event queue is being + * passed as argument to request_irq + */ + rc = hl_eq_init(hdev, &hdev->event_queue); + if (rc) { + dev_err(hdev->dev, "failed to initialize event queue\n"); + goto cq_fini; + } + + /* MMU S/W must be initialized before kernel context is created */ + rc = hl_mmu_init(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n"); + goto eq_fini; + } + + /* Allocate the kernel context */ + hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL); + if (!hdev->kernel_ctx) { + rc = -ENOMEM; + goto mmu_fini; + } + + hdev->compute_ctx = NULL; + + rc = hl_ctx_init(hdev, hdev->kernel_ctx, true); + if (rc) { + dev_err(hdev->dev, "failed to initialize kernel context\n"); + kfree(hdev->kernel_ctx); + goto mmu_fini; + } + + rc = hl_cb_pool_init(hdev); + if (rc) { + dev_err(hdev->dev, "failed to initialize CB pool\n"); + goto release_ctx; + } + + hl_debugfs_add_device(hdev); + + if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { + dev_info(hdev->dev, + "H/W state is dirty, must reset before initializing\n"); + hdev->asic_funcs->halt_engines(hdev, true); + hdev->asic_funcs->hw_fini(hdev, true); + } + + /* + * From this point, in case of an error, add char devices and create + * sysfs nodes as part of the error flow, to allow debugging. + */ + add_cdev_sysfs_on_err = true; + + /* Device is now enabled as part of the initialization requires + * communication with the device firmware to get information that + * is required for the initialization itself + */ + hdev->disabled = false; + + rc = hdev->asic_funcs->hw_init(hdev); + if (rc) { + dev_err(hdev->dev, "failed to initialize the H/W\n"); + rc = 0; + goto out_disabled; + } + + /* Check that the communication with the device is working */ + rc = hdev->asic_funcs->test_queues(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to detect if device is alive\n"); + rc = 0; + goto out_disabled; + } + + rc = device_late_init(hdev); + if (rc) { + dev_err(hdev->dev, "Failed late initialization\n"); + rc = 0; + goto out_disabled; + } + + dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n", + hdev->asic_name, + hdev->asic_prop.dram_size / 1024 / 1024 / 1024); + + rc = hl_vm_init(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to initialize memory module\n"); + rc = 0; + goto out_disabled; + } + + /* + * Expose devices and sysfs nodes to user. + * From here there is no need to add char devices and create sysfs nodes + * in case of an error. + */ + add_cdev_sysfs_on_err = false; + rc = device_cdev_sysfs_add(hdev); + if (rc) { + dev_err(hdev->dev, + "Failed to add char devices and sysfs nodes\n"); + rc = 0; + goto out_disabled; + } + + /* Need to call this again because the max power might change, + * depending on card type for certain ASICs + */ + hl_set_max_power(hdev); + + /* + * hl_hwmon_init() must be called after device_late_init(), because only + * there we get the information from the device about which + * hwmon-related sensors the device supports. + * Furthermore, it must be done after adding the device to the system. + */ + rc = hl_hwmon_init(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to initialize hwmon\n"); + rc = 0; + goto out_disabled; + } + + dev_notice(hdev->dev, + "Successfully added device to habanalabs driver\n"); + + hdev->init_done = true; + + return 0; + +release_ctx: + if (hl_ctx_put(hdev->kernel_ctx) != 1) + dev_err(hdev->dev, + "kernel ctx is still alive on initialization failure\n"); +mmu_fini: + hl_mmu_fini(hdev); +eq_fini: + hl_eq_fini(hdev, &hdev->event_queue); +cq_fini: + for (i = 0 ; i < cq_ready_cnt ; i++) + hl_cq_fini(hdev, &hdev->completion_queue[i]); + kfree(hdev->completion_queue); +hw_queues_destroy: + hl_hw_queues_destroy(hdev); +sw_fini: + hdev->asic_funcs->sw_fini(hdev); +early_fini: + device_early_fini(hdev); +free_dev_ctrl: + put_device(hdev->dev_ctrl); +free_dev: + put_device(hdev->dev); +out_disabled: + hdev->disabled = true; + if (add_cdev_sysfs_on_err) + device_cdev_sysfs_add(hdev); + if (hdev->pdev) + dev_err(&hdev->pdev->dev, + "Failed to initialize hl%d. Device is NOT usable !\n", + hdev->id / 2); + else + pr_err("Failed to initialize hl%d. Device is NOT usable !\n", + hdev->id / 2); + + return rc; +} + +/* + * hl_device_fini - main tear-down function for habanalabs device + * + * @hdev: pointer to habanalabs device structure + * + * Destroy the device, call ASIC fini functions and release the id + */ +void hl_device_fini(struct hl_device *hdev) +{ + int i, rc; + ktime_t timeout; + + dev_info(hdev->dev, "Removing device\n"); + + /* + * This function is competing with the reset function, so try to + * take the reset atomic and if we are already in middle of reset, + * wait until reset function is finished. Reset function is designed + * to always finish. However, in Gaudi, because of all the network + * ports, the hard reset could take between 10-30 seconds + */ + + timeout = ktime_add_us(ktime_get(), + HL_HARD_RESET_MAX_TIMEOUT * 1000 * 1000); + rc = atomic_cmpxchg(&hdev->in_reset, 0, 1); + while (rc) { + usleep_range(50, 200); + rc = atomic_cmpxchg(&hdev->in_reset, 0, 1); + if (ktime_compare(ktime_get(), timeout) > 0) { + WARN(1, "Failed to remove device because reset function did not finish\n"); + return; + } + } + + /* Disable PCI access from device F/W so it won't send us additional + * interrupts. We disable MSI/MSI-X at the halt_engines function and we + * can't have the F/W sending us interrupts after that. We need to + * disable the access here because if the device is marked disable, the + * message won't be send. Also, in case of heartbeat, the device CPU is + * marked as disable so this message won't be sent + */ + hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS); + + /* Mark device as disabled */ + hdev->disabled = true; + + /* Flush anyone that is inside the critical section of enqueue + * jobs to the H/W + */ + hdev->asic_funcs->hw_queues_lock(hdev); + hdev->asic_funcs->hw_queues_unlock(hdev); + + /* Flush anyone that is inside device open */ + mutex_lock(&hdev->fpriv_list_lock); + mutex_unlock(&hdev->fpriv_list_lock); + + hdev->hard_reset_pending = true; + + hl_hwmon_fini(hdev); + + device_late_fini(hdev); + + hl_debugfs_remove_device(hdev); + + /* + * Halt the engines and disable interrupts so we won't get any more + * completions from H/W and we won't have any accesses from the + * H/W to the host machine + */ + hdev->asic_funcs->halt_engines(hdev, true); + + /* Go over all the queues, release all CS and their jobs */ + hl_cs_rollback_all(hdev); + + /* Kill processes here after CS rollback. This is because the process + * can't really exit until all its CSs are done, which is what we + * do in cs rollback + */ + rc = device_kill_open_processes(hdev); + if (rc) + dev_crit(hdev->dev, "Failed to kill all open processes\n"); + + hl_cb_pool_fini(hdev); + + /* Reset the H/W. It will be in idle state after this returns */ + hdev->asic_funcs->hw_fini(hdev, true); + + /* Release kernel context */ + if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1)) + dev_err(hdev->dev, "kernel ctx is still alive\n"); + + hl_vm_fini(hdev); + + hl_mmu_fini(hdev); + + hl_eq_fini(hdev, &hdev->event_queue); + + for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) + hl_cq_fini(hdev, &hdev->completion_queue[i]); + kfree(hdev->completion_queue); + + hl_hw_queues_destroy(hdev); + + /* Call ASIC S/W finalize function */ + hdev->asic_funcs->sw_fini(hdev); + + device_early_fini(hdev); + + /* Hide devices and sysfs nodes from user */ + device_cdev_sysfs_del(hdev); + + pr_info("removed device successfully\n"); +} + +/* + * MMIO register access helper functions. + */ + +/* + * hl_rreg - Read an MMIO register + * + * @hdev: pointer to habanalabs device structure + * @reg: MMIO register offset (in bytes) + * + * Returns the value of the MMIO register we are asked to read + * + */ +inline u32 hl_rreg(struct hl_device *hdev, u32 reg) +{ + return readl(hdev->rmmio + reg); +} + +/* + * hl_wreg - Write to an MMIO register + * + * @hdev: pointer to habanalabs device structure + * @reg: MMIO register offset (in bytes) + * @val: 32-bit value + * + * Writes the 32-bit value into the MMIO register + * + */ +inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val) +{ + writel(val, hdev->rmmio + reg); +} diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c new file mode 100644 index 000000000..13c6eebd4 --- /dev/null +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -0,0 +1,716 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" +#include "../include/common/hl_boot_if.h" + +#include <linux/firmware.h> +#include <linux/genalloc.h> +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/slab.h> + +#define FW_FILE_MAX_SIZE 0x1400000 /* maximum size of 20MB */ +/** + * hl_fw_load_fw_to_device() - Load F/W code to device's memory. + * + * @hdev: pointer to hl_device structure. + * @fw_name: the firmware image name + * @dst: IO memory mapped address space to copy firmware to + * + * Copy fw code from firmware file to device memory. + * + * Return: 0 on success, non-zero for failure. + */ +int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name, + void __iomem *dst) +{ + const struct firmware *fw; + const u64 *fw_data; + size_t fw_size; + int rc; + + rc = request_firmware(&fw, fw_name, hdev->dev); + if (rc) { + dev_err(hdev->dev, "Firmware file %s is not found!\n", fw_name); + goto out; + } + + fw_size = fw->size; + if ((fw_size % 4) != 0) { + dev_err(hdev->dev, "Illegal %s firmware size %zu\n", + fw_name, fw_size); + rc = -EINVAL; + goto out; + } + + dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size); + + if (fw_size > FW_FILE_MAX_SIZE) { + dev_err(hdev->dev, + "FW file size %zu exceeds maximum of %u bytes\n", + fw_size, FW_FILE_MAX_SIZE); + rc = -EINVAL; + goto out; + } + + fw_data = (const u64 *) fw->data; + + memcpy_toio(dst, fw_data, fw_size); + +out: + release_firmware(fw); + return rc; +} + +int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode) +{ + struct cpucp_packet pkt = {}; + + pkt.ctl = cpu_to_le32(opcode << CPUCP_PKT_CTL_OPCODE_SHIFT); + + return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, + sizeof(pkt), 0, NULL); +} + +int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, + u16 len, u32 timeout, long *result) +{ + struct cpucp_packet *pkt; + dma_addr_t pkt_dma_addr; + u32 tmp; + int rc = 0; + + pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len, + &pkt_dma_addr); + if (!pkt) { + dev_err(hdev->dev, + "Failed to allocate DMA memory for packet to CPU\n"); + return -ENOMEM; + } + + memcpy(pkt, msg, len); + + mutex_lock(&hdev->send_cpu_message_lock); + + if (hdev->disabled) + goto out; + + if (hdev->device_cpu_disabled) { + rc = -EIO; + goto out; + } + + rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr); + if (rc) { + dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc); + goto out; + } + + rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp, + (tmp == CPUCP_PACKET_FENCE_VAL), 1000, + timeout, true); + + hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); + + if (rc == -ETIMEDOUT) { + dev_err(hdev->dev, "Device CPU packet timeout (0x%x)\n", tmp); + hdev->device_cpu_disabled = true; + goto out; + } + + tmp = le32_to_cpu(pkt->ctl); + + rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT; + if (rc) { + dev_err(hdev->dev, "F/W ERROR %d for CPU packet %d\n", + rc, + (tmp & CPUCP_PKT_CTL_OPCODE_MASK) + >> CPUCP_PKT_CTL_OPCODE_SHIFT); + rc = -EIO; + } else if (result) { + *result = (long) le64_to_cpu(pkt->result); + } + +out: + mutex_unlock(&hdev->send_cpu_message_lock); + + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt); + + return rc; +} + +int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type) +{ + struct cpucp_packet pkt; + long result; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.value = cpu_to_le64(event_type); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, &result); + + if (rc) + dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type); + + return rc; +} + +int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, + size_t irq_arr_size) +{ + struct cpucp_unmask_irq_arr_packet *pkt; + size_t total_pkt_size; + long result; + int rc; + + total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) + + irq_arr_size; + + /* data should be aligned to 8 bytes in order to CPU-CP to copy it */ + total_pkt_size = (total_pkt_size + 0x7) & ~0x7; + + /* total_pkt_size is casted to u16 later on */ + if (total_pkt_size > USHRT_MAX) { + dev_err(hdev->dev, "too many elements in IRQ array\n"); + return -EINVAL; + } + + pkt = kzalloc(total_pkt_size, GFP_KERNEL); + if (!pkt) + return -ENOMEM; + + pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0])); + memcpy(&pkt->irqs, irq_arr, irq_arr_size); + + pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY << + CPUCP_PKT_CTL_OPCODE_SHIFT); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, + total_pkt_size, 0, &result); + + if (rc) + dev_err(hdev->dev, "failed to unmask IRQ array\n"); + + kfree(pkt); + + return rc; +} + +int hl_fw_test_cpu_queue(struct hl_device *hdev) +{ + struct cpucp_packet test_pkt = {}; + long result; + int rc; + + test_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST << + CPUCP_PKT_CTL_OPCODE_SHIFT); + test_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt, + sizeof(test_pkt), 0, &result); + + if (!rc) { + if (result != CPUCP_PACKET_FENCE_VAL) + dev_err(hdev->dev, + "CPU queue test failed (0x%08lX)\n", result); + } else { + dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc); + } + + return rc; +} + +void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, + dma_addr_t *dma_handle) +{ + u64 kernel_addr; + + kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size); + + *dma_handle = hdev->cpu_accessible_dma_address + + (kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem); + + return (void *) (uintptr_t) kernel_addr; +} + +void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, + void *vaddr) +{ + gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr, + size); +} + +int hl_fw_send_heartbeat(struct hl_device *hdev) +{ + struct cpucp_packet hb_pkt = {}; + long result; + int rc; + + hb_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST << + CPUCP_PKT_CTL_OPCODE_SHIFT); + hb_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt, + sizeof(hb_pkt), 0, &result); + + if ((rc) || (result != CPUCP_PACKET_FENCE_VAL)) + rc = -EIO; + + return rc; +} + +int hl_fw_cpucp_info_get(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct cpucp_packet pkt = {}; + void *cpucp_info_cpu_addr; + dma_addr_t cpucp_info_dma_addr; + long result; + int rc; + + cpucp_info_cpu_addr = + hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, + sizeof(struct cpucp_info), + &cpucp_info_dma_addr); + if (!cpucp_info_cpu_addr) { + dev_err(hdev->dev, + "Failed to allocate DMA memory for CPU-CP info packet\n"); + return -ENOMEM; + } + + memset(cpucp_info_cpu_addr, 0, sizeof(struct cpucp_info)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_INFO_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.addr = cpu_to_le64(cpucp_info_dma_addr); + pkt.data_max_size = cpu_to_le32(sizeof(struct cpucp_info)); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_CPUCP_INFO_TIMEOUT_USEC, &result); + if (rc) { + dev_err(hdev->dev, + "Failed to handle CPU-CP info pkt, error %d\n", rc); + goto out; + } + + memcpy(&prop->cpucp_info, cpucp_info_cpu_addr, + sizeof(prop->cpucp_info)); + + rc = hl_build_hwmon_channel_info(hdev, prop->cpucp_info.sensors); + if (rc) { + dev_err(hdev->dev, + "Failed to build hwmon channel info, error %d\n", rc); + rc = -EFAULT; + goto out; + } + +out: + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, + sizeof(struct cpucp_info), cpucp_info_cpu_addr); + + return rc; +} + +int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) +{ + struct cpucp_packet pkt = {}; + void *eeprom_info_cpu_addr; + dma_addr_t eeprom_info_dma_addr; + long result; + int rc; + + eeprom_info_cpu_addr = + hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, + max_size, &eeprom_info_dma_addr); + if (!eeprom_info_cpu_addr) { + dev_err(hdev->dev, + "Failed to allocate DMA memory for CPU-CP EEPROM packet\n"); + return -ENOMEM; + } + + memset(eeprom_info_cpu_addr, 0, max_size); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_EEPROM_DATA_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.addr = cpu_to_le64(eeprom_info_dma_addr); + pkt.data_max_size = cpu_to_le32(max_size); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_CPUCP_EEPROM_TIMEOUT_USEC, &result); + + if (rc) { + dev_err(hdev->dev, + "Failed to handle CPU-CP EEPROM packet, error %d\n", + rc); + goto out; + } + + /* result contains the actual size */ + memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size)); + +out: + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size, + eeprom_info_cpu_addr); + + return rc; +} + +int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, + struct hl_info_pci_counters *counters) +{ + struct cpucp_packet pkt = {}; + long result; + int rc; + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + + /* Fetch PCI rx counter */ + pkt.index = cpu_to_le32(cpucp_pcie_throughput_rx); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_CPUCP_INFO_TIMEOUT_USEC, &result); + if (rc) { + dev_err(hdev->dev, + "Failed to handle CPU-CP PCI info pkt, error %d\n", rc); + return rc; + } + counters->rx_throughput = result; + + memset(&pkt, 0, sizeof(pkt)); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + + /* Fetch PCI tx counter */ + pkt.index = cpu_to_le32(cpucp_pcie_throughput_tx); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_CPUCP_INFO_TIMEOUT_USEC, &result); + if (rc) { + dev_err(hdev->dev, + "Failed to handle CPU-CP PCI info pkt, error %d\n", rc); + return rc; + } + counters->tx_throughput = result; + + /* Fetch PCI replay counter */ + memset(&pkt, 0, sizeof(pkt)); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_REPLAY_CNT_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_CPUCP_INFO_TIMEOUT_USEC, &result); + if (rc) { + dev_err(hdev->dev, + "Failed to handle CPU-CP PCI info pkt, error %d\n", rc); + return rc; + } + counters->replay_cnt = (u32) result; + + return rc; +} + +int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy) +{ + struct cpucp_packet pkt = {}; + long result; + int rc; + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_TOTAL_ENERGY_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_CPUCP_INFO_TIMEOUT_USEC, &result); + if (rc) { + dev_err(hdev->dev, + "Failed to handle CpuCP total energy pkt, error %d\n", + rc); + return rc; + } + + *total_energy = result; + + return rc; +} + +static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg) +{ + u32 err_val; + + /* Some of the firmware status codes are deprecated in newer f/w + * versions. In those versions, the errors are reported + * in different registers. Therefore, we need to check those + * registers and print the exact errors. Moreover, there + * may be multiple errors, so we need to report on each error + * separately. Some of the error codes might indicate a state + * that is not an error per-se, but it is an error in production + * environment + */ + err_val = RREG32(boot_err0_reg); + if (!(err_val & CPU_BOOT_ERR0_ENABLED)) + return; + + if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL) + dev_err(hdev->dev, + "Device boot error - DRAM initialization failed\n"); + if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED) + dev_err(hdev->dev, "Device boot error - FIT image corrupted\n"); + if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL) + dev_err(hdev->dev, + "Device boot error - Thermal Sensor initialization failed\n"); + if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) + dev_warn(hdev->dev, + "Device boot warning - Skipped DRAM initialization\n"); + if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) + dev_warn(hdev->dev, + "Device boot error - Skipped waiting for BMC\n"); + if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY) + dev_err(hdev->dev, + "Device boot error - Serdes data from BMC not available\n"); + if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL) + dev_err(hdev->dev, + "Device boot error - NIC F/W initialization failed\n"); +} + +static void detect_cpu_boot_status(struct hl_device *hdev, u32 status) +{ + /* Some of the status codes below are deprecated in newer f/w + * versions but we keep them here for backward compatibility + */ + switch (status) { + case CPU_BOOT_STATUS_NA: + dev_err(hdev->dev, + "Device boot error - BTL did NOT run\n"); + break; + case CPU_BOOT_STATUS_IN_WFE: + dev_err(hdev->dev, + "Device boot error - Stuck inside WFE loop\n"); + break; + case CPU_BOOT_STATUS_IN_BTL: + dev_err(hdev->dev, + "Device boot error - Stuck in BTL\n"); + break; + case CPU_BOOT_STATUS_IN_PREBOOT: + dev_err(hdev->dev, + "Device boot error - Stuck in Preboot\n"); + break; + case CPU_BOOT_STATUS_IN_SPL: + dev_err(hdev->dev, + "Device boot error - Stuck in SPL\n"); + break; + case CPU_BOOT_STATUS_IN_UBOOT: + dev_err(hdev->dev, + "Device boot error - Stuck in u-boot\n"); + break; + case CPU_BOOT_STATUS_DRAM_INIT_FAIL: + dev_err(hdev->dev, + "Device boot error - DRAM initialization failed\n"); + break; + case CPU_BOOT_STATUS_UBOOT_NOT_READY: + dev_err(hdev->dev, + "Device boot error - u-boot stopped by user\n"); + break; + case CPU_BOOT_STATUS_TS_INIT_FAIL: + dev_err(hdev->dev, + "Device boot error - Thermal Sensor initialization failed\n"); + break; + default: + dev_err(hdev->dev, + "Device boot error - Invalid status code %d\n", + status); + break; + } +} + +int hl_fw_read_preboot_ver(struct hl_device *hdev, u32 cpu_boot_status_reg, + u32 boot_err0_reg, u32 timeout) +{ + u32 status; + int rc; + + if (!hdev->cpu_enable) + return 0; + + /* Need to check two possible scenarios: + * + * CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT - for newer firmwares where + * the preboot is waiting for the boot fit + * + * All other status values - for older firmwares where the uboot was + * loaded from the FLASH + */ + rc = hl_poll_timeout( + hdev, + cpu_boot_status_reg, + status, + (status == CPU_BOOT_STATUS_IN_UBOOT) || + (status == CPU_BOOT_STATUS_DRAM_RDY) || + (status == CPU_BOOT_STATUS_NIC_FW_RDY) || + (status == CPU_BOOT_STATUS_READY_TO_BOOT) || + (status == CPU_BOOT_STATUS_SRAM_AVAIL) || + (status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT), + 10000, + timeout); + + if (rc) { + dev_err(hdev->dev, "Failed to read preboot version\n"); + detect_cpu_boot_status(hdev, status); + fw_read_errors(hdev, boot_err0_reg); + return -EIO; + } + + hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT); + + return 0; +} + +int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, + u32 msg_to_cpu_reg, u32 cpu_msg_status_reg, + u32 boot_err0_reg, bool skip_bmc, + u32 cpu_timeout, u32 boot_fit_timeout) +{ + u32 status; + int rc; + + dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n", + cpu_timeout / USEC_PER_SEC); + + /* Wait for boot FIT request */ + rc = hl_poll_timeout( + hdev, + cpu_boot_status_reg, + status, + status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT, + 10000, + boot_fit_timeout); + + if (rc) { + dev_dbg(hdev->dev, + "No boot fit request received, resuming boot\n"); + } else { + rc = hdev->asic_funcs->load_boot_fit_to_device(hdev); + if (rc) + goto out; + + /* Clear device CPU message status */ + WREG32(cpu_msg_status_reg, CPU_MSG_CLR); + + /* Signal device CPU that boot loader is ready */ + WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY); + + /* Poll for CPU device ack */ + rc = hl_poll_timeout( + hdev, + cpu_msg_status_reg, + status, + status == CPU_MSG_OK, + 10000, + boot_fit_timeout); + + if (rc) { + dev_err(hdev->dev, + "Timeout waiting for boot fit load ack\n"); + goto out; + } + + /* Clear message */ + WREG32(msg_to_cpu_reg, KMD_MSG_NA); + } + + /* Make sure CPU boot-loader is running */ + rc = hl_poll_timeout( + hdev, + cpu_boot_status_reg, + status, + (status == CPU_BOOT_STATUS_DRAM_RDY) || + (status == CPU_BOOT_STATUS_NIC_FW_RDY) || + (status == CPU_BOOT_STATUS_READY_TO_BOOT) || + (status == CPU_BOOT_STATUS_SRAM_AVAIL), + 10000, + cpu_timeout); + + /* Read U-Boot version now in case we will later fail */ + hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT); + + if (rc) { + detect_cpu_boot_status(hdev, status); + rc = -EIO; + goto out; + } + + if (!hdev->fw_loading) { + dev_info(hdev->dev, "Skip loading FW\n"); + goto out; + } + + if (status == CPU_BOOT_STATUS_SRAM_AVAIL) + goto out; + + dev_info(hdev->dev, + "Loading firmware to device, may take some time...\n"); + + rc = hdev->asic_funcs->load_firmware_to_device(hdev); + if (rc) + goto out; + + if (skip_bmc) { + WREG32(msg_to_cpu_reg, KMD_MSG_SKIP_BMC); + + rc = hl_poll_timeout( + hdev, + cpu_boot_status_reg, + status, + (status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED), + 10000, + cpu_timeout); + + if (rc) { + dev_err(hdev->dev, + "Failed to get ACK on skipping BMC, %d\n", + status); + WREG32(msg_to_cpu_reg, KMD_MSG_NA); + rc = -EIO; + goto out; + } + } + + WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY); + + rc = hl_poll_timeout( + hdev, + cpu_boot_status_reg, + status, + (status == CPU_BOOT_STATUS_SRAM_AVAIL), + 10000, + cpu_timeout); + + /* Clear message */ + WREG32(msg_to_cpu_reg, KMD_MSG_NA); + + if (rc) { + if (status == CPU_BOOT_STATUS_FIT_CORRUPTED) + dev_err(hdev->dev, + "Device reports FIT image is corrupted\n"); + else + dev_err(hdev->dev, + "Failed to load firmware to device, %d\n", + status); + + rc = -EIO; + goto out; + } + + dev_info(hdev->dev, "Successfully loaded firmware to device\n"); + +out: + fw_read_errors(hdev, boot_err0_reg); + + return rc; +} diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h new file mode 100644 index 000000000..6ed974d2d --- /dev/null +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -0,0 +1,2078 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#ifndef HABANALABSP_H_ +#define HABANALABSP_H_ + +#include "../include/common/cpucp_if.h" +#include "../include/common/qman_if.h" +#include <uapi/misc/habanalabs.h> + +#include <linux/cdev.h> +#include <linux/iopoll.h> +#include <linux/irqreturn.h> +#include <linux/dma-direction.h> +#include <linux/scatterlist.h> +#include <linux/hashtable.h> +#include <linux/bitfield.h> + +#define HL_NAME "habanalabs" + +/* Use upper bits of mmap offset to store habana driver specific information. + * bits[63:62] - Encode mmap type + * bits[45:0] - mmap offset value + * + * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these + * defines are w.r.t to PAGE_SIZE + */ +#define HL_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT) +#define HL_MMAP_TYPE_MASK (0x3ull << HL_MMAP_TYPE_SHIFT) +#define HL_MMAP_TYPE_CB (0x2ull << HL_MMAP_TYPE_SHIFT) + +#define HL_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFull >> PAGE_SHIFT) +#define HL_MMAP_OFFSET_VALUE_GET(off) (off & HL_MMAP_OFFSET_VALUE_MASK) + +#define HL_PENDING_RESET_PER_SEC 30 + +#define HL_HARD_RESET_MAX_TIMEOUT 120 + +#define HL_DEVICE_TIMEOUT_USEC 1000000 /* 1 s */ + +#define HL_HEARTBEAT_PER_USEC 5000000 /* 5 s */ + +#define HL_PLL_LOW_JOB_FREQ_USEC 5000000 /* 5 s */ + +#define HL_CPUCP_INFO_TIMEOUT_USEC 10000000 /* 10s */ +#define HL_CPUCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */ + +#define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */ + +#define HL_SIM_MAX_TIMEOUT_US 10000000 /* 10s */ + +#define HL_IDLE_BUSY_TS_ARR_SIZE 4096 + +/* Memory */ +#define MEM_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ + +/* MMU */ +#define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ + +/* + * HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream + * HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream + */ +#define HL_RSVD_SOBS 4 +#define HL_RSVD_MONS 2 + +#define HL_RSVD_SOBS_IN_USE 2 +#define HL_RSVD_MONS_IN_USE 1 + +#define HL_MAX_SOB_VAL (1 << 15) + +#define IS_POWER_OF_2(n) (n != 0 && ((n & (n - 1)) == 0)) +#define IS_MAX_PENDING_CS_VALID(n) (IS_POWER_OF_2(n) && (n > 1)) + +#define HL_PCI_NUM_BARS 6 + +#define HL_MAX_DCORES 4 + +/** + * struct pgt_info - MMU hop page info. + * @node: hash linked-list node for the pgts shadow hash of pgts. + * @phys_addr: physical address of the pgt. + * @shadow_addr: shadow hop in the host. + * @ctx: pointer to the owner ctx. + * @num_of_ptes: indicates how many ptes are used in the pgt. + * + * The MMU page tables hierarchy is placed on the DRAM. When a new level (hop) + * is needed during mapping, a new page is allocated and this structure holds + * its essential information. During unmapping, if no valid PTEs remained in the + * page, it is freed with its pgt_info structure. + */ +struct pgt_info { + struct hlist_node node; + u64 phys_addr; + u64 shadow_addr; + struct hl_ctx *ctx; + int num_of_ptes; +}; + +struct hl_device; +struct hl_fpriv; + +/** + * enum hl_pci_match_mode - pci match mode per region + * @PCI_ADDRESS_MATCH_MODE: address match mode + * @PCI_BAR_MATCH_MODE: bar match mode + */ +enum hl_pci_match_mode { + PCI_ADDRESS_MATCH_MODE, + PCI_BAR_MATCH_MODE +}; + +/** + * enum hl_fw_component - F/W components to read version through registers. + * @FW_COMP_UBOOT: u-boot. + * @FW_COMP_PREBOOT: preboot. + */ +enum hl_fw_component { + FW_COMP_UBOOT, + FW_COMP_PREBOOT +}; + +/** + * enum hl_queue_type - Supported QUEUE types. + * @QUEUE_TYPE_NA: queue is not available. + * @QUEUE_TYPE_EXT: external queue which is a DMA channel that may access the + * host. + * @QUEUE_TYPE_INT: internal queue that performs DMA inside the device's + * memories and/or operates the compute engines. + * @QUEUE_TYPE_CPU: S/W queue for communication with the device's CPU. + * @QUEUE_TYPE_HW: queue of DMA and compute engines jobs, for which completion + * notifications are sent by H/W. + */ +enum hl_queue_type { + QUEUE_TYPE_NA, + QUEUE_TYPE_EXT, + QUEUE_TYPE_INT, + QUEUE_TYPE_CPU, + QUEUE_TYPE_HW +}; + +enum hl_cs_type { + CS_TYPE_DEFAULT, + CS_TYPE_SIGNAL, + CS_TYPE_WAIT +}; + +/* + * struct hl_inbound_pci_region - inbound region descriptor + * @mode: pci match mode for this region + * @addr: region target address + * @size: region size in bytes + * @offset_in_bar: offset within bar (address match mode) + * @bar: bar id + */ +struct hl_inbound_pci_region { + enum hl_pci_match_mode mode; + u64 addr; + u64 size; + u64 offset_in_bar; + u8 bar; +}; + +/* + * struct hl_outbound_pci_region - outbound region descriptor + * @addr: region target address + * @size: region size in bytes + */ +struct hl_outbound_pci_region { + u64 addr; + u64 size; +}; + +/* + * struct hl_hw_sob - H/W SOB info. + * @hdev: habanalabs device structure. + * @kref: refcount of this SOB. The SOB will reset once the refcount is zero. + * @sob_id: id of this SOB. + * @q_idx: the H/W queue that uses this SOB. + */ +struct hl_hw_sob { + struct hl_device *hdev; + struct kref kref; + u32 sob_id; + u32 q_idx; +}; + +/** + * struct hw_queue_properties - queue information. + * @type: queue type. + * @driver_only: true if only the driver is allowed to send a job to this queue, + * false otherwise. + * @requires_kernel_cb: true if a CB handle must be provided for jobs on this + * queue, false otherwise (a CB address must be provided). + * @supports_sync_stream: True if queue supports sync stream + */ +struct hw_queue_properties { + enum hl_queue_type type; + u8 driver_only; + u8 requires_kernel_cb; + u8 supports_sync_stream; +}; + +/** + * enum vm_type_t - virtual memory mapping request information. + * @VM_TYPE_USERPTR: mapping of user memory to device virtual address. + * @VM_TYPE_PHYS_PACK: mapping of DRAM memory to device virtual address. + */ +enum vm_type_t { + VM_TYPE_USERPTR = 0x1, + VM_TYPE_PHYS_PACK = 0x2 +}; + +/** + * enum hl_device_hw_state - H/W device state. use this to understand whether + * to do reset before hw_init or not + * @HL_DEVICE_HW_STATE_CLEAN: H/W state is clean. i.e. after hard reset + * @HL_DEVICE_HW_STATE_DIRTY: H/W state is dirty. i.e. we started to execute + * hw_init + */ +enum hl_device_hw_state { + HL_DEVICE_HW_STATE_CLEAN = 0, + HL_DEVICE_HW_STATE_DIRTY +}; + +/** + * struct hl_mmu_properties - ASIC specific MMU address translation properties. + * @start_addr: virtual start address of the memory region. + * @end_addr: virtual end address of the memory region. + * @hop0_shift: shift of hop 0 mask. + * @hop1_shift: shift of hop 1 mask. + * @hop2_shift: shift of hop 2 mask. + * @hop3_shift: shift of hop 3 mask. + * @hop4_shift: shift of hop 4 mask. + * @hop5_shift: shift of hop 5 mask. + * @hop0_mask: mask to get the PTE address in hop 0. + * @hop1_mask: mask to get the PTE address in hop 1. + * @hop2_mask: mask to get the PTE address in hop 2. + * @hop3_mask: mask to get the PTE address in hop 3. + * @hop4_mask: mask to get the PTE address in hop 4. + * @hop5_mask: mask to get the PTE address in hop 5. + * @page_size: default page size used to allocate memory. + * @num_hops: The amount of hops supported by the translation table. + */ +struct hl_mmu_properties { + u64 start_addr; + u64 end_addr; + u64 hop0_shift; + u64 hop1_shift; + u64 hop2_shift; + u64 hop3_shift; + u64 hop4_shift; + u64 hop5_shift; + u64 hop0_mask; + u64 hop1_mask; + u64 hop2_mask; + u64 hop3_mask; + u64 hop4_mask; + u64 hop5_mask; + u32 page_size; + u32 num_hops; +}; + +/** + * struct asic_fixed_properties - ASIC specific immutable properties. + * @hw_queues_props: H/W queues properties. + * @cpucp_info: received various information from CPU-CP regarding the H/W, e.g. + * available sensors. + * @uboot_ver: F/W U-boot version. + * @preboot_ver: F/W Preboot version. + * @dmmu: DRAM MMU address translation properties. + * @pmmu: PCI (host) MMU address translation properties. + * @pmmu_huge: PCI (host) MMU address translation properties for memory + * allocated with huge pages. + * @sram_base_address: SRAM physical start address. + * @sram_end_address: SRAM physical end address. + * @sram_user_base_address - SRAM physical start address for user access. + * @dram_base_address: DRAM physical start address. + * @dram_end_address: DRAM physical end address. + * @dram_user_base_address: DRAM physical start address for user access. + * @dram_size: DRAM total size. + * @dram_pci_bar_size: size of PCI bar towards DRAM. + * @max_power_default: max power of the device after reset + * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page + * fault. + * @pcie_dbi_base_address: Base address of the PCIE_DBI block. + * @pcie_aux_dbi_reg_addr: Address of the PCIE_AUX DBI register. + * @mmu_pgt_addr: base physical address in DRAM of MMU page tables. + * @mmu_dram_default_page_addr: DRAM default page physical address. + * @cb_va_start_addr: virtual start address of command buffers which are mapped + * to the device's MMU. + * @cb_va_end_addr: virtual end address of command buffers which are mapped to + * the device's MMU. + * @mmu_pgt_size: MMU page tables total size. + * @mmu_pte_size: PTE size in MMU page tables. + * @mmu_hop_table_size: MMU hop table size. + * @mmu_hop0_tables_total_size: total size of MMU hop0 tables. + * @dram_page_size: page size for MMU DRAM allocation. + * @cfg_size: configuration space size on SRAM. + * @sram_size: total size of SRAM. + * @max_asid: maximum number of open contexts (ASIDs). + * @num_of_events: number of possible internal H/W IRQs. + * @psoc_pci_pll_nr: PCI PLL NR value. + * @psoc_pci_pll_nf: PCI PLL NF value. + * @psoc_pci_pll_od: PCI PLL OD value. + * @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value. + * @psoc_timestamp_frequency: frequency of the psoc timestamp clock. + * @high_pll: high PLL frequency used by the device. + * @cb_pool_cb_cnt: number of CBs in the CB pool. + * @cb_pool_cb_size: size of each CB in the CB pool. + * @max_pending_cs: maximum of concurrent pending command submissions + * @max_queues: maximum amount of queues in the system + * @sync_stream_first_sob: first sync object available for sync stream use + * @sync_stream_first_mon: first monitor available for sync stream use + * @first_available_user_sob: first sob available for the user + * @first_available_user_mon: first monitor available for the user + * @tpc_enabled_mask: which TPCs are enabled. + * @completion_queues_count: number of completion queues. + * @fw_security_disabled: true if security measures are disabled in firmware, + * false otherwise + */ +struct asic_fixed_properties { + struct hw_queue_properties *hw_queues_props; + struct cpucp_info cpucp_info; + char uboot_ver[VERSION_MAX_LEN]; + char preboot_ver[VERSION_MAX_LEN]; + struct hl_mmu_properties dmmu; + struct hl_mmu_properties pmmu; + struct hl_mmu_properties pmmu_huge; + u64 sram_base_address; + u64 sram_end_address; + u64 sram_user_base_address; + u64 dram_base_address; + u64 dram_end_address; + u64 dram_user_base_address; + u64 dram_size; + u64 dram_pci_bar_size; + u64 max_power_default; + u64 dram_size_for_default_page_mapping; + u64 pcie_dbi_base_address; + u64 pcie_aux_dbi_reg_addr; + u64 mmu_pgt_addr; + u64 mmu_dram_default_page_addr; + u64 cb_va_start_addr; + u64 cb_va_end_addr; + u32 mmu_pgt_size; + u32 mmu_pte_size; + u32 mmu_hop_table_size; + u32 mmu_hop0_tables_total_size; + u32 dram_page_size; + u32 cfg_size; + u32 sram_size; + u32 max_asid; + u32 num_of_events; + u32 psoc_pci_pll_nr; + u32 psoc_pci_pll_nf; + u32 psoc_pci_pll_od; + u32 psoc_pci_pll_div_factor; + u32 psoc_timestamp_frequency; + u32 high_pll; + u32 cb_pool_cb_cnt; + u32 cb_pool_cb_size; + u32 max_pending_cs; + u32 max_queues; + u16 sync_stream_first_sob; + u16 sync_stream_first_mon; + u16 first_available_user_sob[HL_MAX_DCORES]; + u16 first_available_user_mon[HL_MAX_DCORES]; + u8 tpc_enabled_mask; + u8 completion_queues_count; + u8 fw_security_disabled; +}; + +/** + * struct hl_fence - software synchronization primitive + * @completion: fence is implemented using completion + * @refcount: refcount for this fence + * @error: mark this fence with error + * + */ +struct hl_fence { + struct completion completion; + struct kref refcount; + int error; +}; + +/** + * struct hl_cs_compl - command submission completion object. + * @base_fence: hl fence object. + * @lock: spinlock to protect fence. + * @hdev: habanalabs device structure. + * @hw_sob: the H/W SOB used in this signal/wait CS. + * @cs_seq: command submission sequence number. + * @type: type of the CS - signal/wait. + * @sob_val: the SOB value that is used in this signal/wait CS. + */ +struct hl_cs_compl { + struct hl_fence base_fence; + spinlock_t lock; + struct hl_device *hdev; + struct hl_hw_sob *hw_sob; + u64 cs_seq; + enum hl_cs_type type; + u16 sob_val; +}; + +/* + * Command Buffers + */ + +/** + * struct hl_cb_mgr - describes a Command Buffer Manager. + * @cb_lock: protects cb_handles. + * @cb_handles: an idr to hold all command buffer handles. + */ +struct hl_cb_mgr { + spinlock_t cb_lock; + struct idr cb_handles; /* protected by cb_lock */ +}; + +/** + * struct hl_cb - describes a Command Buffer. + * @refcount: reference counter for usage of the CB. + * @hdev: pointer to device this CB belongs to. + * @ctx: pointer to the CB owner's context. + * @lock: spinlock to protect mmap/cs flows. + * @debugfs_list: node in debugfs list of command buffers. + * @pool_list: node in pool list of command buffers. + * @va_block_list: list of virtual addresses blocks of the CB if it is mapped to + * the device's MMU. + * @id: the CB's ID. + * @kernel_address: Holds the CB's kernel virtual address. + * @bus_address: Holds the CB's DMA address. + * @mmap_size: Holds the CB's size that was mmaped. + * @size: holds the CB's size. + * @cs_cnt: holds number of CS that this CB participates in. + * @mmap: true if the CB is currently mmaped to user. + * @is_pool: true if CB was acquired from the pool, false otherwise. + * @is_internal: internaly allocated + * @is_mmu_mapped: true if the CB is mapped to the device's MMU. + */ +struct hl_cb { + struct kref refcount; + struct hl_device *hdev; + struct hl_ctx *ctx; + spinlock_t lock; + struct list_head debugfs_list; + struct list_head pool_list; + struct list_head va_block_list; + u64 id; + void *kernel_address; + dma_addr_t bus_address; + u32 mmap_size; + u32 size; + u32 cs_cnt; + u8 mmap; + u8 is_pool; + u8 is_internal; + u8 is_mmu_mapped; +}; + + +/* + * QUEUES + */ + +struct hl_cs_job; + +/* Queue length of external and HW queues */ +#define HL_QUEUE_LENGTH 4096 +#define HL_QUEUE_SIZE_IN_BYTES (HL_QUEUE_LENGTH * HL_BD_SIZE) + +#if (HL_MAX_JOBS_PER_CS > HL_QUEUE_LENGTH) +#error "HL_QUEUE_LENGTH must be greater than HL_MAX_JOBS_PER_CS" +#endif + +/* HL_CQ_LENGTH is in units of struct hl_cq_entry */ +#define HL_CQ_LENGTH HL_QUEUE_LENGTH +#define HL_CQ_SIZE_IN_BYTES (HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE) + +/* Must be power of 2 */ +#define HL_EQ_LENGTH 64 +#define HL_EQ_SIZE_IN_BYTES (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE) + +/* Host <-> CPU-CP shared memory size */ +#define HL_CPU_ACCESSIBLE_MEM_SIZE SZ_2M + +/** + * struct hl_hw_queue - describes a H/W transport queue. + * @hw_sob: array of the used H/W SOBs by this H/W queue. + * @shadow_queue: pointer to a shadow queue that holds pointers to jobs. + * @queue_type: type of queue. + * @kernel_address: holds the queue's kernel virtual address. + * @bus_address: holds the queue's DMA address. + * @pi: holds the queue's pi value. + * @ci: holds the queue's ci value, AS CALCULATED BY THE DRIVER (not real ci). + * @hw_queue_id: the id of the H/W queue. + * @cq_id: the id for the corresponding CQ for this H/W queue. + * @msi_vec: the IRQ number of the H/W queue. + * @int_queue_len: length of internal queue (number of entries). + * @next_sob_val: the next value to use for the currently used SOB. + * @base_sob_id: the base SOB id of the SOBs used by this queue. + * @base_mon_id: the base MON id of the MONs used by this queue. + * @valid: is the queue valid (we have array of 32 queues, not all of them + * exist). + * @curr_sob_offset: the id offset to the currently used SOB from the + * HL_RSVD_SOBS that are being used by this queue. + * @supports_sync_stream: True if queue supports sync stream + */ +struct hl_hw_queue { + struct hl_hw_sob hw_sob[HL_RSVD_SOBS]; + struct hl_cs_job **shadow_queue; + enum hl_queue_type queue_type; + void *kernel_address; + dma_addr_t bus_address; + u32 pi; + atomic_t ci; + u32 hw_queue_id; + u32 cq_id; + u32 msi_vec; + u16 int_queue_len; + u16 next_sob_val; + u16 base_sob_id; + u16 base_mon_id; + u8 valid; + u8 curr_sob_offset; + u8 supports_sync_stream; +}; + +/** + * struct hl_cq - describes a completion queue + * @hdev: pointer to the device structure + * @kernel_address: holds the queue's kernel virtual address + * @bus_address: holds the queue's DMA address + * @cq_idx: completion queue index in array + * @hw_queue_id: the id of the matching H/W queue + * @ci: ci inside the queue + * @pi: pi inside the queue + * @free_slots_cnt: counter of free slots in queue + */ +struct hl_cq { + struct hl_device *hdev; + void *kernel_address; + dma_addr_t bus_address; + u32 cq_idx; + u32 hw_queue_id; + u32 ci; + u32 pi; + atomic_t free_slots_cnt; +}; + +/** + * struct hl_eq - describes the event queue (single one per device) + * @hdev: pointer to the device structure + * @kernel_address: holds the queue's kernel virtual address + * @bus_address: holds the queue's DMA address + * @ci: ci inside the queue + */ +struct hl_eq { + struct hl_device *hdev; + void *kernel_address; + dma_addr_t bus_address; + u32 ci; +}; + + +/* + * ASICs + */ + +/** + * enum hl_asic_type - supported ASIC types. + * @ASIC_INVALID: Invalid ASIC type. + * @ASIC_GOYA: Goya device. + * @ASIC_GAUDI: Gaudi device. + */ +enum hl_asic_type { + ASIC_INVALID, + ASIC_GOYA, + ASIC_GAUDI +}; + +struct hl_cs_parser; + +/** + * enum hl_pm_mng_profile - power management profile. + * @PM_AUTO: internal clock is set by the Linux driver. + * @PM_MANUAL: internal clock is set by the user. + * @PM_LAST: last power management type. + */ +enum hl_pm_mng_profile { + PM_AUTO = 1, + PM_MANUAL, + PM_LAST +}; + +/** + * enum hl_pll_frequency - PLL frequency. + * @PLL_HIGH: high frequency. + * @PLL_LOW: low frequency. + * @PLL_LAST: last frequency values that were configured by the user. + */ +enum hl_pll_frequency { + PLL_HIGH = 1, + PLL_LOW, + PLL_LAST +}; + +#define PLL_REF_CLK 50 + +enum div_select_defs { + DIV_SEL_REF_CLK = 0, + DIV_SEL_PLL_CLK = 1, + DIV_SEL_DIVIDED_REF = 2, + DIV_SEL_DIVIDED_PLL = 3, +}; + +/** + * struct hl_asic_funcs - ASIC specific functions that are can be called from + * common code. + * @early_init: sets up early driver state (pre sw_init), doesn't configure H/W. + * @early_fini: tears down what was done in early_init. + * @late_init: sets up late driver/hw state (post hw_init) - Optional. + * @late_fini: tears down what was done in late_init (pre hw_fini) - Optional. + * @sw_init: sets up driver state, does not configure H/W. + * @sw_fini: tears down driver state, does not configure H/W. + * @hw_init: sets up the H/W state. + * @hw_fini: tears down the H/W state. + * @halt_engines: halt engines, needed for reset sequence. This also disables + * interrupts from the device. Should be called before + * hw_fini and before CS rollback. + * @suspend: handles IP specific H/W or SW changes for suspend. + * @resume: handles IP specific H/W or SW changes for resume. + * @cb_mmap: maps a CB. + * @ring_doorbell: increment PI on a given QMAN. + * @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific + * function because the PQs are located in different memory areas + * per ASIC (SRAM, DRAM, Host memory) and therefore, the method of + * writing the PQE must match the destination memory area + * properties. + * @asic_dma_alloc_coherent: Allocate coherent DMA memory by calling + * dma_alloc_coherent(). This is ASIC function because + * its implementation is not trivial when the driver + * is loaded in simulation mode (not upstreamed). + * @asic_dma_free_coherent: Free coherent DMA memory by calling + * dma_free_coherent(). This is ASIC function because + * its implementation is not trivial when the driver + * is loaded in simulation mode (not upstreamed). + * @get_int_queue_base: get the internal queue base address. + * @test_queues: run simple test on all queues for sanity check. + * @asic_dma_pool_zalloc: small DMA allocation of coherent memory from DMA pool. + * size of allocation is HL_DMA_POOL_BLK_SIZE. + * @asic_dma_pool_free: free small DMA allocation from pool. + * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool. + * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool. + * @hl_dma_unmap_sg: DMA unmap scatter-gather list. + * @cs_parser: parse Command Submission. + * @asic_dma_map_sg: DMA map scatter-gather list. + * @get_dma_desc_list_size: get number of LIN_DMA packets required for CB. + * @add_end_of_cb_packets: Add packets to the end of CB, if device requires it. + * @update_eq_ci: update event queue CI. + * @context_switch: called upon ASID context switch. + * @restore_phase_topology: clear all SOBs amd MONs. + * @debugfs_read32: debug interface for reading u32 from DRAM/SRAM. + * @debugfs_write32: debug interface for writing u32 to DRAM/SRAM. + * @add_device_attr: add ASIC specific device attributes. + * @handle_eqe: handle event queue entry (IRQ) from CPU-CP. + * @set_pll_profile: change PLL profile (manual/automatic). + * @get_events_stat: retrieve event queue entries histogram. + * @read_pte: read MMU page table entry from DRAM. + * @write_pte: write MMU page table entry to DRAM. + * @mmu_invalidate_cache: flush MMU STLB host/DRAM cache, either with soft + * (L1 only) or hard (L0 & L1) flush. + * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with + * ASID-VA-size mask. + * @send_heartbeat: send is-alive packet to CPU-CP and verify response. + * @set_clock_gating: enable/disable clock gating per engine according to + * clock gating mask in hdev + * @disable_clock_gating: disable clock gating completely + * @debug_coresight: perform certain actions on Coresight for debugging. + * @is_device_idle: return true if device is idle, false otherwise. + * @soft_reset_late_init: perform certain actions needed after soft reset. + * @hw_queues_lock: acquire H/W queues lock. + * @hw_queues_unlock: release H/W queues lock. + * @get_pci_id: retrieve PCI ID. + * @get_eeprom_data: retrieve EEPROM data from F/W. + * @send_cpu_message: send message to F/W. If the message is timedout, the + * driver will eventually reset the device. The timeout can + * be determined by the calling function or it can be 0 and + * then the timeout is the default timeout for the specific + * ASIC + * @get_hw_state: retrieve the H/W state + * @pci_bars_map: Map PCI BARs. + * @init_iatu: Initialize the iATU unit inside the PCI controller. + * @rreg: Read a register. Needed for simulator support. + * @wreg: Write a register. Needed for simulator support. + * @halt_coresight: stop the ETF and ETR traces. + * @ctx_init: context dependent initialization. + * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz + * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index. + * @read_device_fw_version: read the device's firmware versions that are + * contained in registers + * @load_firmware_to_device: load the firmware to the device's memory + * @load_boot_fit_to_device: load boot fit to device's memory + * @get_signal_cb_size: Get signal CB size. + * @get_wait_cb_size: Get wait CB size. + * @gen_signal_cb: Generate a signal CB. + * @gen_wait_cb: Generate a wait CB. + * @reset_sob: Reset a SOB. + * @set_dma_mask_from_fw: set the DMA mask in the driver according to the + * firmware configuration + * @get_device_time: Get the device time. + */ +struct hl_asic_funcs { + int (*early_init)(struct hl_device *hdev); + int (*early_fini)(struct hl_device *hdev); + int (*late_init)(struct hl_device *hdev); + void (*late_fini)(struct hl_device *hdev); + int (*sw_init)(struct hl_device *hdev); + int (*sw_fini)(struct hl_device *hdev); + int (*hw_init)(struct hl_device *hdev); + void (*hw_fini)(struct hl_device *hdev, bool hard_reset); + void (*halt_engines)(struct hl_device *hdev, bool hard_reset); + int (*suspend)(struct hl_device *hdev); + int (*resume)(struct hl_device *hdev); + int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size); + void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi); + void (*pqe_write)(struct hl_device *hdev, __le64 *pqe, + struct hl_bd *bd); + void* (*asic_dma_alloc_coherent)(struct hl_device *hdev, size_t size, + dma_addr_t *dma_handle, gfp_t flag); + void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size, + void *cpu_addr, dma_addr_t dma_handle); + void* (*get_int_queue_base)(struct hl_device *hdev, u32 queue_id, + dma_addr_t *dma_handle, u16 *queue_len); + int (*test_queues)(struct hl_device *hdev); + void* (*asic_dma_pool_zalloc)(struct hl_device *hdev, size_t size, + gfp_t mem_flags, dma_addr_t *dma_handle); + void (*asic_dma_pool_free)(struct hl_device *hdev, void *vaddr, + dma_addr_t dma_addr); + void* (*cpu_accessible_dma_pool_alloc)(struct hl_device *hdev, + size_t size, dma_addr_t *dma_handle); + void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev, + size_t size, void *vaddr); + void (*hl_dma_unmap_sg)(struct hl_device *hdev, + struct scatterlist *sgl, int nents, + enum dma_data_direction dir); + int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser); + int (*asic_dma_map_sg)(struct hl_device *hdev, + struct scatterlist *sgl, int nents, + enum dma_data_direction dir); + u32 (*get_dma_desc_list_size)(struct hl_device *hdev, + struct sg_table *sgt); + void (*add_end_of_cb_packets)(struct hl_device *hdev, + void *kernel_address, u32 len, + u64 cq_addr, u32 cq_val, u32 msix_num, + bool eb); + void (*update_eq_ci)(struct hl_device *hdev, u32 val); + int (*context_switch)(struct hl_device *hdev, u32 asid); + void (*restore_phase_topology)(struct hl_device *hdev); + int (*debugfs_read32)(struct hl_device *hdev, u64 addr, u32 *val); + int (*debugfs_write32)(struct hl_device *hdev, u64 addr, u32 val); + int (*debugfs_read64)(struct hl_device *hdev, u64 addr, u64 *val); + int (*debugfs_write64)(struct hl_device *hdev, u64 addr, u64 val); + void (*add_device_attr)(struct hl_device *hdev, + struct attribute_group *dev_attr_grp); + void (*handle_eqe)(struct hl_device *hdev, + struct hl_eq_entry *eq_entry); + void (*set_pll_profile)(struct hl_device *hdev, + enum hl_pll_frequency freq); + void* (*get_events_stat)(struct hl_device *hdev, bool aggregate, + u32 *size); + u64 (*read_pte)(struct hl_device *hdev, u64 addr); + void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val); + int (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard, + u32 flags); + int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard, + u32 asid, u64 va, u64 size); + int (*send_heartbeat)(struct hl_device *hdev); + void (*set_clock_gating)(struct hl_device *hdev); + void (*disable_clock_gating)(struct hl_device *hdev); + int (*debug_coresight)(struct hl_device *hdev, void *data); + bool (*is_device_idle)(struct hl_device *hdev, u64 *mask, + struct seq_file *s); + int (*soft_reset_late_init)(struct hl_device *hdev); + void (*hw_queues_lock)(struct hl_device *hdev); + void (*hw_queues_unlock)(struct hl_device *hdev); + u32 (*get_pci_id)(struct hl_device *hdev); + int (*get_eeprom_data)(struct hl_device *hdev, void *data, + size_t max_size); + int (*send_cpu_message)(struct hl_device *hdev, u32 *msg, + u16 len, u32 timeout, long *result); + enum hl_device_hw_state (*get_hw_state)(struct hl_device *hdev); + int (*pci_bars_map)(struct hl_device *hdev); + int (*init_iatu)(struct hl_device *hdev); + u32 (*rreg)(struct hl_device *hdev, u32 reg); + void (*wreg)(struct hl_device *hdev, u32 reg, u32 val); + void (*halt_coresight)(struct hl_device *hdev); + int (*ctx_init)(struct hl_ctx *ctx); + int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk); + u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx); + void (*read_device_fw_version)(struct hl_device *hdev, + enum hl_fw_component fwc); + int (*load_firmware_to_device)(struct hl_device *hdev); + int (*load_boot_fit_to_device)(struct hl_device *hdev); + u32 (*get_signal_cb_size)(struct hl_device *hdev); + u32 (*get_wait_cb_size)(struct hl_device *hdev); + void (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id); + void (*gen_wait_cb)(struct hl_device *hdev, void *data, u16 sob_id, + u16 sob_val, u16 mon_id, u32 q_idx); + void (*reset_sob)(struct hl_device *hdev, void *data); + void (*set_dma_mask_from_fw)(struct hl_device *hdev); + u64 (*get_device_time)(struct hl_device *hdev); +}; + + +/* + * CONTEXTS + */ + +#define HL_KERNEL_ASID_ID 0 + +/** + * struct hl_va_range - virtual addresses range. + * @lock: protects the virtual addresses list. + * @list: list of virtual addresses blocks available for mappings. + * @start_addr: range start address. + * @end_addr: range end address. + */ +struct hl_va_range { + struct mutex lock; + struct list_head list; + u64 start_addr; + u64 end_addr; +}; + +/** + * struct hl_ctx - user/kernel context. + * @mem_hash: holds mapping from virtual address to virtual memory area + * descriptor (hl_vm_phys_pg_list or hl_userptr). + * @mmu_shadow_hash: holds a mapping from shadow address to pgt_info structure. + * @hpriv: pointer to the private (Kernel Driver) data of the process (fd). + * @hdev: pointer to the device structure. + * @refcount: reference counter for the context. Context is released only when + * this hits 0l. It is incremented on CS and CS_WAIT. + * @cs_pending: array of hl fence objects representing pending CS. + * @host_va_range: holds available virtual addresses for host mappings. + * @host_huge_va_range: holds available virtual addresses for host mappings + * with huge pages. + * @dram_va_range: holds available virtual addresses for DRAM mappings. + * @mem_hash_lock: protects the mem_hash. + * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the + * MMU hash or walking the PGT requires talking this lock. + * @debugfs_list: node in debugfs list of contexts. + * @cb_va_pool: device VA pool for command buffers which are mapped to the + * device's MMU. + * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed + * to user so user could inquire about CS. It is used as + * index to cs_pending array. + * @dram_default_hops: array that holds all hops addresses needed for default + * DRAM mapping. + * @cs_lock: spinlock to protect cs_sequence. + * @dram_phys_mem: amount of used physical DRAM memory by this context. + * @thread_ctx_switch_token: token to prevent multiple threads of the same + * context from running the context switch phase. + * Only a single thread should run it. + * @thread_ctx_switch_wait_token: token to prevent the threads that didn't run + * the context switch phase from moving to their + * execution phase before the context switch phase + * has finished. + * @asid: context's unique address space ID in the device's MMU. + * @handle: context's opaque handle for user + */ +struct hl_ctx { + DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS); + DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS); + struct hl_fpriv *hpriv; + struct hl_device *hdev; + struct kref refcount; + struct hl_fence **cs_pending; + struct hl_va_range *host_va_range; + struct hl_va_range *host_huge_va_range; + struct hl_va_range *dram_va_range; + struct mutex mem_hash_lock; + struct mutex mmu_lock; + struct list_head debugfs_list; + struct hl_cs_counters cs_counters; + struct gen_pool *cb_va_pool; + u64 cs_sequence; + u64 *dram_default_hops; + spinlock_t cs_lock; + atomic64_t dram_phys_mem; + atomic_t thread_ctx_switch_token; + u32 thread_ctx_switch_wait_token; + u32 asid; + u32 handle; +}; + +/** + * struct hl_ctx_mgr - for handling multiple contexts. + * @ctx_lock: protects ctx_handles. + * @ctx_handles: idr to hold all ctx handles. + */ +struct hl_ctx_mgr { + struct mutex ctx_lock; + struct idr ctx_handles; +}; + + + +/* + * COMMAND SUBMISSIONS + */ + +/** + * struct hl_userptr - memory mapping chunk information + * @vm_type: type of the VM. + * @job_node: linked-list node for hanging the object on the Job's list. + * @vec: pointer to the frame vector. + * @sgt: pointer to the scatter-gather table that holds the pages. + * @dir: for DMA unmapping, the direction must be supplied, so save it. + * @debugfs_list: node in debugfs list of command submissions. + * @addr: user-space virtual address of the start of the memory area. + * @size: size of the memory area to pin & map. + * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise. + */ +struct hl_userptr { + enum vm_type_t vm_type; /* must be first */ + struct list_head job_node; + struct frame_vector *vec; + struct sg_table *sgt; + enum dma_data_direction dir; + struct list_head debugfs_list; + u64 addr; + u32 size; + u8 dma_mapped; +}; + +/** + * struct hl_cs - command submission. + * @jobs_in_queue_cnt: per each queue, maintain counter of submitted jobs. + * @ctx: the context this CS belongs to. + * @job_list: list of the CS's jobs in the various queues. + * @job_lock: spinlock for the CS's jobs list. Needed for free_job. + * @refcount: reference counter for usage of the CS. + * @fence: pointer to the fence object of this CS. + * @signal_fence: pointer to the fence object of the signal CS (used by wait + * CS only). + * @finish_work: workqueue object to run when CS is completed by H/W. + * @work_tdr: delayed work node for TDR. + * @mirror_node : node in device mirror list of command submissions. + * @debugfs_list: node in debugfs list of command submissions. + * @sequence: the sequence number of this CS. + * @type: CS_TYPE_*. + * @submitted: true if CS was submitted to H/W. + * @completed: true if CS was completed by device. + * @timedout : true if CS was timedout. + * @tdr_active: true if TDR was activated for this CS (to prevent + * double TDR activation). + * @aborted: true if CS was aborted due to some device error. + */ +struct hl_cs { + u16 *jobs_in_queue_cnt; + struct hl_ctx *ctx; + struct list_head job_list; + spinlock_t job_lock; + struct kref refcount; + struct hl_fence *fence; + struct hl_fence *signal_fence; + struct work_struct finish_work; + struct delayed_work work_tdr; + struct list_head mirror_node; + struct list_head debugfs_list; + u64 sequence; + enum hl_cs_type type; + u8 submitted; + u8 completed; + u8 timedout; + u8 tdr_active; + u8 aborted; +}; + +/** + * struct hl_cs_job - command submission job. + * @cs_node: the node to hang on the CS jobs list. + * @cs: the CS this job belongs to. + * @user_cb: the CB we got from the user. + * @patched_cb: in case of patching, this is internal CB which is submitted on + * the queue instead of the CB we got from the IOCTL. + * @finish_work: workqueue object to run when job is completed. + * @userptr_list: linked-list of userptr mappings that belong to this job and + * wait for completion. + * @debugfs_list: node in debugfs list of command submission jobs. + * @queue_type: the type of the H/W queue this job is submitted to. + * @id: the id of this job inside a CS. + * @hw_queue_id: the id of the H/W queue this job is submitted to. + * @user_cb_size: the actual size of the CB we got from the user. + * @job_cb_size: the actual size of the CB that we put on the queue. + * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a + * handle to a kernel-allocated CB object, false + * otherwise (SRAM/DRAM/host address). + * @contains_dma_pkt: whether the JOB contains at least one DMA packet. This + * info is needed later, when adding the 2xMSG_PROT at the + * end of the JOB, to know which barriers to put in the + * MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't + * have streams so the engine can't be busy by another + * stream. + */ +struct hl_cs_job { + struct list_head cs_node; + struct hl_cs *cs; + struct hl_cb *user_cb; + struct hl_cb *patched_cb; + struct work_struct finish_work; + struct list_head userptr_list; + struct list_head debugfs_list; + enum hl_queue_type queue_type; + u32 id; + u32 hw_queue_id; + u32 user_cb_size; + u32 job_cb_size; + u8 is_kernel_allocated_cb; + u8 contains_dma_pkt; +}; + +/** + * struct hl_cs_parser - command submission parser properties. + * @user_cb: the CB we got from the user. + * @patched_cb: in case of patching, this is internal CB which is submitted on + * the queue instead of the CB we got from the IOCTL. + * @job_userptr_list: linked-list of userptr mappings that belong to the related + * job and wait for completion. + * @cs_sequence: the sequence number of the related CS. + * @queue_type: the type of the H/W queue this job is submitted to. + * @ctx_id: the ID of the context the related CS belongs to. + * @hw_queue_id: the id of the H/W queue this job is submitted to. + * @user_cb_size: the actual size of the CB we got from the user. + * @patched_cb_size: the size of the CB after parsing. + * @job_id: the id of the related job inside the related CS. + * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a + * handle to a kernel-allocated CB object, false + * otherwise (SRAM/DRAM/host address). + * @contains_dma_pkt: whether the JOB contains at least one DMA packet. This + * info is needed later, when adding the 2xMSG_PROT at the + * end of the JOB, to know which barriers to put in the + * MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't + * have streams so the engine can't be busy by another + * stream. + */ +struct hl_cs_parser { + struct hl_cb *user_cb; + struct hl_cb *patched_cb; + struct list_head *job_userptr_list; + u64 cs_sequence; + enum hl_queue_type queue_type; + u32 ctx_id; + u32 hw_queue_id; + u32 user_cb_size; + u32 patched_cb_size; + u8 job_id; + u8 is_kernel_allocated_cb; + u8 contains_dma_pkt; +}; + + +/* + * MEMORY STRUCTURE + */ + +/** + * struct hl_vm_hash_node - hash element from virtual address to virtual + * memory area descriptor (hl_vm_phys_pg_list or + * hl_userptr). + * @node: node to hang on the hash table in context object. + * @vaddr: key virtual address. + * @ptr: value pointer (hl_vm_phys_pg_list or hl_userptr). + */ +struct hl_vm_hash_node { + struct hlist_node node; + u64 vaddr; + void *ptr; +}; + +/** + * struct hl_vm_phys_pg_pack - physical page pack. + * @vm_type: describes the type of the virtual area descriptor. + * @pages: the physical page array. + * @npages: num physical pages in the pack. + * @total_size: total size of all the pages in this list. + * @mapping_cnt: number of shared mappings. + * @asid: the context related to this list. + * @page_size: size of each page in the pack. + * @flags: HL_MEM_* flags related to this list. + * @handle: the provided handle related to this list. + * @offset: offset from the first page. + * @contiguous: is contiguous physical memory. + * @created_from_userptr: is product of host virtual address. + */ +struct hl_vm_phys_pg_pack { + enum vm_type_t vm_type; /* must be first */ + u64 *pages; + u64 npages; + u64 total_size; + atomic_t mapping_cnt; + u32 asid; + u32 page_size; + u32 flags; + u32 handle; + u32 offset; + u8 contiguous; + u8 created_from_userptr; +}; + +/** + * struct hl_vm_va_block - virtual range block information. + * @node: node to hang on the virtual range list in context object. + * @start: virtual range start address. + * @end: virtual range end address. + * @size: virtual range size. + */ +struct hl_vm_va_block { + struct list_head node; + u64 start; + u64 end; + u64 size; +}; + +/** + * struct hl_vm - virtual memory manager for MMU. + * @dram_pg_pool: pool for DRAM physical pages of 2MB. + * @dram_pg_pool_refcount: reference counter for the pool usage. + * @idr_lock: protects the phys_pg_list_handles. + * @phys_pg_pack_handles: idr to hold all device allocations handles. + * @init_done: whether initialization was done. We need this because VM + * initialization might be skipped during device initialization. + */ +struct hl_vm { + struct gen_pool *dram_pg_pool; + struct kref dram_pg_pool_refcount; + spinlock_t idr_lock; + struct idr phys_pg_pack_handles; + u8 init_done; +}; + + +/* + * DEBUG, PROFILING STRUCTURE + */ + +/** + * struct hl_debug_params - Coresight debug parameters. + * @input: pointer to component specific input parameters. + * @output: pointer to component specific output parameters. + * @output_size: size of output buffer. + * @reg_idx: relevant register ID. + * @op: component operation to execute. + * @enable: true if to enable component debugging, false otherwise. + */ +struct hl_debug_params { + void *input; + void *output; + u32 output_size; + u32 reg_idx; + u32 op; + bool enable; +}; + +/* + * FILE PRIVATE STRUCTURE + */ + +/** + * struct hl_fpriv - process information stored in FD private data. + * @hdev: habanalabs device structure. + * @filp: pointer to the given file structure. + * @taskpid: current process ID. + * @ctx: current executing context. TODO: remove for multiple ctx per process + * @ctx_mgr: context manager to handle multiple context for this FD. + * @cb_mgr: command buffer manager to handle multiple buffers for this FD. + * @debugfs_list: list of relevant ASIC debugfs. + * @dev_node: node in the device list of file private data + * @refcount: number of related contexts. + * @restore_phase_mutex: lock for context switch and restore phase. + * @is_control: true for control device, false otherwise + */ +struct hl_fpriv { + struct hl_device *hdev; + struct file *filp; + struct pid *taskpid; + struct hl_ctx *ctx; + struct hl_ctx_mgr ctx_mgr; + struct hl_cb_mgr cb_mgr; + struct list_head debugfs_list; + struct list_head dev_node; + struct kref refcount; + struct mutex restore_phase_mutex; + u8 is_control; +}; + + +/* + * DebugFS + */ + +/** + * struct hl_info_list - debugfs file ops. + * @name: file name. + * @show: function to output information. + * @write: function to write to the file. + */ +struct hl_info_list { + const char *name; + int (*show)(struct seq_file *s, void *data); + ssize_t (*write)(struct file *file, const char __user *buf, + size_t count, loff_t *f_pos); +}; + +/** + * struct hl_debugfs_entry - debugfs dentry wrapper. + * @dent: base debugfs entry structure. + * @info_ent: dentry realted ops. + * @dev_entry: ASIC specific debugfs manager. + */ +struct hl_debugfs_entry { + struct dentry *dent; + const struct hl_info_list *info_ent; + struct hl_dbg_device_entry *dev_entry; +}; + +/** + * struct hl_dbg_device_entry - ASIC specific debugfs manager. + * @root: root dentry. + * @hdev: habanalabs device structure. + * @entry_arr: array of available hl_debugfs_entry. + * @file_list: list of available debugfs files. + * @file_mutex: protects file_list. + * @cb_list: list of available CBs. + * @cb_spinlock: protects cb_list. + * @cs_list: list of available CSs. + * @cs_spinlock: protects cs_list. + * @cs_job_list: list of available CB jobs. + * @cs_job_spinlock: protects cs_job_list. + * @userptr_list: list of available userptrs (virtual memory chunk descriptor). + * @userptr_spinlock: protects userptr_list. + * @ctx_mem_hash_list: list of available contexts with MMU mappings. + * @ctx_mem_hash_spinlock: protects cb_list. + * @addr: next address to read/write from/to in read/write32. + * @mmu_addr: next virtual address to translate to physical address in mmu_show. + * @mmu_asid: ASID to use while translating in mmu_show. + * @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read. + * @i2c_bus: generic u8 debugfs file for address value to use in i2c_data_read. + * @i2c_bus: generic u8 debugfs file for register value to use in i2c_data_read. + */ +struct hl_dbg_device_entry { + struct dentry *root; + struct hl_device *hdev; + struct hl_debugfs_entry *entry_arr; + struct list_head file_list; + struct mutex file_mutex; + struct list_head cb_list; + spinlock_t cb_spinlock; + struct list_head cs_list; + spinlock_t cs_spinlock; + struct list_head cs_job_list; + spinlock_t cs_job_spinlock; + struct list_head userptr_list; + spinlock_t userptr_spinlock; + struct list_head ctx_mem_hash_list; + spinlock_t ctx_mem_hash_spinlock; + u64 addr; + u64 mmu_addr; + u32 mmu_asid; + u8 i2c_bus; + u8 i2c_addr; + u8 i2c_reg; +}; + + +/* + * DEVICES + */ + +/* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe + * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards. + */ +#define HL_MAX_MINORS 256 + +/* + * Registers read & write functions. + */ + +u32 hl_rreg(struct hl_device *hdev, u32 reg); +void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); + +#define RREG32(reg) hdev->asic_funcs->rreg(hdev, (reg)) +#define WREG32(reg, v) hdev->asic_funcs->wreg(hdev, (reg), (v)) +#define DREG32(reg) pr_info("REGISTER: " #reg " : 0x%08X\n", \ + hdev->asic_funcs->rreg(hdev, (reg))) + +#define WREG32_P(reg, val, mask) \ + do { \ + u32 tmp_ = RREG32(reg); \ + tmp_ &= (mask); \ + tmp_ |= ((val) & ~(mask)); \ + WREG32(reg, tmp_); \ + } while (0) +#define WREG32_AND(reg, and) WREG32_P(reg, 0, and) +#define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or)) + +#define RMWREG32(reg, val, mask) \ + do { \ + u32 tmp_ = RREG32(reg); \ + tmp_ &= ~(mask); \ + tmp_ |= ((val) << __ffs(mask)); \ + WREG32(reg, tmp_); \ + } while (0) + +#define RREG32_MASK(reg, mask) ((RREG32(reg) & mask) >> __ffs(mask)) + +#define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT +#define REG_FIELD_MASK(reg, field) reg##_##field##_MASK +#define WREG32_FIELD(reg, offset, field, val) \ + WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & \ + ~REG_FIELD_MASK(reg, field)) | \ + (val) << REG_FIELD_SHIFT(reg, field)) + +/* Timeout should be longer when working with simulator but cap the + * increased timeout to some maximum + */ +#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \ +({ \ + ktime_t __timeout; \ + if (hdev->pdev) \ + __timeout = ktime_add_us(ktime_get(), timeout_us); \ + else \ + __timeout = ktime_add_us(ktime_get(),\ + min((u64)(timeout_us * 10), \ + (u64) HL_SIM_MAX_TIMEOUT_US)); \ + might_sleep_if(sleep_us); \ + for (;;) { \ + (val) = RREG32(addr); \ + if (cond) \ + break; \ + if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \ + (val) = RREG32(addr); \ + break; \ + } \ + if (sleep_us) \ + usleep_range((sleep_us >> 2) + 1, sleep_us); \ + } \ + (cond) ? 0 : -ETIMEDOUT; \ +}) + +/* + * address in this macro points always to a memory location in the + * host's (server's) memory. That location is updated asynchronously + * either by the direct access of the device or by another core. + * + * To work both in LE and BE architectures, we need to distinguish between the + * two states (device or another core updates the memory location). Therefore, + * if mem_written_by_device is true, the host memory being polled will be + * updated directly by the device. If false, the host memory being polled will + * be updated by host CPU. Required so host knows whether or not the memory + * might need to be byte-swapped before returning value to caller. + */ +#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us, \ + mem_written_by_device) \ +({ \ + ktime_t __timeout; \ + if (hdev->pdev) \ + __timeout = ktime_add_us(ktime_get(), timeout_us); \ + else \ + __timeout = ktime_add_us(ktime_get(),\ + min((u64)(timeout_us * 10), \ + (u64) HL_SIM_MAX_TIMEOUT_US)); \ + might_sleep_if(sleep_us); \ + for (;;) { \ + /* Verify we read updates done by other cores or by device */ \ + mb(); \ + (val) = *((u32 *)(addr)); \ + if (mem_written_by_device) \ + (val) = le32_to_cpu(*(__le32 *) &(val)); \ + if (cond) \ + break; \ + if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \ + (val) = *((u32 *)(addr)); \ + if (mem_written_by_device) \ + (val) = le32_to_cpu(*(__le32 *) &(val)); \ + break; \ + } \ + if (sleep_us) \ + usleep_range((sleep_us >> 2) + 1, sleep_us); \ + } \ + (cond) ? 0 : -ETIMEDOUT; \ +}) + +#define hl_poll_timeout_device_memory(hdev, addr, val, cond, sleep_us, \ + timeout_us) \ +({ \ + ktime_t __timeout; \ + if (hdev->pdev) \ + __timeout = ktime_add_us(ktime_get(), timeout_us); \ + else \ + __timeout = ktime_add_us(ktime_get(),\ + min((u64)(timeout_us * 10), \ + (u64) HL_SIM_MAX_TIMEOUT_US)); \ + might_sleep_if(sleep_us); \ + for (;;) { \ + (val) = readl(addr); \ + if (cond) \ + break; \ + if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \ + (val) = readl(addr); \ + break; \ + } \ + if (sleep_us) \ + usleep_range((sleep_us >> 2) + 1, sleep_us); \ + } \ + (cond) ? 0 : -ETIMEDOUT; \ +}) + +struct hwmon_chip_info; + +/** + * struct hl_device_reset_work - reset workqueue task wrapper. + * @reset_work: reset work to be done. + * @hdev: habanalabs device structure. + */ +struct hl_device_reset_work { + struct work_struct reset_work; + struct hl_device *hdev; +}; + +/** + * struct hl_device_idle_busy_ts - used for calculating device utilization rate. + * @idle_to_busy_ts: timestamp where device changed from idle to busy. + * @busy_to_idle_ts: timestamp where device changed from busy to idle. + */ +struct hl_device_idle_busy_ts { + ktime_t idle_to_busy_ts; + ktime_t busy_to_idle_ts; +}; + + +/** + * struct hl_mmu_priv - used for holding per-device mmu internal information. + * @mmu_pgt_pool: pool of page tables used by MMU for allocating hops. + * @mmu_shadow_hop0: shadow array of hop0 tables. + */ +struct hl_mmu_priv { + struct gen_pool *mmu_pgt_pool; + void *mmu_shadow_hop0; +}; + +/** + * struct hl_mmu_funcs - Device related MMU functions. + * @init: initialize the MMU module. + * @fini: release the MMU module. + * @ctx_init: Initialize a context for using the MMU module. + * @ctx_fini: disable a ctx from using the mmu module. + * @map: maps a virtual address to physical address for a context. + * @unmap: unmap a virtual address of a context. + * @flush: flush all writes from all cores to reach device MMU. + * @swap_out: marks all mapping of the given context as swapped out. + * @swap_in: marks all mapping of the given context as swapped in. + */ +struct hl_mmu_funcs { + int (*init)(struct hl_device *hdev); + void (*fini)(struct hl_device *hdev); + int (*ctx_init)(struct hl_ctx *ctx); + void (*ctx_fini)(struct hl_ctx *ctx); + int (*map)(struct hl_ctx *ctx, + u64 virt_addr, u64 phys_addr, u32 page_size, + bool is_dram_addr); + int (*unmap)(struct hl_ctx *ctx, + u64 virt_addr, bool is_dram_addr); + void (*flush)(struct hl_ctx *ctx); + void (*swap_out)(struct hl_ctx *ctx); + void (*swap_in)(struct hl_ctx *ctx); +}; + +/** + * struct hl_device - habanalabs device structure. + * @pdev: pointer to PCI device, can be NULL in case of simulator device. + * @pcie_bar_phys: array of available PCIe bars physical addresses. + * (required only for PCI address match mode) + * @pcie_bar: array of available PCIe bars virtual addresses. + * @rmmio: configuration area address on SRAM. + * @cdev: related char device. + * @cdev_ctrl: char device for control operations only (INFO IOCTL) + * @dev: related kernel basic device structure. + * @dev_ctrl: related kernel device structure for the control device + * @work_freq: delayed work to lower device frequency if possible. + * @work_heartbeat: delayed work for CPU-CP is-alive check. + * @asic_name: ASIC specific name. + * @asic_type: ASIC specific type. + * @completion_queue: array of hl_cq. + * @cq_wq: work queues of completion queues for executing work in process + * context. + * @eq_wq: work queue of event queue for executing work in process context. + * @kernel_ctx: Kernel driver context structure. + * @kernel_queues: array of hl_hw_queue. + * @hw_queues_mirror_list: CS mirror list for TDR. + * @hw_queues_mirror_lock: protects hw_queues_mirror_list. + * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CGs. + * @event_queue: event queue for IRQ from CPU-CP. + * @dma_pool: DMA pool for small allocations. + * @cpu_accessible_dma_mem: Host <-> CPU-CP shared memory CPU address. + * @cpu_accessible_dma_address: Host <-> CPU-CP shared memory DMA address. + * @cpu_accessible_dma_pool: Host <-> CPU-CP shared memory pool. + * @asid_bitmap: holds used/available ASIDs. + * @asid_mutex: protects asid_bitmap. + * @send_cpu_message_lock: enforces only one message in Host <-> CPU-CP queue. + * @debug_lock: protects critical section of setting debug mode for device + * @asic_prop: ASIC specific immutable properties. + * @asic_funcs: ASIC specific functions. + * @asic_specific: ASIC specific information to use only from ASIC files. + * @vm: virtual memory manager for MMU. + * @mmu_cache_lock: protects MMU cache invalidation as it can serve one context. + * @hwmon_dev: H/W monitor device. + * @pm_mng_profile: current power management profile. + * @hl_chip_info: ASIC's sensors information. + * @hl_debugfs: device's debugfs manager. + * @cb_pool: list of preallocated CBs. + * @cb_pool_lock: protects the CB pool. + * @internal_cb_pool_virt_addr: internal command buffer pool virtual address. + * @internal_cb_pool_dma_addr: internal command buffer pool dma address. + * @internal_cb_pool: internal command buffer memory pool. + * @internal_cb_va_base: internal cb pool mmu virtual address base + * @fpriv_list: list of file private data structures. Each structure is created + * when a user opens the device + * @fpriv_list_lock: protects the fpriv_list + * @compute_ctx: current compute context executing. + * @idle_busy_ts_arr: array to hold time stamps of transitions from idle to busy + * and vice-versa + * @aggregated_cs_counters: aggregated cs counters among all contexts + * @mmu_priv: device-specific MMU data. + * @mmu_func: device-related MMU functions. + * @dram_used_mem: current DRAM memory consumption. + * @timeout_jiffies: device CS timeout value. + * @max_power: the max power of the device, as configured by the sysadmin. This + * value is saved so in case of hard-reset, the driver will restore + * this value and update the F/W after the re-initialization + * @clock_gating_mask: is clock gating enabled. bitmask that represents the + * different engines. See debugfs-driver-habanalabs for + * details. + * @in_reset: is device in reset flow. + * @curr_pll_profile: current PLL profile. + * @card_type: Various ASICs have several card types. This indicates the card + * type of the current device. + * @cs_active_cnt: number of active command submissions on this device (active + * means already in H/W queues) + * @major: habanalabs kernel driver major. + * @high_pll: high PLL profile frequency. + * @soft_reset_cnt: number of soft reset since the driver was loaded. + * @hard_reset_cnt: number of hard reset since the driver was loaded. + * @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr + * @clk_throttling_reason: bitmask represents the current clk throttling reasons + * @id: device minor. + * @id_control: minor of the control device + * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit + * addresses. + * @disabled: is device disabled. + * @late_init_done: is late init stage was done during initialization. + * @hwmon_initialized: is H/W monitor sensors was initialized. + * @hard_reset_pending: is there a hard reset work pending. + * @heartbeat: is heartbeat sanity check towards CPU-CP enabled. + * @reset_on_lockup: true if a reset should be done in case of stuck CS, false + * otherwise. + * @dram_supports_virtual_memory: is MMU enabled towards DRAM. + * @dram_default_page_mapping: is DRAM default page mapping enabled. + * @pmmu_huge_range: is a different virtual addresses range used for PMMU with + * huge pages. + * @init_done: is the initialization of the device done. + * @mmu_enable: is MMU enabled. + * @mmu_huge_page_opt: is MMU huge pages optimization enabled. + * @device_cpu_disabled: is the device CPU disabled (due to timeouts) + * @dma_mask: the dma mask that was set for this device + * @in_debug: is device under debug. This, together with fpriv_list, enforces + * that only a single user is configuring the debug infrastructure. + * @power9_64bit_dma_enable: true to enable 64-bit DMA mask support. Relevant + * only to POWER9 machines. + * @cdev_sysfs_created: were char devices and sysfs nodes created. + * @stop_on_err: true if engines should stop on error. + * @supports_sync_stream: is sync stream supported. + * @sync_stream_queue_idx: helper index for sync stream queues initialization. + * @supports_coresight: is CoreSight supported. + * @supports_soft_reset: is soft reset supported. + * @supports_cb_mapping: is mapping a CB to the device's MMU supported. + */ +struct hl_device { + struct pci_dev *pdev; + u64 pcie_bar_phys[HL_PCI_NUM_BARS]; + void __iomem *pcie_bar[HL_PCI_NUM_BARS]; + void __iomem *rmmio; + struct cdev cdev; + struct cdev cdev_ctrl; + struct device *dev; + struct device *dev_ctrl; + struct delayed_work work_freq; + struct delayed_work work_heartbeat; + char asic_name[32]; + enum hl_asic_type asic_type; + struct hl_cq *completion_queue; + struct workqueue_struct **cq_wq; + struct workqueue_struct *eq_wq; + struct hl_ctx *kernel_ctx; + struct hl_hw_queue *kernel_queues; + struct list_head hw_queues_mirror_list; + spinlock_t hw_queues_mirror_lock; + struct hl_cb_mgr kernel_cb_mgr; + struct hl_eq event_queue; + struct dma_pool *dma_pool; + void *cpu_accessible_dma_mem; + dma_addr_t cpu_accessible_dma_address; + struct gen_pool *cpu_accessible_dma_pool; + unsigned long *asid_bitmap; + struct mutex asid_mutex; + struct mutex send_cpu_message_lock; + struct mutex debug_lock; + struct asic_fixed_properties asic_prop; + const struct hl_asic_funcs *asic_funcs; + void *asic_specific; + struct hl_vm vm; + struct mutex mmu_cache_lock; + struct device *hwmon_dev; + enum hl_pm_mng_profile pm_mng_profile; + struct hwmon_chip_info *hl_chip_info; + + struct hl_dbg_device_entry hl_debugfs; + + struct list_head cb_pool; + spinlock_t cb_pool_lock; + + void *internal_cb_pool_virt_addr; + dma_addr_t internal_cb_pool_dma_addr; + struct gen_pool *internal_cb_pool; + u64 internal_cb_va_base; + + struct list_head fpriv_list; + struct mutex fpriv_list_lock; + + struct hl_ctx *compute_ctx; + + struct hl_device_idle_busy_ts *idle_busy_ts_arr; + + struct hl_cs_counters aggregated_cs_counters; + + struct hl_mmu_priv mmu_priv; + struct hl_mmu_funcs mmu_func; + + atomic64_t dram_used_mem; + u64 timeout_jiffies; + u64 max_power; + u64 clock_gating_mask; + atomic_t in_reset; + enum hl_pll_frequency curr_pll_profile; + enum cpucp_card_types card_type; + int cs_active_cnt; + u32 major; + u32 high_pll; + u32 soft_reset_cnt; + u32 hard_reset_cnt; + u32 idle_busy_ts_idx; + u32 clk_throttling_reason; + u16 id; + u16 id_control; + u16 cpu_pci_msb_addr; + u8 disabled; + u8 late_init_done; + u8 hwmon_initialized; + u8 hard_reset_pending; + u8 heartbeat; + u8 reset_on_lockup; + u8 dram_supports_virtual_memory; + u8 dram_default_page_mapping; + u8 pmmu_huge_range; + u8 init_done; + u8 device_cpu_disabled; + u8 dma_mask; + u8 in_debug; + u8 power9_64bit_dma_enable; + u8 cdev_sysfs_created; + u8 stop_on_err; + u8 supports_sync_stream; + u8 sync_stream_queue_idx; + u8 supports_coresight; + u8 supports_soft_reset; + u8 supports_cb_mapping; + + /* Parameters for bring-up */ + u8 mmu_enable; + u8 mmu_huge_page_opt; + u8 cpu_enable; + u8 reset_pcilink; + u8 cpu_queues_enable; + u8 fw_loading; + u8 pldm; + u8 axi_drain; + u8 sram_scrambler_enable; + u8 dram_scrambler_enable; + u8 hard_reset_on_fw_events; + u8 bmc_enable; + u8 rl_enable; +}; + + +/* + * IOCTLs + */ + +/** + * typedef hl_ioctl_t - typedef for ioctl function in the driver + * @hpriv: pointer to the FD's private data, which contains state of + * user process + * @data: pointer to the input/output arguments structure of the IOCTL + * + * Return: 0 for success, negative value for error + */ +typedef int hl_ioctl_t(struct hl_fpriv *hpriv, void *data); + +/** + * struct hl_ioctl_desc - describes an IOCTL entry of the driver. + * @cmd: the IOCTL code as created by the kernel macros. + * @func: pointer to the driver's function that should be called for this IOCTL. + */ +struct hl_ioctl_desc { + unsigned int cmd; + hl_ioctl_t *func; +}; + + +/* + * Kernel module functions that can be accessed by entire module + */ + +/** + * hl_mem_area_inside_range() - Checks whether address+size are inside a range. + * @address: The start address of the area we want to validate. + * @size: The size in bytes of the area we want to validate. + * @range_start_address: The start address of the valid range. + * @range_end_address: The end address of the valid range. + * + * Return: true if the area is inside the valid range, false otherwise. + */ +static inline bool hl_mem_area_inside_range(u64 address, u64 size, + u64 range_start_address, u64 range_end_address) +{ + u64 end_address = address + size; + + if ((address >= range_start_address) && + (end_address <= range_end_address) && + (end_address > address)) + return true; + + return false; +} + +/** + * hl_mem_area_crosses_range() - Checks whether address+size crossing a range. + * @address: The start address of the area we want to validate. + * @size: The size in bytes of the area we want to validate. + * @range_start_address: The start address of the valid range. + * @range_end_address: The end address of the valid range. + * + * Return: true if the area overlaps part or all of the valid range, + * false otherwise. + */ +static inline bool hl_mem_area_crosses_range(u64 address, u32 size, + u64 range_start_address, u64 range_end_address) +{ + u64 end_address = address + size; + + if ((address >= range_start_address) && + (address < range_end_address)) + return true; + + if ((end_address >= range_start_address) && + (end_address < range_end_address)) + return true; + + if ((address < range_start_address) && + (end_address >= range_end_address)) + return true; + + return false; +} + +int hl_device_open(struct inode *inode, struct file *filp); +int hl_device_open_ctrl(struct inode *inode, struct file *filp); +bool hl_device_disabled_or_in_reset(struct hl_device *hdev); +enum hl_device_status hl_device_status(struct hl_device *hdev); +int hl_device_set_debug_mode(struct hl_device *hdev, bool enable); +int create_hdev(struct hl_device **dev, struct pci_dev *pdev, + enum hl_asic_type asic_type, int minor); +void destroy_hdev(struct hl_device *hdev); +int hl_hw_queues_create(struct hl_device *hdev); +void hl_hw_queues_destroy(struct hl_device *hdev); +int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id, + u32 cb_size, u64 cb_ptr); +int hl_hw_queue_schedule_cs(struct hl_cs *cs); +u32 hl_hw_queue_add_ptr(u32 ptr, u16 val); +void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id); +void hl_int_hw_queue_update_ci(struct hl_cs *cs); +void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset); + +#define hl_queue_inc_ptr(p) hl_hw_queue_add_ptr(p, 1) +#define hl_pi_2_offset(pi) ((pi) & (HL_QUEUE_LENGTH - 1)) + +int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id); +void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q); +int hl_eq_init(struct hl_device *hdev, struct hl_eq *q); +void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q); +void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q); +void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q); +irqreturn_t hl_irq_handler_cq(int irq, void *arg); +irqreturn_t hl_irq_handler_eq(int irq, void *arg); +u32 hl_cq_inc_ptr(u32 ptr); + +int hl_asid_init(struct hl_device *hdev); +void hl_asid_fini(struct hl_device *hdev); +unsigned long hl_asid_alloc(struct hl_device *hdev); +void hl_asid_free(struct hl_device *hdev, unsigned long asid); + +int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv); +void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx); +int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx); +void hl_ctx_do_release(struct kref *ref); +void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx); +int hl_ctx_put(struct hl_ctx *ctx); +struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq); +void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr); +void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr); + +int hl_device_init(struct hl_device *hdev, struct class *hclass); +void hl_device_fini(struct hl_device *hdev); +int hl_device_suspend(struct hl_device *hdev); +int hl_device_resume(struct hl_device *hdev); +int hl_device_reset(struct hl_device *hdev, bool hard_reset, + bool from_hard_reset_thread); +void hl_hpriv_get(struct hl_fpriv *hpriv); +void hl_hpriv_put(struct hl_fpriv *hpriv); +int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq); +uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms); + +int hl_build_hwmon_channel_info(struct hl_device *hdev, + struct cpucp_sensor *sensors_arr); + +int hl_sysfs_init(struct hl_device *hdev); +void hl_sysfs_fini(struct hl_device *hdev); + +int hl_hwmon_init(struct hl_device *hdev); +void hl_hwmon_fini(struct hl_device *hdev); + +int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, + struct hl_ctx *ctx, u32 cb_size, bool internal_cb, + bool map_cb, u64 *handle); +int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle); +int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma); +struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr, + u32 handle); +void hl_cb_put(struct hl_cb *cb); +void hl_cb_mgr_init(struct hl_cb_mgr *mgr); +void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr); +struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size, + bool internal_cb); +int hl_cb_pool_init(struct hl_device *hdev); +int hl_cb_pool_fini(struct hl_device *hdev); +int hl_cb_va_pool_init(struct hl_ctx *ctx); +void hl_cb_va_pool_fini(struct hl_ctx *ctx); + +void hl_cs_rollback_all(struct hl_device *hdev); +struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, + enum hl_queue_type queue_type, bool is_kernel_allocated_cb); +void hl_sob_reset_error(struct kref *ref); +void hl_fence_put(struct hl_fence *fence); +void hl_fence_get(struct hl_fence *fence); + +void goya_set_asic_funcs(struct hl_device *hdev); +void gaudi_set_asic_funcs(struct hl_device *hdev); + +int hl_vm_ctx_init(struct hl_ctx *ctx); +void hl_vm_ctx_fini(struct hl_ctx *ctx); + +int hl_vm_init(struct hl_device *hdev); +void hl_vm_fini(struct hl_device *hdev); + +int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, + struct hl_userptr *userptr); +void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr); +void hl_userptr_delete_list(struct hl_device *hdev, + struct list_head *userptr_list); +bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size, + struct list_head *userptr_list, + struct hl_userptr **userptr); + +int hl_mmu_init(struct hl_device *hdev); +void hl_mmu_fini(struct hl_device *hdev); +int hl_mmu_ctx_init(struct hl_ctx *ctx); +void hl_mmu_ctx_fini(struct hl_ctx *ctx); +int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, + u32 page_size, bool flush_pte); +int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, + bool flush_pte); +void hl_mmu_swap_out(struct hl_ctx *ctx); +void hl_mmu_swap_in(struct hl_ctx *ctx); +int hl_mmu_if_set_funcs(struct hl_device *hdev); +void hl_mmu_v1_set_funcs(struct hl_device *hdev); + +int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name, + void __iomem *dst); +int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode); +int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, + u16 len, u32 timeout, long *result); +int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type); +int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, + size_t irq_arr_size); +int hl_fw_test_cpu_queue(struct hl_device *hdev); +void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, + dma_addr_t *dma_handle); +void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, + void *vaddr); +int hl_fw_send_heartbeat(struct hl_device *hdev); +int hl_fw_cpucp_info_get(struct hl_device *hdev); +int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size); +int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, + struct hl_info_pci_counters *counters); +int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, + u64 *total_energy); +int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, + u32 msg_to_cpu_reg, u32 cpu_msg_status_reg, + u32 boot_err0_reg, bool skip_bmc, + u32 cpu_timeout, u32 boot_fit_timeout); +int hl_fw_read_preboot_ver(struct hl_device *hdev, u32 cpu_boot_status_reg, + u32 boot_err0_reg, u32 timeout); + +int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3], + bool is_wc[3]); +int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data); +int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region, + struct hl_inbound_pci_region *pci_region); +int hl_pci_set_outbound_region(struct hl_device *hdev, + struct hl_outbound_pci_region *pci_region); +int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg, + u32 boot_err0_reg, u32 preboot_ver_timeout); +void hl_pci_fini(struct hl_device *hdev); + +long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr); +void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq); +int hl_get_temperature(struct hl_device *hdev, + int sensor_index, u32 attr, long *value); +int hl_set_temperature(struct hl_device *hdev, + int sensor_index, u32 attr, long value); +int hl_get_voltage(struct hl_device *hdev, + int sensor_index, u32 attr, long *value); +int hl_get_current(struct hl_device *hdev, + int sensor_index, u32 attr, long *value); +int hl_get_fan_speed(struct hl_device *hdev, + int sensor_index, u32 attr, long *value); +int hl_get_pwm_info(struct hl_device *hdev, + int sensor_index, u32 attr, long *value); +void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, + long value); +u64 hl_get_max_power(struct hl_device *hdev); +void hl_set_max_power(struct hl_device *hdev); +int hl_set_voltage(struct hl_device *hdev, + int sensor_index, u32 attr, long value); +int hl_set_current(struct hl_device *hdev, + int sensor_index, u32 attr, long value); + +#ifdef CONFIG_DEBUG_FS + +void hl_debugfs_init(void); +void hl_debugfs_fini(void); +void hl_debugfs_add_device(struct hl_device *hdev); +void hl_debugfs_remove_device(struct hl_device *hdev); +void hl_debugfs_add_file(struct hl_fpriv *hpriv); +void hl_debugfs_remove_file(struct hl_fpriv *hpriv); +void hl_debugfs_add_cb(struct hl_cb *cb); +void hl_debugfs_remove_cb(struct hl_cb *cb); +void hl_debugfs_add_cs(struct hl_cs *cs); +void hl_debugfs_remove_cs(struct hl_cs *cs); +void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job); +void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job); +void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr); +void hl_debugfs_remove_userptr(struct hl_device *hdev, + struct hl_userptr *userptr); +void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx); +void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx); + +#else + +static inline void __init hl_debugfs_init(void) +{ +} + +static inline void hl_debugfs_fini(void) +{ +} + +static inline void hl_debugfs_add_device(struct hl_device *hdev) +{ +} + +static inline void hl_debugfs_remove_device(struct hl_device *hdev) +{ +} + +static inline void hl_debugfs_add_file(struct hl_fpriv *hpriv) +{ +} + +static inline void hl_debugfs_remove_file(struct hl_fpriv *hpriv) +{ +} + +static inline void hl_debugfs_add_cb(struct hl_cb *cb) +{ +} + +static inline void hl_debugfs_remove_cb(struct hl_cb *cb) +{ +} + +static inline void hl_debugfs_add_cs(struct hl_cs *cs) +{ +} + +static inline void hl_debugfs_remove_cs(struct hl_cs *cs) +{ +} + +static inline void hl_debugfs_add_job(struct hl_device *hdev, + struct hl_cs_job *job) +{ +} + +static inline void hl_debugfs_remove_job(struct hl_device *hdev, + struct hl_cs_job *job) +{ +} + +static inline void hl_debugfs_add_userptr(struct hl_device *hdev, + struct hl_userptr *userptr) +{ +} + +static inline void hl_debugfs_remove_userptr(struct hl_device *hdev, + struct hl_userptr *userptr) +{ +} + +static inline void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, + struct hl_ctx *ctx) +{ +} + +static inline void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, + struct hl_ctx *ctx) +{ +} + +#endif + +/* IOCTLs */ +long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); +long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg); +int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data); +int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data); +int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data); +int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data); + +#endif /* HABANALABSP_H_ */ diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c new file mode 100644 index 000000000..ded92b3cb --- /dev/null +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -0,0 +1,605 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#define pr_fmt(fmt) "habanalabs: " fmt + +#include "habanalabs.h" + +#include <linux/pci.h> +#include <linux/aer.h> +#include <linux/module.h> + +#define HL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team" + +#define HL_DRIVER_DESC "Driver for HabanaLabs's AI Accelerators" + +MODULE_AUTHOR(HL_DRIVER_AUTHOR); +MODULE_DESCRIPTION(HL_DRIVER_DESC); +MODULE_LICENSE("GPL v2"); + +static int hl_major; +static struct class *hl_class; +static DEFINE_IDR(hl_devs_idr); +static DEFINE_MUTEX(hl_devs_idr_lock); + +static int timeout_locked = 5; +static int reset_on_lockup = 1; + +module_param(timeout_locked, int, 0444); +MODULE_PARM_DESC(timeout_locked, + "Device lockup timeout in seconds (0 = disabled, default 5s)"); + +module_param(reset_on_lockup, int, 0444); +MODULE_PARM_DESC(reset_on_lockup, + "Do device reset on lockup (0 = no, 1 = yes, default yes)"); + +#define PCI_VENDOR_ID_HABANALABS 0x1da3 + +#define PCI_IDS_GOYA 0x0001 +#define PCI_IDS_GAUDI 0x1000 + +static const struct pci_device_id ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), }, + { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), }, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, ids); + +/* + * get_asic_type - translate device id to asic type + * + * @device: id of the PCI device + * + * Translate device id to asic type. + * In case of unidentified device, return -1 + */ +static enum hl_asic_type get_asic_type(u16 device) +{ + enum hl_asic_type asic_type; + + switch (device) { + case PCI_IDS_GOYA: + asic_type = ASIC_GOYA; + break; + case PCI_IDS_GAUDI: + asic_type = ASIC_GAUDI; + break; + default: + asic_type = ASIC_INVALID; + break; + } + + return asic_type; +} + +/* + * hl_device_open - open function for habanalabs device + * + * @inode: pointer to inode structure + * @filp: pointer to file structure + * + * Called when process opens an habanalabs device. + */ +int hl_device_open(struct inode *inode, struct file *filp) +{ + struct hl_device *hdev; + struct hl_fpriv *hpriv; + int rc; + + mutex_lock(&hl_devs_idr_lock); + hdev = idr_find(&hl_devs_idr, iminor(inode)); + mutex_unlock(&hl_devs_idr_lock); + + if (!hdev) { + pr_err("Couldn't find device %d:%d\n", + imajor(inode), iminor(inode)); + return -ENXIO; + } + + hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL); + if (!hpriv) + return -ENOMEM; + + hpriv->hdev = hdev; + filp->private_data = hpriv; + hpriv->filp = filp; + mutex_init(&hpriv->restore_phase_mutex); + kref_init(&hpriv->refcount); + nonseekable_open(inode, filp); + + hl_cb_mgr_init(&hpriv->cb_mgr); + hl_ctx_mgr_init(&hpriv->ctx_mgr); + + hpriv->taskpid = find_get_pid(current->pid); + + mutex_lock(&hdev->fpriv_list_lock); + + if (hl_device_disabled_or_in_reset(hdev)) { + dev_err_ratelimited(hdev->dev, + "Can't open %s because it is disabled or in reset\n", + dev_name(hdev->dev)); + rc = -EPERM; + goto out_err; + } + + if (hdev->in_debug) { + dev_err_ratelimited(hdev->dev, + "Can't open %s because it is being debugged by another user\n", + dev_name(hdev->dev)); + rc = -EPERM; + goto out_err; + } + + if (hdev->compute_ctx) { + dev_dbg_ratelimited(hdev->dev, + "Can't open %s because another user is working on it\n", + dev_name(hdev->dev)); + rc = -EBUSY; + goto out_err; + } + + rc = hl_ctx_create(hdev, hpriv); + if (rc) { + dev_err(hdev->dev, "Failed to create context %d\n", rc); + goto out_err; + } + + /* Device is IDLE at this point so it is legal to change PLLs. + * There is no need to check anything because if the PLL is + * already HIGH, the set function will return without doing + * anything + */ + hl_device_set_frequency(hdev, PLL_HIGH); + + list_add(&hpriv->dev_node, &hdev->fpriv_list); + mutex_unlock(&hdev->fpriv_list_lock); + + hl_debugfs_add_file(hpriv); + + return 0; + +out_err: + mutex_unlock(&hdev->fpriv_list_lock); + + hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr); + hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); + filp->private_data = NULL; + mutex_destroy(&hpriv->restore_phase_mutex); + put_pid(hpriv->taskpid); + + kfree(hpriv); + + return rc; +} + +int hl_device_open_ctrl(struct inode *inode, struct file *filp) +{ + struct hl_device *hdev; + struct hl_fpriv *hpriv; + int rc; + + mutex_lock(&hl_devs_idr_lock); + hdev = idr_find(&hl_devs_idr, iminor(inode)); + mutex_unlock(&hl_devs_idr_lock); + + if (!hdev) { + pr_err("Couldn't find device %d:%d\n", + imajor(inode), iminor(inode)); + return -ENXIO; + } + + hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL); + if (!hpriv) + return -ENOMEM; + + mutex_lock(&hdev->fpriv_list_lock); + + if (hl_device_disabled_or_in_reset(hdev)) { + dev_err_ratelimited(hdev->dev_ctrl, + "Can't open %s because it is disabled or in reset\n", + dev_name(hdev->dev_ctrl)); + rc = -EPERM; + goto out_err; + } + + list_add(&hpriv->dev_node, &hdev->fpriv_list); + mutex_unlock(&hdev->fpriv_list_lock); + + hpriv->hdev = hdev; + filp->private_data = hpriv; + hpriv->filp = filp; + hpriv->is_control = true; + nonseekable_open(inode, filp); + + hpriv->taskpid = find_get_pid(current->pid); + + return 0; + +out_err: + mutex_unlock(&hdev->fpriv_list_lock); + kfree(hpriv); + return rc; +} + +static void set_driver_behavior_per_device(struct hl_device *hdev) +{ + hdev->mmu_enable = 1; + hdev->cpu_enable = 1; + hdev->fw_loading = 1; + hdev->cpu_queues_enable = 1; + hdev->heartbeat = 1; + hdev->clock_gating_mask = ULONG_MAX; + + hdev->reset_pcilink = 0; + hdev->axi_drain = 0; + hdev->sram_scrambler_enable = 1; + hdev->dram_scrambler_enable = 1; + hdev->bmc_enable = 1; + hdev->hard_reset_on_fw_events = 1; +} + +/* + * create_hdev - create habanalabs device instance + * + * @dev: will hold the pointer to the new habanalabs device structure + * @pdev: pointer to the pci device + * @asic_type: in case of simulator device, which device is it + * @minor: in case of simulator device, the minor of the device + * + * Allocate memory for habanalabs device and initialize basic fields + * Identify the ASIC type + * Allocate ID (minor) for the device (only for real devices) + */ +int create_hdev(struct hl_device **dev, struct pci_dev *pdev, + enum hl_asic_type asic_type, int minor) +{ + struct hl_device *hdev; + int rc, main_id, ctrl_id = 0; + + *dev = NULL; + + hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); + if (!hdev) + return -ENOMEM; + + /* First, we must find out which ASIC are we handling. This is needed + * to configure the behavior of the driver (kernel parameters) + */ + if (pdev) { + hdev->asic_type = get_asic_type(pdev->device); + if (hdev->asic_type == ASIC_INVALID) { + dev_err(&pdev->dev, "Unsupported ASIC\n"); + rc = -ENODEV; + goto free_hdev; + } + } else { + hdev->asic_type = asic_type; + } + + hdev->major = hl_major; + hdev->reset_on_lockup = reset_on_lockup; + hdev->pldm = 0; + + set_driver_behavior_per_device(hdev); + + if (timeout_locked) + hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000); + else + hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT; + + hdev->disabled = true; + hdev->pdev = pdev; /* can be NULL in case of simulator device */ + + /* Set default DMA mask to 32 bits */ + hdev->dma_mask = 32; + + mutex_lock(&hl_devs_idr_lock); + + /* Always save 2 numbers, 1 for main device and 1 for control. + * They must be consecutive + */ + main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, + GFP_KERNEL); + + if (main_id >= 0) + ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1, + main_id + 2, GFP_KERNEL); + + mutex_unlock(&hl_devs_idr_lock); + + if ((main_id < 0) || (ctrl_id < 0)) { + if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC)) + pr_err("too many devices in the system\n"); + + if (main_id >= 0) { + mutex_lock(&hl_devs_idr_lock); + idr_remove(&hl_devs_idr, main_id); + mutex_unlock(&hl_devs_idr_lock); + } + + rc = -EBUSY; + goto free_hdev; + } + + hdev->id = main_id; + hdev->id_control = ctrl_id; + + *dev = hdev; + + return 0; + +free_hdev: + kfree(hdev); + return rc; +} + +/* + * destroy_hdev - destroy habanalabs device instance + * + * @dev: pointer to the habanalabs device structure + * + */ +void destroy_hdev(struct hl_device *hdev) +{ + /* Remove device from the device list */ + mutex_lock(&hl_devs_idr_lock); + idr_remove(&hl_devs_idr, hdev->id); + idr_remove(&hl_devs_idr, hdev->id_control); + mutex_unlock(&hl_devs_idr_lock); + + kfree(hdev); +} + +static int hl_pmops_suspend(struct device *dev) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + pr_debug("Going to suspend PCI device\n"); + + if (!hdev) { + pr_err("device pointer is NULL in suspend\n"); + return 0; + } + + return hl_device_suspend(hdev); +} + +static int hl_pmops_resume(struct device *dev) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + pr_debug("Going to resume PCI device\n"); + + if (!hdev) { + pr_err("device pointer is NULL in resume\n"); + return 0; + } + + return hl_device_resume(hdev); +} + +/* + * hl_pci_probe - probe PCI habanalabs devices + * + * @pdev: pointer to pci device + * @id: pointer to pci device id structure + * + * Standard PCI probe function for habanalabs device. + * Create a new habanalabs device and initialize it according to the + * device's type + */ +static int hl_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct hl_device *hdev; + int rc; + + dev_info(&pdev->dev, HL_NAME + " device found [%04x:%04x] (rev %x)\n", + (int)pdev->vendor, (int)pdev->device, (int)pdev->revision); + + rc = create_hdev(&hdev, pdev, ASIC_INVALID, -1); + if (rc) + return rc; + + pci_set_drvdata(pdev, hdev); + + pci_enable_pcie_error_reporting(pdev); + + rc = hl_device_init(hdev, hl_class); + if (rc) { + dev_err(&pdev->dev, "Fatal error during habanalabs device init\n"); + rc = -ENODEV; + goto disable_device; + } + + return 0; + +disable_device: + pci_disable_pcie_error_reporting(pdev); + pci_set_drvdata(pdev, NULL); + destroy_hdev(hdev); + + return rc; +} + +/* + * hl_pci_remove - remove PCI habanalabs devices + * + * @pdev: pointer to pci device + * + * Standard PCI remove function for habanalabs device + */ +static void hl_pci_remove(struct pci_dev *pdev) +{ + struct hl_device *hdev; + + hdev = pci_get_drvdata(pdev); + if (!hdev) + return; + + hl_device_fini(hdev); + pci_disable_pcie_error_reporting(pdev); + pci_set_drvdata(pdev, NULL); + destroy_hdev(hdev); +} + +/** + * hl_pci_err_detected - a PCI bus error detected on this device + * + * @pdev: pointer to pci device + * @state: PCI error type + * + * Called by the PCI subsystem whenever a non-correctable + * PCI bus error is detected + */ +static pci_ers_result_t +hl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state) +{ + struct hl_device *hdev = pci_get_drvdata(pdev); + enum pci_ers_result result; + + switch (state) { + case pci_channel_io_normal: + return PCI_ERS_RESULT_CAN_RECOVER; + + case pci_channel_io_frozen: + dev_warn(hdev->dev, "frozen state error detected\n"); + result = PCI_ERS_RESULT_NEED_RESET; + break; + + case pci_channel_io_perm_failure: + dev_warn(hdev->dev, "failure state error detected\n"); + result = PCI_ERS_RESULT_DISCONNECT; + break; + + default: + result = PCI_ERS_RESULT_NONE; + } + + hdev->asic_funcs->halt_engines(hdev, true); + + return result; +} + +/** + * hl_pci_err_resume - resume after a PCI slot reset + * + * @pdev: pointer to pci device + * + */ +static void hl_pci_err_resume(struct pci_dev *pdev) +{ + struct hl_device *hdev = pci_get_drvdata(pdev); + + dev_warn(hdev->dev, "Resuming device after PCI slot reset\n"); + hl_device_resume(hdev); +} + +/** + * hl_pci_err_slot_reset - a PCI slot reset has just happened + * + * @pdev: pointer to pci device + * + * Determine if the driver can recover from the PCI slot reset + */ +static pci_ers_result_t hl_pci_err_slot_reset(struct pci_dev *pdev) +{ + return PCI_ERS_RESULT_RECOVERED; +} + +static const struct dev_pm_ops hl_pm_ops = { + .suspend = hl_pmops_suspend, + .resume = hl_pmops_resume, +}; + +static const struct pci_error_handlers hl_pci_err_handler = { + .error_detected = hl_pci_err_detected, + .slot_reset = hl_pci_err_slot_reset, + .resume = hl_pci_err_resume, +}; + +static struct pci_driver hl_pci_driver = { + .name = HL_NAME, + .id_table = ids, + .probe = hl_pci_probe, + .remove = hl_pci_remove, + .shutdown = hl_pci_remove, + .driver.pm = &hl_pm_ops, + .err_handler = &hl_pci_err_handler, +}; + +/* + * hl_init - Initialize the habanalabs kernel driver + */ +static int __init hl_init(void) +{ + int rc; + dev_t dev; + + pr_info("loading driver\n"); + + rc = alloc_chrdev_region(&dev, 0, HL_MAX_MINORS, HL_NAME); + if (rc < 0) { + pr_err("unable to get major\n"); + return rc; + } + + hl_major = MAJOR(dev); + + hl_class = class_create(THIS_MODULE, HL_NAME); + if (IS_ERR(hl_class)) { + pr_err("failed to allocate class\n"); + rc = PTR_ERR(hl_class); + goto remove_major; + } + + hl_debugfs_init(); + + rc = pci_register_driver(&hl_pci_driver); + if (rc) { + pr_err("failed to register pci device\n"); + goto remove_debugfs; + } + + pr_debug("driver loaded\n"); + + return 0; + +remove_debugfs: + hl_debugfs_fini(); + class_destroy(hl_class); +remove_major: + unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS); + return rc; +} + +/* + * hl_exit - Release all resources of the habanalabs kernel driver + */ +static void __exit hl_exit(void) +{ + pci_unregister_driver(&hl_pci_driver); + + /* + * Removing debugfs must be after all devices or simulator devices + * have been removed because otherwise we get a bug in the + * debugfs module for referencing NULL objects + */ + hl_debugfs_fini(); + + class_destroy(hl_class); + unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS); + + idr_destroy(&hl_devs_idr); + + pr_debug("driver removed\n"); +} + +module_init(hl_init); +module_exit(hl_exit); diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c new file mode 100644 index 000000000..354011489 --- /dev/null +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -0,0 +1,641 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include <uapi/misc/habanalabs.h> +#include "habanalabs.h" + +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/uaccess.h> +#include <linux/slab.h> + +static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = { + [HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr), + [HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf), + [HL_DEBUG_OP_STM] = sizeof(struct hl_debug_params_stm), + [HL_DEBUG_OP_FUNNEL] = 0, + [HL_DEBUG_OP_BMON] = sizeof(struct hl_debug_params_bmon), + [HL_DEBUG_OP_SPMU] = sizeof(struct hl_debug_params_spmu), + [HL_DEBUG_OP_TIMESTAMP] = 0 + +}; + +static int device_status_info(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_device_status dev_stat = {0}; + u32 size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!size) || (!out)) + return -EINVAL; + + dev_stat.status = hl_device_status(hdev); + + return copy_to_user(out, &dev_stat, + min((size_t)size, sizeof(dev_stat))) ? -EFAULT : 0; +} + +static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_hw_ip_info hw_ip = {0}; + u32 size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 sram_kmd_size, dram_kmd_size; + + if ((!size) || (!out)) + return -EINVAL; + + sram_kmd_size = (prop->sram_user_base_address - + prop->sram_base_address); + dram_kmd_size = (prop->dram_user_base_address - + prop->dram_base_address); + + hw_ip.device_id = hdev->asic_funcs->get_pci_id(hdev); + hw_ip.sram_base_address = prop->sram_user_base_address; + hw_ip.dram_base_address = prop->dram_user_base_address; + hw_ip.tpc_enabled_mask = prop->tpc_enabled_mask; + hw_ip.sram_size = prop->sram_size - sram_kmd_size; + hw_ip.dram_size = prop->dram_size - dram_kmd_size; + if (hw_ip.dram_size > PAGE_SIZE) + hw_ip.dram_enabled = 1; + hw_ip.num_of_events = prop->num_of_events; + + memcpy(hw_ip.cpucp_version, prop->cpucp_info.cpucp_version, + min(VERSION_MAX_LEN, HL_INFO_VERSION_MAX_LEN)); + + memcpy(hw_ip.card_name, prop->cpucp_info.card_name, + min(CARD_NAME_MAX_LEN, HL_INFO_CARD_NAME_MAX_LEN)); + + hw_ip.cpld_version = le32_to_cpu(prop->cpucp_info.cpld_version); + hw_ip.module_id = le32_to_cpu(prop->cpucp_info.card_location); + + hw_ip.psoc_pci_pll_nr = prop->psoc_pci_pll_nr; + hw_ip.psoc_pci_pll_nf = prop->psoc_pci_pll_nf; + hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od; + hw_ip.psoc_pci_pll_div_factor = prop->psoc_pci_pll_div_factor; + + return copy_to_user(out, &hw_ip, + min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0; +} + +static int hw_events_info(struct hl_device *hdev, bool aggregate, + struct hl_info_args *args) +{ + u32 size, max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + void *arr; + + if ((!max_size) || (!out)) + return -EINVAL; + + arr = hdev->asic_funcs->get_events_stat(hdev, aggregate, &size); + + return copy_to_user(out, arr, min(max_size, size)) ? -EFAULT : 0; +} + +static int dram_usage_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_info_dram_usage dram_usage = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 dram_kmd_size; + + if ((!max_size) || (!out)) + return -EINVAL; + + dram_kmd_size = (prop->dram_user_base_address - + prop->dram_base_address); + dram_usage.dram_free_mem = (prop->dram_size - dram_kmd_size) - + atomic64_read(&hdev->dram_used_mem); + if (hpriv->ctx) + dram_usage.ctx_dram_mem = + atomic64_read(&hpriv->ctx->dram_phys_mem); + + return copy_to_user(out, &dram_usage, + min((size_t) max_size, sizeof(dram_usage))) ? -EFAULT : 0; +} + +static int hw_idle(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_hw_idle hw_idle = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev, + &hw_idle.busy_engines_mask_ext, NULL); + hw_idle.busy_engines_mask = + lower_32_bits(hw_idle.busy_engines_mask_ext); + + return copy_to_user(out, &hw_idle, + min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0; +} + +static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args) +{ + struct hl_debug_params *params; + void *input = NULL, *output = NULL; + int rc; + + params = kzalloc(sizeof(*params), GFP_KERNEL); + if (!params) + return -ENOMEM; + + params->reg_idx = args->reg_idx; + params->enable = args->enable; + params->op = args->op; + + if (args->input_ptr && args->input_size) { + input = kzalloc(hl_debug_struct_size[args->op], GFP_KERNEL); + if (!input) { + rc = -ENOMEM; + goto out; + } + + if (copy_from_user(input, u64_to_user_ptr(args->input_ptr), + args->input_size)) { + rc = -EFAULT; + dev_err(hdev->dev, "failed to copy input debug data\n"); + goto out; + } + + params->input = input; + } + + if (args->output_ptr && args->output_size) { + output = kzalloc(args->output_size, GFP_KERNEL); + if (!output) { + rc = -ENOMEM; + goto out; + } + + params->output = output; + params->output_size = args->output_size; + } + + rc = hdev->asic_funcs->debug_coresight(hdev, params); + if (rc) { + dev_err(hdev->dev, + "debug coresight operation failed %d\n", rc); + goto out; + } + + if (output && copy_to_user((void __user *) (uintptr_t) args->output_ptr, + output, args->output_size)) { + dev_err(hdev->dev, "copy to user failed in debug ioctl\n"); + rc = -EFAULT; + goto out; + } + + +out: + kfree(params); + kfree(output); + kfree(input); + + return rc; +} + +static int device_utilization(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_device_utilization device_util = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + if ((args->period_ms < 100) || (args->period_ms > 1000) || + (args->period_ms % 100)) { + dev_err(hdev->dev, + "period %u must be between 100 - 1000 and must be divisible by 100\n", + args->period_ms); + return -EINVAL; + } + + device_util.utilization = hl_device_utilization(hdev, args->period_ms); + + return copy_to_user(out, &device_util, + min((size_t) max_size, sizeof(device_util))) ? -EFAULT : 0; +} + +static int get_clk_rate(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_clk_rate clk_rate = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + int rc; + + if ((!max_size) || (!out)) + return -EINVAL; + + rc = hdev->asic_funcs->get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz, + &clk_rate.max_clk_rate_mhz); + if (rc) + return rc; + + return copy_to_user(out, &clk_rate, + min((size_t) max_size, sizeof(clk_rate))) ? -EFAULT : 0; +} + +static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_reset_count reset_count = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + reset_count.hard_reset_cnt = hdev->hard_reset_cnt; + reset_count.soft_reset_cnt = hdev->soft_reset_cnt; + + return copy_to_user(out, &reset_count, + min((size_t) max_size, sizeof(reset_count))) ? -EFAULT : 0; +} + +static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args) +{ + struct hl_info_time_sync time_sync = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + time_sync.device_time = hdev->asic_funcs->get_device_time(hdev); + time_sync.host_time = ktime_get_raw_ns(); + + return copy_to_user(out, &time_sync, + min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0; +} + +static int pci_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_info_pci_counters pci_counters = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + int rc; + + if ((!max_size) || (!out)) + return -EINVAL; + + rc = hl_fw_cpucp_pci_counters_get(hdev, &pci_counters); + if (rc) + return rc; + + return copy_to_user(out, &pci_counters, + min((size_t) max_size, sizeof(pci_counters))) ? -EFAULT : 0; +} + +static int clk_throttle_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_info_clk_throttle clk_throttle = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + clk_throttle.clk_throttling_reason = hdev->clk_throttling_reason; + + return copy_to_user(out, &clk_throttle, + min((size_t) max_size, sizeof(clk_throttle))) ? -EFAULT : 0; +} + +static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_info_cs_counters cs_counters = { {0} }; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + memcpy(&cs_counters.cs_counters, &hdev->aggregated_cs_counters, + sizeof(struct hl_cs_counters)); + + if (hpriv->ctx) + memcpy(&cs_counters.ctx_cs_counters, &hpriv->ctx->cs_counters, + sizeof(struct hl_cs_counters)); + + return copy_to_user(out, &cs_counters, + min((size_t) max_size, sizeof(cs_counters))) ? -EFAULT : 0; +} + +static int sync_manager_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_info_sync_manager sm_info = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + + if ((!max_size) || (!out)) + return -EINVAL; + + if (args->dcore_id >= HL_MAX_DCORES) + return -EINVAL; + + sm_info.first_available_sync_object = + prop->first_available_user_sob[args->dcore_id]; + sm_info.first_available_monitor = + prop->first_available_user_mon[args->dcore_id]; + + + return copy_to_user(out, &sm_info, min_t(size_t, (size_t) max_size, + sizeof(sm_info))) ? -EFAULT : 0; +} + +static int total_energy_consumption_info(struct hl_fpriv *hpriv, + struct hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_info_energy total_energy = {0}; + u32 max_size = args->return_size; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + int rc; + + if ((!max_size) || (!out)) + return -EINVAL; + + rc = hl_fw_cpucp_total_energy_get(hdev, + &total_energy.total_energy_consumption); + if (rc) + return rc; + + return copy_to_user(out, &total_energy, + min((size_t) max_size, sizeof(total_energy))) ? -EFAULT : 0; +} + +static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, + struct device *dev) +{ + struct hl_info_args *args = data; + struct hl_device *hdev = hpriv->hdev; + int rc; + + /* + * Information is returned for the following opcodes even if the device + * is disabled or in reset. + */ + switch (args->op) { + case HL_INFO_HW_IP_INFO: + return hw_ip_info(hdev, args); + + case HL_INFO_DEVICE_STATUS: + return device_status_info(hdev, args); + + case HL_INFO_RESET_COUNT: + return get_reset_count(hdev, args); + + default: + break; + } + + if (hl_device_disabled_or_in_reset(hdev)) { + dev_warn_ratelimited(dev, + "Device is %s. Can't execute INFO IOCTL\n", + atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); + return -EBUSY; + } + + switch (args->op) { + case HL_INFO_HW_EVENTS: + rc = hw_events_info(hdev, false, args); + break; + + case HL_INFO_DRAM_USAGE: + rc = dram_usage_info(hpriv, args); + break; + + case HL_INFO_HW_IDLE: + rc = hw_idle(hdev, args); + break; + + case HL_INFO_DEVICE_UTILIZATION: + rc = device_utilization(hdev, args); + break; + + case HL_INFO_HW_EVENTS_AGGREGATE: + rc = hw_events_info(hdev, true, args); + break; + + case HL_INFO_CLK_RATE: + rc = get_clk_rate(hdev, args); + break; + + case HL_INFO_TIME_SYNC: + return time_sync_info(hdev, args); + + case HL_INFO_CS_COUNTERS: + return cs_counters_info(hpriv, args); + + case HL_INFO_PCI_COUNTERS: + return pci_counters_info(hpriv, args); + + case HL_INFO_CLK_THROTTLE_REASON: + return clk_throttle_info(hpriv, args); + + case HL_INFO_SYNC_MANAGER: + return sync_manager_info(hpriv, args); + + case HL_INFO_TOTAL_ENERGY: + return total_energy_consumption_info(hpriv, args); + + default: + dev_err(dev, "Invalid request %d\n", args->op); + rc = -ENOTTY; + break; + } + + return rc; +} + +static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data) +{ + return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev); +} + +static int hl_info_ioctl_control(struct hl_fpriv *hpriv, void *data) +{ + return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev_ctrl); +} + +static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data) +{ + struct hl_debug_args *args = data; + struct hl_device *hdev = hpriv->hdev; + int rc = 0; + + if (hl_device_disabled_or_in_reset(hdev)) { + dev_warn_ratelimited(hdev->dev, + "Device is %s. Can't execute DEBUG IOCTL\n", + atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); + return -EBUSY; + } + + switch (args->op) { + case HL_DEBUG_OP_ETR: + case HL_DEBUG_OP_ETF: + case HL_DEBUG_OP_STM: + case HL_DEBUG_OP_FUNNEL: + case HL_DEBUG_OP_BMON: + case HL_DEBUG_OP_SPMU: + case HL_DEBUG_OP_TIMESTAMP: + if (!hdev->in_debug) { + dev_err_ratelimited(hdev->dev, + "Rejecting debug configuration request because device not in debug mode\n"); + return -EFAULT; + } + args->input_size = + min(args->input_size, hl_debug_struct_size[args->op]); + rc = debug_coresight(hdev, args); + break; + case HL_DEBUG_OP_SET_MODE: + rc = hl_device_set_debug_mode(hdev, (bool) args->enable); + break; + default: + dev_err(hdev->dev, "Invalid request %d\n", args->op); + rc = -ENOTTY; + break; + } + + return rc; +} + +#define HL_IOCTL_DEF(ioctl, _func) \ + [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func} + +static const struct hl_ioctl_desc hl_ioctls[] = { + HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl), + HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl), + HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl), + HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl), + HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl), + HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl) +}; + +static const struct hl_ioctl_desc hl_ioctls_control[] = { + HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl_control) +}; + +static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg, + const struct hl_ioctl_desc *ioctl, struct device *dev) +{ + struct hl_fpriv *hpriv = filep->private_data; + struct hl_device *hdev = hpriv->hdev; + unsigned int nr = _IOC_NR(cmd); + char stack_kdata[128] = {0}; + char *kdata = NULL; + unsigned int usize, asize; + hl_ioctl_t *func; + u32 hl_size; + int retcode; + + if (hdev->hard_reset_pending) { + dev_crit_ratelimited(hdev->dev_ctrl, + "Device HARD reset pending! Please close FD\n"); + return -ENODEV; + } + + /* Do not trust userspace, use our own definition */ + func = ioctl->func; + + if (unlikely(!func)) { + dev_dbg(dev, "no function\n"); + retcode = -ENOTTY; + goto out_err; + } + + hl_size = _IOC_SIZE(ioctl->cmd); + usize = asize = _IOC_SIZE(cmd); + if (hl_size > asize) + asize = hl_size; + + cmd = ioctl->cmd; + + if (cmd & (IOC_IN | IOC_OUT)) { + if (asize <= sizeof(stack_kdata)) { + kdata = stack_kdata; + } else { + kdata = kzalloc(asize, GFP_KERNEL); + if (!kdata) { + retcode = -ENOMEM; + goto out_err; + } + } + } + + if (cmd & IOC_IN) { + if (copy_from_user(kdata, (void __user *)arg, usize)) { + retcode = -EFAULT; + goto out_err; + } + } else if (cmd & IOC_OUT) { + memset(kdata, 0, usize); + } + + retcode = func(hpriv, kdata); + + if ((cmd & IOC_OUT) && copy_to_user((void __user *)arg, kdata, usize)) + retcode = -EFAULT; + +out_err: + if (retcode) + dev_dbg(dev, "error in ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n", + task_pid_nr(current), cmd, nr); + + if (kdata != stack_kdata) + kfree(kdata); + + return retcode; +} + +long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) +{ + struct hl_fpriv *hpriv = filep->private_data; + struct hl_device *hdev = hpriv->hdev; + const struct hl_ioctl_desc *ioctl = NULL; + unsigned int nr = _IOC_NR(cmd); + + if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) { + ioctl = &hl_ioctls[nr]; + } else { + dev_err(hdev->dev, "invalid ioctl: pid=%d, nr=0x%02x\n", + task_pid_nr(current), nr); + return -ENOTTY; + } + + return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev); +} + +long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg) +{ + struct hl_fpriv *hpriv = filep->private_data; + struct hl_device *hdev = hpriv->hdev; + const struct hl_ioctl_desc *ioctl = NULL; + unsigned int nr = _IOC_NR(cmd); + + if (nr == _IOC_NR(HL_IOCTL_INFO)) { + ioctl = &hl_ioctls_control[nr]; + } else { + dev_err(hdev->dev_ctrl, "invalid ioctl: pid=%d, nr=0x%02x\n", + task_pid_nr(current), nr); + return -ENOTTY; + } + + return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev_ctrl); +} diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c new file mode 100644 index 000000000..250cf9cef --- /dev/null +++ b/drivers/misc/habanalabs/common/hw_queue.c @@ -0,0 +1,917 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" + +#include <linux/slab.h> + +/* + * hl_queue_add_ptr - add to pi or ci and checks if it wraps around + * + * @ptr: the current pi/ci value + * @val: the amount to add + * + * Add val to ptr. It can go until twice the queue length. + */ +inline u32 hl_hw_queue_add_ptr(u32 ptr, u16 val) +{ + ptr += val; + ptr &= ((HL_QUEUE_LENGTH << 1) - 1); + return ptr; +} +static inline int queue_ci_get(atomic_t *ci, u32 queue_len) +{ + return atomic_read(ci) & ((queue_len << 1) - 1); +} + +static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len) +{ + int delta = (q->pi - queue_ci_get(&q->ci, queue_len)); + + if (delta >= 0) + return (queue_len - delta); + else + return (abs(delta) - queue_len); +} + +void hl_int_hw_queue_update_ci(struct hl_cs *cs) +{ + struct hl_device *hdev = cs->ctx->hdev; + struct hl_hw_queue *q; + int i; + + if (hdev->disabled) + return; + + q = &hdev->kernel_queues[0]; + for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) { + if (q->queue_type == QUEUE_TYPE_INT) + atomic_add(cs->jobs_in_queue_cnt[i], &q->ci); + } +} + +/* + * ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a + * H/W queue. + * @hdev: pointer to habanalabs device structure + * @q: pointer to habanalabs queue structure + * @ctl: BD's control word + * @len: BD's length + * @ptr: BD's pointer + * + * This function assumes there is enough space on the queue to submit a new + * BD to it. It initializes the next BD and calls the device specific + * function to set the pi (and doorbell) + * + * This function must be called when the scheduler mutex is taken + * + */ +static void ext_and_hw_queue_submit_bd(struct hl_device *hdev, + struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr) +{ + struct hl_bd *bd; + + bd = q->kernel_address; + bd += hl_pi_2_offset(q->pi); + bd->ctl = cpu_to_le32(ctl); + bd->len = cpu_to_le32(len); + bd->ptr = cpu_to_le64(ptr); + + q->pi = hl_queue_inc_ptr(q->pi); + hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi); +} + +/* + * ext_queue_sanity_checks - perform some sanity checks on external queue + * + * @hdev : pointer to hl_device structure + * @q : pointer to hl_hw_queue structure + * @num_of_entries : how many entries to check for space + * @reserve_cq_entry : whether to reserve an entry in the cq + * + * H/W queues spinlock should be taken before calling this function + * + * Perform the following: + * - Make sure we have enough space in the h/w queue + * - Make sure we have enough space in the completion queue + * - Reserve space in the completion queue (needs to be reversed if there + * is a failure down the road before the actual submission of work). Only + * do this action if reserve_cq_entry is true + * + */ +static int ext_queue_sanity_checks(struct hl_device *hdev, + struct hl_hw_queue *q, int num_of_entries, + bool reserve_cq_entry) +{ + atomic_t *free_slots = + &hdev->completion_queue[q->cq_id].free_slots_cnt; + int free_slots_cnt; + + /* Check we have enough space in the queue */ + free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH); + + if (free_slots_cnt < num_of_entries) { + dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n", + q->hw_queue_id, num_of_entries); + return -EAGAIN; + } + + if (reserve_cq_entry) { + /* + * Check we have enough space in the completion queue + * Add -1 to counter (decrement) unless counter was already 0 + * In that case, CQ is full so we can't submit a new CB because + * we won't get ack on its completion + * atomic_add_unless will return 0 if counter was already 0 + */ + if (atomic_add_negative(num_of_entries * -1, free_slots)) { + dev_dbg(hdev->dev, "No space for %d on CQ %d\n", + num_of_entries, q->hw_queue_id); + atomic_add(num_of_entries, free_slots); + return -EAGAIN; + } + } + + return 0; +} + +/* + * int_queue_sanity_checks - perform some sanity checks on internal queue + * + * @hdev : pointer to hl_device structure + * @q : pointer to hl_hw_queue structure + * @num_of_entries : how many entries to check for space + * + * H/W queues spinlock should be taken before calling this function + * + * Perform the following: + * - Make sure we have enough space in the h/w queue + * + */ +static int int_queue_sanity_checks(struct hl_device *hdev, + struct hl_hw_queue *q, + int num_of_entries) +{ + int free_slots_cnt; + + if (num_of_entries > q->int_queue_len) { + dev_err(hdev->dev, + "Cannot populate queue %u with %u jobs\n", + q->hw_queue_id, num_of_entries); + return -ENOMEM; + } + + /* Check we have enough space in the queue */ + free_slots_cnt = queue_free_slots(q, q->int_queue_len); + + if (free_slots_cnt < num_of_entries) { + dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n", + q->hw_queue_id, num_of_entries); + return -EAGAIN; + } + + return 0; +} + +/* + * hw_queue_sanity_checks() - Make sure we have enough space in the h/w queue + * @hdev: Pointer to hl_device structure. + * @q: Pointer to hl_hw_queue structure. + * @num_of_entries: How many entries to check for space. + * + * Notice: We do not reserve queue entries so this function mustn't be called + * more than once per CS for the same queue + * + */ +static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q, + int num_of_entries) +{ + int free_slots_cnt; + + /* Check we have enough space in the queue */ + free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH); + + if (free_slots_cnt < num_of_entries) { + dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n", + q->hw_queue_id, num_of_entries); + return -EAGAIN; + } + + return 0; +} + +/* + * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion + * + * @hdev: pointer to hl_device structure + * @hw_queue_id: Queue's type + * @cb_size: size of CB + * @cb_ptr: pointer to CB location + * + * This function sends a single CB, that must NOT generate a completion entry + * + */ +int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id, + u32 cb_size, u64 cb_ptr) +{ + struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id]; + int rc = 0; + + /* + * The CPU queue is a synchronous queue with an effective depth of + * a single entry (although it is allocated with room for multiple + * entries). Therefore, there is a different lock, called + * send_cpu_message_lock, that serializes accesses to the CPU queue. + * As a result, we don't need to lock the access to the entire H/W + * queues module when submitting a JOB to the CPU queue + */ + if (q->queue_type != QUEUE_TYPE_CPU) + hdev->asic_funcs->hw_queues_lock(hdev); + + if (hdev->disabled) { + rc = -EPERM; + goto out; + } + + /* + * hl_hw_queue_send_cb_no_cmpl() is called for queues of a H/W queue + * type only on init phase, when the queues are empty and being tested, + * so there is no need for sanity checks. + */ + if (q->queue_type != QUEUE_TYPE_HW) { + rc = ext_queue_sanity_checks(hdev, q, 1, false); + if (rc) + goto out; + } + + ext_and_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr); + +out: + if (q->queue_type != QUEUE_TYPE_CPU) + hdev->asic_funcs->hw_queues_unlock(hdev); + + return rc; +} + +/* + * ext_queue_schedule_job - submit a JOB to an external queue + * + * @job: pointer to the job that needs to be submitted to the queue + * + * This function must be called when the scheduler mutex is taken + * + */ +static void ext_queue_schedule_job(struct hl_cs_job *job) +{ + struct hl_device *hdev = job->cs->ctx->hdev; + struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; + struct hl_cq_entry cq_pkt; + struct hl_cq *cq; + u64 cq_addr; + struct hl_cb *cb; + u32 ctl; + u32 len; + u64 ptr; + + /* + * Update the JOB ID inside the BD CTL so the device would know what + * to write in the completion queue + */ + ctl = ((q->pi << BD_CTL_SHADOW_INDEX_SHIFT) & BD_CTL_SHADOW_INDEX_MASK); + + cb = job->patched_cb; + len = job->job_cb_size; + ptr = cb->bus_address; + + cq_pkt.data = cpu_to_le32( + ((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT) + & CQ_ENTRY_SHADOW_INDEX_MASK) | + FIELD_PREP(CQ_ENTRY_SHADOW_INDEX_VALID_MASK, 1) | + FIELD_PREP(CQ_ENTRY_READY_MASK, 1)); + + /* + * No need to protect pi_offset because scheduling to the + * H/W queues is done under the scheduler mutex + * + * No need to check if CQ is full because it was already + * checked in ext_queue_sanity_checks + */ + cq = &hdev->completion_queue[q->cq_id]; + cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry); + + hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len, + cq_addr, + le32_to_cpu(cq_pkt.data), + q->msi_vec, + job->contains_dma_pkt); + + q->shadow_queue[hl_pi_2_offset(q->pi)] = job; + + cq->pi = hl_cq_inc_ptr(cq->pi); + + ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr); +} + +/* + * int_queue_schedule_job - submit a JOB to an internal queue + * + * @job: pointer to the job that needs to be submitted to the queue + * + * This function must be called when the scheduler mutex is taken + * + */ +static void int_queue_schedule_job(struct hl_cs_job *job) +{ + struct hl_device *hdev = job->cs->ctx->hdev; + struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; + struct hl_bd bd; + __le64 *pi; + + bd.ctl = 0; + bd.len = cpu_to_le32(job->job_cb_size); + bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb); + + pi = q->kernel_address + (q->pi & (q->int_queue_len - 1)) * sizeof(bd); + + q->pi++; + q->pi &= ((q->int_queue_len << 1) - 1); + + hdev->asic_funcs->pqe_write(hdev, pi, &bd); + + hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi); +} + +/* + * hw_queue_schedule_job - submit a JOB to a H/W queue + * + * @job: pointer to the job that needs to be submitted to the queue + * + * This function must be called when the scheduler mutex is taken + * + */ +static void hw_queue_schedule_job(struct hl_cs_job *job) +{ + struct hl_device *hdev = job->cs->ctx->hdev; + struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; + u64 ptr; + u32 offset, ctl, len; + + /* + * Upon PQE completion, COMP_DATA is used as the write data to the + * completion queue (QMAN HBW message), and COMP_OFFSET is used as the + * write address offset in the SM block (QMAN LBW message). + * The write address offset is calculated as "COMP_OFFSET << 2". + */ + offset = job->cs->sequence & (hdev->asic_prop.max_pending_cs - 1); + ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) | + ((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK); + + len = job->job_cb_size; + + /* + * A patched CB is created only if a user CB was allocated by driver and + * MMU is disabled. If MMU is enabled, the user CB should be used + * instead. If the user CB wasn't allocated by driver, assume that it + * holds an address. + */ + if (job->patched_cb) + ptr = job->patched_cb->bus_address; + else if (job->is_kernel_allocated_cb) + ptr = job->user_cb->bus_address; + else + ptr = (u64) (uintptr_t) job->user_cb; + + ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr); +} + +/* + * init_signal_wait_cs - initialize a signal/wait CS + * @cs: pointer to the signal/wait CS + * + * H/W queues spinlock should be taken before calling this function + */ +static void init_signal_wait_cs(struct hl_cs *cs) +{ + struct hl_ctx *ctx = cs->ctx; + struct hl_device *hdev = ctx->hdev; + struct hl_hw_queue *hw_queue; + struct hl_cs_compl *cs_cmpl = + container_of(cs->fence, struct hl_cs_compl, base_fence); + + struct hl_hw_sob *hw_sob; + struct hl_cs_job *job; + u32 q_idx; + + /* There is only one job in a signal/wait CS */ + job = list_first_entry(&cs->job_list, struct hl_cs_job, + cs_node); + q_idx = job->hw_queue_id; + hw_queue = &hdev->kernel_queues[q_idx]; + + if (cs->type & CS_TYPE_SIGNAL) { + hw_sob = &hw_queue->hw_sob[hw_queue->curr_sob_offset]; + + cs_cmpl->hw_sob = hw_sob; + cs_cmpl->sob_val = hw_queue->next_sob_val++; + + dev_dbg(hdev->dev, + "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n", + cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx); + + hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb, + cs_cmpl->hw_sob->sob_id); + + kref_get(&hw_sob->kref); + + /* check for wraparound */ + if (hw_queue->next_sob_val == HL_MAX_SOB_VAL) { + /* + * Decrement as we reached the max value. + * The release function won't be called here as we've + * just incremented the refcount. + */ + kref_put(&hw_sob->kref, hl_sob_reset_error); + hw_queue->next_sob_val = 1; + /* only two SOBs are currently in use */ + hw_queue->curr_sob_offset = + (hw_queue->curr_sob_offset + 1) % + HL_RSVD_SOBS_IN_USE; + + dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n", + hw_queue->curr_sob_offset, q_idx); + } + } else if (cs->type & CS_TYPE_WAIT) { + struct hl_cs_compl *signal_cs_cmpl; + + signal_cs_cmpl = container_of(cs->signal_fence, + struct hl_cs_compl, + base_fence); + + /* copy the the SOB id and value of the signal CS */ + cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob; + cs_cmpl->sob_val = signal_cs_cmpl->sob_val; + + dev_dbg(hdev->dev, + "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n", + cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, + hw_queue->base_mon_id, q_idx); + + hdev->asic_funcs->gen_wait_cb(hdev, job->patched_cb, + cs_cmpl->hw_sob->sob_id, + cs_cmpl->sob_val, + hw_queue->base_mon_id, + q_idx); + + kref_get(&cs_cmpl->hw_sob->kref); + /* + * Must put the signal fence after the SOB refcnt increment so + * the SOB refcnt won't turn 0 and reset the SOB before the + * wait CS was submitted. + */ + mb(); + hl_fence_put(cs->signal_fence); + cs->signal_fence = NULL; + } +} + +/* + * hl_hw_queue_schedule_cs - schedule a command submission + * @cs: pointer to the CS + */ +int hl_hw_queue_schedule_cs(struct hl_cs *cs) +{ + struct hl_ctx *ctx = cs->ctx; + struct hl_device *hdev = ctx->hdev; + struct hl_cs_job *job, *tmp; + struct hl_hw_queue *q; + u32 max_queues; + int rc = 0, i, cq_cnt; + + hdev->asic_funcs->hw_queues_lock(hdev); + + if (hl_device_disabled_or_in_reset(hdev)) { + ctx->cs_counters.device_in_reset_drop_cnt++; + dev_err(hdev->dev, + "device is disabled or in reset, CS rejected!\n"); + rc = -EPERM; + goto out; + } + + max_queues = hdev->asic_prop.max_queues; + + q = &hdev->kernel_queues[0]; + for (i = 0, cq_cnt = 0 ; i < max_queues ; i++, q++) { + if (cs->jobs_in_queue_cnt[i]) { + switch (q->queue_type) { + case QUEUE_TYPE_EXT: + rc = ext_queue_sanity_checks(hdev, q, + cs->jobs_in_queue_cnt[i], true); + break; + case QUEUE_TYPE_INT: + rc = int_queue_sanity_checks(hdev, q, + cs->jobs_in_queue_cnt[i]); + break; + case QUEUE_TYPE_HW: + rc = hw_queue_sanity_checks(hdev, q, + cs->jobs_in_queue_cnt[i]); + break; + default: + dev_err(hdev->dev, "Queue type %d is invalid\n", + q->queue_type); + rc = -EINVAL; + break; + } + + if (rc) { + ctx->cs_counters.queue_full_drop_cnt++; + goto unroll_cq_resv; + } + + if (q->queue_type == QUEUE_TYPE_EXT) + cq_cnt++; + } + } + + if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT)) + init_signal_wait_cs(cs); + + spin_lock(&hdev->hw_queues_mirror_lock); + list_add_tail(&cs->mirror_node, &hdev->hw_queues_mirror_list); + + /* Queue TDR if the CS is the first entry and if timeout is wanted */ + if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) && + (list_first_entry(&hdev->hw_queues_mirror_list, + struct hl_cs, mirror_node) == cs)) { + cs->tdr_active = true; + schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies); + spin_unlock(&hdev->hw_queues_mirror_lock); + } else { + spin_unlock(&hdev->hw_queues_mirror_lock); + } + + if (!hdev->cs_active_cnt++) { + struct hl_device_idle_busy_ts *ts; + + ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx]; + ts->busy_to_idle_ts = ktime_set(0, 0); + ts->idle_to_busy_ts = ktime_get(); + } + + list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) + switch (job->queue_type) { + case QUEUE_TYPE_EXT: + ext_queue_schedule_job(job); + break; + case QUEUE_TYPE_INT: + int_queue_schedule_job(job); + break; + case QUEUE_TYPE_HW: + hw_queue_schedule_job(job); + break; + default: + break; + } + + cs->submitted = true; + + goto out; + +unroll_cq_resv: + q = &hdev->kernel_queues[0]; + for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) { + if ((q->queue_type == QUEUE_TYPE_EXT) && + (cs->jobs_in_queue_cnt[i])) { + atomic_t *free_slots = + &hdev->completion_queue[i].free_slots_cnt; + atomic_add(cs->jobs_in_queue_cnt[i], free_slots); + cq_cnt--; + } + } + +out: + hdev->asic_funcs->hw_queues_unlock(hdev); + + return rc; +} + +/* + * hl_hw_queue_inc_ci_kernel - increment ci for kernel's queue + * + * @hdev: pointer to hl_device structure + * @hw_queue_id: which queue to increment its ci + */ +void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id) +{ + struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id]; + + atomic_inc(&q->ci); +} + +static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q, + bool is_cpu_queue) +{ + void *p; + int rc; + + if (is_cpu_queue) + p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, + HL_QUEUE_SIZE_IN_BYTES, + &q->bus_address); + else + p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, + HL_QUEUE_SIZE_IN_BYTES, + &q->bus_address, + GFP_KERNEL | __GFP_ZERO); + if (!p) + return -ENOMEM; + + q->kernel_address = p; + + q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH, + sizeof(*q->shadow_queue), + GFP_KERNEL); + if (!q->shadow_queue) { + dev_err(hdev->dev, + "Failed to allocate shadow queue for H/W queue %d\n", + q->hw_queue_id); + rc = -ENOMEM; + goto free_queue; + } + + /* Make sure read/write pointers are initialized to start of queue */ + atomic_set(&q->ci, 0); + q->pi = 0; + + return 0; + +free_queue: + if (is_cpu_queue) + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, + HL_QUEUE_SIZE_IN_BYTES, + q->kernel_address); + else + hdev->asic_funcs->asic_dma_free_coherent(hdev, + HL_QUEUE_SIZE_IN_BYTES, + q->kernel_address, + q->bus_address); + + return rc; +} + +static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) +{ + void *p; + + p = hdev->asic_funcs->get_int_queue_base(hdev, q->hw_queue_id, + &q->bus_address, &q->int_queue_len); + if (!p) { + dev_err(hdev->dev, + "Failed to get base address for internal queue %d\n", + q->hw_queue_id); + return -EFAULT; + } + + q->kernel_address = p; + q->pi = 0; + atomic_set(&q->ci, 0); + + return 0; +} + +static int cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) +{ + return ext_and_cpu_queue_init(hdev, q, true); +} + +static int ext_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) +{ + return ext_and_cpu_queue_init(hdev, q, false); +} + +static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) +{ + void *p; + + p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, + HL_QUEUE_SIZE_IN_BYTES, + &q->bus_address, + GFP_KERNEL | __GFP_ZERO); + if (!p) + return -ENOMEM; + + q->kernel_address = p; + + /* Make sure read/write pointers are initialized to start of queue */ + atomic_set(&q->ci, 0); + q->pi = 0; + + return 0; +} + +static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx) +{ + struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx]; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_hw_sob *hw_sob; + int sob, queue_idx = hdev->sync_stream_queue_idx++; + + hw_queue->base_sob_id = + prop->sync_stream_first_sob + queue_idx * HL_RSVD_SOBS; + hw_queue->base_mon_id = + prop->sync_stream_first_mon + queue_idx * HL_RSVD_MONS; + hw_queue->next_sob_val = 1; + hw_queue->curr_sob_offset = 0; + + for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) { + hw_sob = &hw_queue->hw_sob[sob]; + hw_sob->hdev = hdev; + hw_sob->sob_id = hw_queue->base_sob_id + sob; + hw_sob->q_idx = q_idx; + kref_init(&hw_sob->kref); + } +} + +static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx) +{ + struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx]; + + /* + * In case we got here due to a stuck CS, the refcnt might be bigger + * than 1 and therefore we reset it. + */ + kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref); + hw_queue->curr_sob_offset = 0; + hw_queue->next_sob_val = 1; +} + +/* + * queue_init - main initialization function for H/W queue object + * + * @hdev: pointer to hl_device device structure + * @q: pointer to hl_hw_queue queue structure + * @hw_queue_id: The id of the H/W queue + * + * Allocate dma-able memory for the queue and initialize fields + * Returns 0 on success + */ +static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q, + u32 hw_queue_id) +{ + int rc; + + q->hw_queue_id = hw_queue_id; + + switch (q->queue_type) { + case QUEUE_TYPE_EXT: + rc = ext_queue_init(hdev, q); + break; + case QUEUE_TYPE_INT: + rc = int_queue_init(hdev, q); + break; + case QUEUE_TYPE_CPU: + rc = cpu_queue_init(hdev, q); + break; + case QUEUE_TYPE_HW: + rc = hw_queue_init(hdev, q); + break; + case QUEUE_TYPE_NA: + q->valid = 0; + return 0; + default: + dev_crit(hdev->dev, "wrong queue type %d during init\n", + q->queue_type); + rc = -EINVAL; + break; + } + + if (q->supports_sync_stream) + sync_stream_queue_init(hdev, q->hw_queue_id); + + if (rc) + return rc; + + q->valid = 1; + + return 0; +} + +/* + * hw_queue_fini - destroy queue + * + * @hdev: pointer to hl_device device structure + * @q: pointer to hl_hw_queue queue structure + * + * Free the queue memory + */ +static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q) +{ + if (!q->valid) + return; + + /* + * If we arrived here, there are no jobs waiting on this queue + * so we can safely remove it. + * This is because this function can only called when: + * 1. Either a context is deleted, which only can occur if all its + * jobs were finished + * 2. A context wasn't able to be created due to failure or timeout, + * which means there are no jobs on the queue yet + * + * The only exception are the queues of the kernel context, but + * if they are being destroyed, it means that the entire module is + * being removed. If the module is removed, it means there is no open + * user context. It also means that if a job was submitted by + * the kernel driver (e.g. context creation), the job itself was + * released by the kernel driver when a timeout occurred on its + * Completion. Thus, we don't need to release it again. + */ + + if (q->queue_type == QUEUE_TYPE_INT) + return; + + kfree(q->shadow_queue); + + if (q->queue_type == QUEUE_TYPE_CPU) + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, + HL_QUEUE_SIZE_IN_BYTES, + q->kernel_address); + else + hdev->asic_funcs->asic_dma_free_coherent(hdev, + HL_QUEUE_SIZE_IN_BYTES, + q->kernel_address, + q->bus_address); +} + +int hl_hw_queues_create(struct hl_device *hdev) +{ + struct asic_fixed_properties *asic = &hdev->asic_prop; + struct hl_hw_queue *q; + int i, rc, q_ready_cnt; + + hdev->kernel_queues = kcalloc(asic->max_queues, + sizeof(*hdev->kernel_queues), GFP_KERNEL); + + if (!hdev->kernel_queues) { + dev_err(hdev->dev, "Not enough memory for H/W queues\n"); + return -ENOMEM; + } + + /* Initialize the H/W queues */ + for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues; + i < asic->max_queues ; i++, q_ready_cnt++, q++) { + + q->queue_type = asic->hw_queues_props[i].type; + q->supports_sync_stream = + asic->hw_queues_props[i].supports_sync_stream; + rc = queue_init(hdev, q, i); + if (rc) { + dev_err(hdev->dev, + "failed to initialize queue %d\n", i); + goto release_queues; + } + } + + return 0; + +release_queues: + for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++) + queue_fini(hdev, q); + + kfree(hdev->kernel_queues); + + return rc; +} + +void hl_hw_queues_destroy(struct hl_device *hdev) +{ + struct hl_hw_queue *q; + u32 max_queues = hdev->asic_prop.max_queues; + int i; + + for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) + queue_fini(hdev, q); + + kfree(hdev->kernel_queues); +} + +void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset) +{ + struct hl_hw_queue *q; + u32 max_queues = hdev->asic_prop.max_queues; + int i; + + for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) { + if ((!q->valid) || + ((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU))) + continue; + q->pi = 0; + atomic_set(&q->ci, 0); + + if (q->supports_sync_stream) + sync_stream_queue_reset(hdev, q->hw_queue_id); + } +} diff --git a/drivers/misc/habanalabs/common/hwmon.c b/drivers/misc/habanalabs/common/hwmon.c new file mode 100644 index 000000000..2ac29cb2f --- /dev/null +++ b/drivers/misc/habanalabs/common/hwmon.c @@ -0,0 +1,578 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" + +#include <linux/pci.h> +#include <linux/hwmon.h> + +#define HWMON_NR_SENSOR_TYPES (hwmon_pwm + 1) + +int hl_build_hwmon_channel_info(struct hl_device *hdev, + struct cpucp_sensor *sensors_arr) +{ + u32 counts[HWMON_NR_SENSOR_TYPES] = {0}; + u32 *sensors_by_type[HWMON_NR_SENSOR_TYPES] = {NULL}; + u32 sensors_by_type_next_index[HWMON_NR_SENSOR_TYPES] = {0}; + struct hwmon_channel_info **channels_info; + u32 num_sensors_for_type, num_active_sensor_types = 0, + arr_size = 0, *curr_arr; + enum hwmon_sensor_types type; + int rc, i, j; + + for (i = 0 ; i < CPUCP_MAX_SENSORS ; i++) { + type = le32_to_cpu(sensors_arr[i].type); + + if ((type == 0) && (sensors_arr[i].flags == 0)) + break; + + if (type >= HWMON_NR_SENSOR_TYPES) { + dev_err(hdev->dev, + "Got wrong sensor type %d from device\n", type); + return -EINVAL; + } + + counts[type]++; + arr_size++; + } + + for (i = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++) { + if (counts[i] == 0) + continue; + + num_sensors_for_type = counts[i] + 1; + curr_arr = kcalloc(num_sensors_for_type, sizeof(*curr_arr), + GFP_KERNEL); + if (!curr_arr) { + rc = -ENOMEM; + goto sensors_type_err; + } + + num_active_sensor_types++; + sensors_by_type[i] = curr_arr; + } + + for (i = 0 ; i < arr_size ; i++) { + type = le32_to_cpu(sensors_arr[i].type); + curr_arr = sensors_by_type[type]; + curr_arr[sensors_by_type_next_index[type]++] = + le32_to_cpu(sensors_arr[i].flags); + } + + channels_info = kcalloc(num_active_sensor_types + 1, + sizeof(*channels_info), GFP_KERNEL); + if (!channels_info) { + rc = -ENOMEM; + goto channels_info_array_err; + } + + for (i = 0 ; i < num_active_sensor_types ; i++) { + channels_info[i] = kzalloc(sizeof(*channels_info[i]), + GFP_KERNEL); + if (!channels_info[i]) { + rc = -ENOMEM; + goto channel_info_err; + } + } + + for (i = 0, j = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++) { + if (!sensors_by_type[i]) + continue; + + channels_info[j]->type = i; + channels_info[j]->config = sensors_by_type[i]; + j++; + } + + hdev->hl_chip_info->info = + (const struct hwmon_channel_info **)channels_info; + + return 0; + +channel_info_err: + for (i = 0 ; i < num_active_sensor_types ; i++) + if (channels_info[i]) { + kfree(channels_info[i]->config); + kfree(channels_info[i]); + } + kfree(channels_info); +channels_info_array_err: +sensors_type_err: + for (i = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++) + kfree(sensors_by_type[i]); + + return rc; +} + +static int hl_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *val) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + int rc; + + if (hl_device_disabled_or_in_reset(hdev)) + return -ENODEV; + + switch (type) { + case hwmon_temp: + switch (attr) { + case hwmon_temp_input: + case hwmon_temp_max: + case hwmon_temp_crit: + case hwmon_temp_max_hyst: + case hwmon_temp_crit_hyst: + case hwmon_temp_offset: + case hwmon_temp_highest: + break; + default: + return -EINVAL; + } + + rc = hl_get_temperature(hdev, channel, attr, val); + break; + case hwmon_in: + switch (attr) { + case hwmon_in_input: + case hwmon_in_min: + case hwmon_in_max: + case hwmon_in_highest: + break; + default: + return -EINVAL; + } + + rc = hl_get_voltage(hdev, channel, attr, val); + break; + case hwmon_curr: + switch (attr) { + case hwmon_curr_input: + case hwmon_curr_min: + case hwmon_curr_max: + case hwmon_curr_highest: + break; + default: + return -EINVAL; + } + + rc = hl_get_current(hdev, channel, attr, val); + break; + case hwmon_fan: + switch (attr) { + case hwmon_fan_input: + case hwmon_fan_min: + case hwmon_fan_max: + break; + default: + return -EINVAL; + } + rc = hl_get_fan_speed(hdev, channel, attr, val); + break; + case hwmon_pwm: + switch (attr) { + case hwmon_pwm_input: + case hwmon_pwm_enable: + break; + default: + return -EINVAL; + } + rc = hl_get_pwm_info(hdev, channel, attr, val); + break; + default: + return -EINVAL; + } + return rc; +} + +static int hl_write(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long val) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + if (hl_device_disabled_or_in_reset(hdev)) + return -ENODEV; + + switch (type) { + case hwmon_temp: + switch (attr) { + case hwmon_temp_offset: + case hwmon_temp_reset_history: + break; + default: + return -EINVAL; + } + hl_set_temperature(hdev, channel, attr, val); + break; + case hwmon_pwm: + switch (attr) { + case hwmon_pwm_input: + case hwmon_pwm_enable: + break; + default: + return -EINVAL; + } + hl_set_pwm_info(hdev, channel, attr, val); + break; + case hwmon_in: + switch (attr) { + case hwmon_in_reset_history: + break; + default: + return -EINVAL; + } + hl_set_voltage(hdev, channel, attr, val); + break; + case hwmon_curr: + switch (attr) { + case hwmon_curr_reset_history: + break; + default: + return -EINVAL; + } + hl_set_current(hdev, channel, attr, val); + break; + default: + return -EINVAL; + } + return 0; +} + +static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type, + u32 attr, int channel) +{ + switch (type) { + case hwmon_temp: + switch (attr) { + case hwmon_temp_input: + case hwmon_temp_max: + case hwmon_temp_max_hyst: + case hwmon_temp_crit: + case hwmon_temp_crit_hyst: + case hwmon_temp_highest: + return 0444; + case hwmon_temp_offset: + return 0644; + case hwmon_temp_reset_history: + return 0200; + } + break; + case hwmon_in: + switch (attr) { + case hwmon_in_input: + case hwmon_in_min: + case hwmon_in_max: + case hwmon_in_highest: + return 0444; + case hwmon_in_reset_history: + return 0200; + } + break; + case hwmon_curr: + switch (attr) { + case hwmon_curr_input: + case hwmon_curr_min: + case hwmon_curr_max: + case hwmon_curr_highest: + return 0444; + case hwmon_curr_reset_history: + return 0200; + } + break; + case hwmon_fan: + switch (attr) { + case hwmon_fan_input: + case hwmon_fan_min: + case hwmon_fan_max: + return 0444; + } + break; + case hwmon_pwm: + switch (attr) { + case hwmon_pwm_input: + case hwmon_pwm_enable: + return 0644; + } + break; + default: + break; + } + return 0; +} + +static const struct hwmon_ops hl_hwmon_ops = { + .is_visible = hl_is_visible, + .read = hl_read, + .write = hl_write +}; + +int hl_get_temperature(struct hl_device *hdev, + int sensor_index, u32 attr, long *value) +{ + struct cpucp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEMPERATURE_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.sensor_index = __cpu_to_le16(sensor_index); + pkt.type = __cpu_to_le16(attr); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, value); + + if (rc) { + dev_err(hdev->dev, + "Failed to get temperature from sensor %d, error %d\n", + sensor_index, rc); + *value = 0; + } + + return rc; +} + +int hl_set_temperature(struct hl_device *hdev, + int sensor_index, u32 attr, long value) +{ + struct cpucp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEMPERATURE_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.sensor_index = __cpu_to_le16(sensor_index); + pkt.type = __cpu_to_le16(attr); + pkt.value = __cpu_to_le64(value); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, NULL); + + if (rc) + dev_err(hdev->dev, + "Failed to set temperature of sensor %d, error %d\n", + sensor_index, rc); + + return rc; +} + +int hl_get_voltage(struct hl_device *hdev, + int sensor_index, u32 attr, long *value) +{ + struct cpucp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_VOLTAGE_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.sensor_index = __cpu_to_le16(sensor_index); + pkt.type = __cpu_to_le16(attr); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, value); + + if (rc) { + dev_err(hdev->dev, + "Failed to get voltage from sensor %d, error %d\n", + sensor_index, rc); + *value = 0; + } + + return rc; +} + +int hl_get_current(struct hl_device *hdev, + int sensor_index, u32 attr, long *value) +{ + struct cpucp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_CURRENT_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.sensor_index = __cpu_to_le16(sensor_index); + pkt.type = __cpu_to_le16(attr); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, value); + + if (rc) { + dev_err(hdev->dev, + "Failed to get current from sensor %d, error %d\n", + sensor_index, rc); + *value = 0; + } + + return rc; +} + +int hl_get_fan_speed(struct hl_device *hdev, + int sensor_index, u32 attr, long *value) +{ + struct cpucp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_FAN_SPEED_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.sensor_index = __cpu_to_le16(sensor_index); + pkt.type = __cpu_to_le16(attr); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, value); + + if (rc) { + dev_err(hdev->dev, + "Failed to get fan speed from sensor %d, error %d\n", + sensor_index, rc); + *value = 0; + } + + return rc; +} + +int hl_get_pwm_info(struct hl_device *hdev, + int sensor_index, u32 attr, long *value) +{ + struct cpucp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_PWM_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.sensor_index = __cpu_to_le16(sensor_index); + pkt.type = __cpu_to_le16(attr); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, value); + + if (rc) { + dev_err(hdev->dev, + "Failed to get pwm info from sensor %d, error %d\n", + sensor_index, rc); + *value = 0; + } + + return rc; +} + +void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, + long value) +{ + struct cpucp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_PWM_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.sensor_index = __cpu_to_le16(sensor_index); + pkt.type = __cpu_to_le16(attr); + pkt.value = cpu_to_le64(value); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, NULL); + + if (rc) + dev_err(hdev->dev, + "Failed to set pwm info to sensor %d, error %d\n", + sensor_index, rc); +} + +int hl_set_voltage(struct hl_device *hdev, + int sensor_index, u32 attr, long value) +{ + struct cpucp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_VOLTAGE_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.sensor_index = __cpu_to_le16(sensor_index); + pkt.type = __cpu_to_le16(attr); + pkt.value = __cpu_to_le64(value); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, NULL); + + if (rc) + dev_err(hdev->dev, + "Failed to set voltage of sensor %d, error %d\n", + sensor_index, rc); + + return rc; +} + +int hl_set_current(struct hl_device *hdev, + int sensor_index, u32 attr, long value) +{ + struct cpucp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_CURRENT_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.sensor_index = __cpu_to_le16(sensor_index); + pkt.type = __cpu_to_le16(attr); + pkt.value = __cpu_to_le64(value); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, NULL); + + if (rc) + dev_err(hdev->dev, + "Failed to set current of sensor %d, error %d\n", + sensor_index, rc); + + return rc; +} + +int hl_hwmon_init(struct hl_device *hdev) +{ + struct device *dev = hdev->pdev ? &hdev->pdev->dev : hdev->dev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + int rc; + + if ((hdev->hwmon_initialized) || !(hdev->fw_loading)) + return 0; + + if (hdev->hl_chip_info->info) { + hdev->hl_chip_info->ops = &hl_hwmon_ops; + + hdev->hwmon_dev = hwmon_device_register_with_info(dev, + prop->cpucp_info.card_name, hdev, + hdev->hl_chip_info, NULL); + if (IS_ERR(hdev->hwmon_dev)) { + rc = PTR_ERR(hdev->hwmon_dev); + dev_err(hdev->dev, + "Unable to register hwmon device: %d\n", rc); + return rc; + } + + dev_info(hdev->dev, "%s: add sensors information\n", + dev_name(hdev->hwmon_dev)); + + hdev->hwmon_initialized = true; + } else { + dev_info(hdev->dev, "no available sensors\n"); + } + + return 0; +} + +void hl_hwmon_fini(struct hl_device *hdev) +{ + if (!hdev->hwmon_initialized) + return; + + hwmon_device_unregister(hdev->hwmon_dev); +} diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c new file mode 100644 index 000000000..de53fb5f9 --- /dev/null +++ b/drivers/misc/habanalabs/common/irq.c @@ -0,0 +1,321 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" + +#include <linux/slab.h> + +/** + * struct hl_eqe_work - This structure is used to schedule work of EQ + * entry and cpucp_reset event + * + * @eq_work: workqueue object to run when EQ entry is received + * @hdev: pointer to device structure + * @eq_entry: copy of the EQ entry + */ +struct hl_eqe_work { + struct work_struct eq_work; + struct hl_device *hdev; + struct hl_eq_entry eq_entry; +}; + +/** + * hl_cq_inc_ptr - increment ci or pi of cq + * + * @ptr: the current ci or pi value of the completion queue + * + * Increment ptr by 1. If it reaches the number of completion queue + * entries, set it to 0 + */ +inline u32 hl_cq_inc_ptr(u32 ptr) +{ + ptr++; + if (unlikely(ptr == HL_CQ_LENGTH)) + ptr = 0; + return ptr; +} + +/** + * hl_eq_inc_ptr - increment ci of eq + * + * @ptr: the current ci value of the event queue + * + * Increment ptr by 1. If it reaches the number of event queue + * entries, set it to 0 + */ +inline u32 hl_eq_inc_ptr(u32 ptr) +{ + ptr++; + if (unlikely(ptr == HL_EQ_LENGTH)) + ptr = 0; + return ptr; +} + +static void irq_handle_eqe(struct work_struct *work) +{ + struct hl_eqe_work *eqe_work = container_of(work, struct hl_eqe_work, + eq_work); + struct hl_device *hdev = eqe_work->hdev; + + hdev->asic_funcs->handle_eqe(hdev, &eqe_work->eq_entry); + + kfree(eqe_work); +} + +/** + * hl_irq_handler_cq - irq handler for completion queue + * + * @irq: irq number + * @arg: pointer to completion queue structure + * + */ +irqreturn_t hl_irq_handler_cq(int irq, void *arg) +{ + struct hl_cq *cq = arg; + struct hl_device *hdev = cq->hdev; + struct hl_hw_queue *queue; + struct hl_cs_job *job; + bool shadow_index_valid; + u16 shadow_index; + struct hl_cq_entry *cq_entry, *cq_base; + + if (hdev->disabled) { + dev_dbg(hdev->dev, + "Device disabled but received IRQ %d for CQ %d\n", + irq, cq->hw_queue_id); + return IRQ_HANDLED; + } + + cq_base = cq->kernel_address; + + while (1) { + bool entry_ready = ((le32_to_cpu(cq_base[cq->ci].data) & + CQ_ENTRY_READY_MASK) + >> CQ_ENTRY_READY_SHIFT); + + if (!entry_ready) + break; + + cq_entry = (struct hl_cq_entry *) &cq_base[cq->ci]; + + /* Make sure we read CQ entry contents after we've + * checked the ownership bit. + */ + dma_rmb(); + + shadow_index_valid = ((le32_to_cpu(cq_entry->data) & + CQ_ENTRY_SHADOW_INDEX_VALID_MASK) + >> CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT); + + shadow_index = (u16) ((le32_to_cpu(cq_entry->data) & + CQ_ENTRY_SHADOW_INDEX_MASK) + >> CQ_ENTRY_SHADOW_INDEX_SHIFT); + + queue = &hdev->kernel_queues[cq->hw_queue_id]; + + if ((shadow_index_valid) && (!hdev->disabled)) { + job = queue->shadow_queue[hl_pi_2_offset(shadow_index)]; + queue_work(hdev->cq_wq[cq->cq_idx], &job->finish_work); + } + + atomic_inc(&queue->ci); + + /* Clear CQ entry ready bit */ + cq_entry->data = cpu_to_le32(le32_to_cpu(cq_entry->data) & + ~CQ_ENTRY_READY_MASK); + + cq->ci = hl_cq_inc_ptr(cq->ci); + + /* Increment free slots */ + atomic_inc(&cq->free_slots_cnt); + } + + return IRQ_HANDLED; +} + +/** + * hl_irq_handler_eq - irq handler for event queue + * + * @irq: irq number + * @arg: pointer to event queue structure + * + */ +irqreturn_t hl_irq_handler_eq(int irq, void *arg) +{ + struct hl_eq *eq = arg; + struct hl_device *hdev = eq->hdev; + struct hl_eq_entry *eq_entry; + struct hl_eq_entry *eq_base; + struct hl_eqe_work *handle_eqe_work; + + eq_base = eq->kernel_address; + + while (1) { + bool entry_ready = + ((le32_to_cpu(eq_base[eq->ci].hdr.ctl) & + EQ_CTL_READY_MASK) >> EQ_CTL_READY_SHIFT); + + if (!entry_ready) + break; + + eq_entry = &eq_base[eq->ci]; + + /* + * Make sure we read EQ entry contents after we've + * checked the ownership bit. + */ + dma_rmb(); + + if (hdev->disabled) { + dev_warn(hdev->dev, + "Device disabled but received IRQ %d for EQ\n", + irq); + goto skip_irq; + } + + handle_eqe_work = kmalloc(sizeof(*handle_eqe_work), GFP_ATOMIC); + if (handle_eqe_work) { + INIT_WORK(&handle_eqe_work->eq_work, irq_handle_eqe); + handle_eqe_work->hdev = hdev; + + memcpy(&handle_eqe_work->eq_entry, eq_entry, + sizeof(*eq_entry)); + + queue_work(hdev->eq_wq, &handle_eqe_work->eq_work); + } +skip_irq: + /* Clear EQ entry ready bit */ + eq_entry->hdr.ctl = + cpu_to_le32(le32_to_cpu(eq_entry->hdr.ctl) & + ~EQ_CTL_READY_MASK); + + eq->ci = hl_eq_inc_ptr(eq->ci); + + hdev->asic_funcs->update_eq_ci(hdev, eq->ci); + } + + return IRQ_HANDLED; +} + +/** + * hl_cq_init - main initialization function for an cq object + * + * @hdev: pointer to device structure + * @q: pointer to cq structure + * @hw_queue_id: The H/W queue ID this completion queue belongs to + * + * Allocate dma-able memory for the completion queue and initialize fields + * Returns 0 on success + */ +int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id) +{ + void *p; + + p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES, + &q->bus_address, GFP_KERNEL | __GFP_ZERO); + if (!p) + return -ENOMEM; + + q->hdev = hdev; + q->kernel_address = p; + q->hw_queue_id = hw_queue_id; + q->ci = 0; + q->pi = 0; + + atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH); + + return 0; +} + +/** + * hl_cq_fini - destroy completion queue + * + * @hdev: pointer to device structure + * @q: pointer to cq structure + * + * Free the completion queue memory + */ +void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q) +{ + hdev->asic_funcs->asic_dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES, + q->kernel_address, + q->bus_address); +} + +void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q) +{ + q->ci = 0; + q->pi = 0; + + atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH); + + /* + * It's not enough to just reset the PI/CI because the H/W may have + * written valid completion entries before it was halted and therefore + * we need to clean the actual queues so we won't process old entries + * when the device is operational again + */ + + memset(q->kernel_address, 0, HL_CQ_SIZE_IN_BYTES); +} + +/** + * hl_eq_init - main initialization function for an event queue object + * + * @hdev: pointer to device structure + * @q: pointer to eq structure + * + * Allocate dma-able memory for the event queue and initialize fields + * Returns 0 on success + */ +int hl_eq_init(struct hl_device *hdev, struct hl_eq *q) +{ + void *p; + + p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, + HL_EQ_SIZE_IN_BYTES, + &q->bus_address); + if (!p) + return -ENOMEM; + + q->hdev = hdev; + q->kernel_address = p; + q->ci = 0; + + return 0; +} + +/** + * hl_eq_fini - destroy event queue + * + * @hdev: pointer to device structure + * @q: pointer to eq structure + * + * Free the event queue memory + */ +void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q) +{ + flush_workqueue(hdev->eq_wq); + + hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, + HL_EQ_SIZE_IN_BYTES, + q->kernel_address); +} + +void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q) +{ + q->ci = 0; + + /* + * It's not enough to just reset the PI/CI because the H/W may have + * written valid completion entries before it was halted and therefore + * we need to clean the actual queues so we won't process old entries + * when the device is operational again + */ + + memset(q->kernel_address, 0, HL_EQ_SIZE_IN_BYTES); +} diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c new file mode 100644 index 000000000..bfe223abf --- /dev/null +++ b/drivers/misc/habanalabs/common/memory.c @@ -0,0 +1,1859 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include <uapi/misc/habanalabs.h> +#include "habanalabs.h" +#include "../include/hw_ip/mmu/mmu_general.h" + +#include <linux/uaccess.h> +#include <linux/slab.h> +#include <linux/genalloc.h> + +#define HL_MMU_DEBUG 0 + +/* + * The va ranges in context object contain a list with the available chunks of + * device virtual memory. + * There is one range for host allocations and one for DRAM allocations. + * + * On initialization each range contains one chunk of all of its available + * virtual range which is a half of the total device virtual range. + * + * On each mapping of physical pages, a suitable virtual range chunk (with a + * minimum size) is selected from the list. If the chunk size equals the + * requested size, the chunk is returned. Otherwise, the chunk is split into + * two chunks - one to return as result and a remainder to stay in the list. + * + * On each Unmapping of a virtual address, the relevant virtual chunk is + * returned to the list. The chunk is added to the list and if its edges match + * the edges of the adjacent chunks (means a contiguous chunk can be created), + * the chunks are merged. + * + * On finish, the list is checked to have only one chunk of all the relevant + * virtual range (which is a half of the device total virtual range). + * If not (means not all mappings were unmapped), a warning is printed. + */ + +/* + * alloc_device_memory - allocate device memory + * + * @ctx : current context + * @args : host parameters containing the requested size + * @ret_handle : result handle + * + * This function does the following: + * - Allocate the requested size rounded up to 2MB pages + * - Return unique handle + */ +static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, + u32 *ret_handle) +{ + struct hl_device *hdev = ctx->hdev; + struct hl_vm *vm = &hdev->vm; + struct hl_vm_phys_pg_pack *phys_pg_pack; + u64 paddr = 0, total_size, num_pgs, i; + u32 num_curr_pgs, page_size, page_shift; + int handle, rc; + bool contiguous; + + num_curr_pgs = 0; + page_size = hdev->asic_prop.dram_page_size; + page_shift = __ffs(page_size); + num_pgs = (args->alloc.mem_size + (page_size - 1)) >> page_shift; + total_size = num_pgs << page_shift; + + if (!total_size) { + dev_err(hdev->dev, "Cannot allocate 0 bytes\n"); + return -EINVAL; + } + + contiguous = args->flags & HL_MEM_CONTIGUOUS; + + if (contiguous) { + paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size); + if (!paddr) { + dev_err(hdev->dev, + "failed to allocate %llu contiguous pages with total size of %llu\n", + num_pgs, total_size); + return -ENOMEM; + } + } + + phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL); + if (!phys_pg_pack) { + rc = -ENOMEM; + goto pages_pack_err; + } + + phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK; + phys_pg_pack->asid = ctx->asid; + phys_pg_pack->npages = num_pgs; + phys_pg_pack->page_size = page_size; + phys_pg_pack->total_size = total_size; + phys_pg_pack->flags = args->flags; + phys_pg_pack->contiguous = contiguous; + + phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL); + if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) { + rc = -ENOMEM; + goto pages_arr_err; + } + + if (phys_pg_pack->contiguous) { + for (i = 0 ; i < num_pgs ; i++) + phys_pg_pack->pages[i] = paddr + i * page_size; + } else { + for (i = 0 ; i < num_pgs ; i++) { + phys_pg_pack->pages[i] = (u64) gen_pool_alloc( + vm->dram_pg_pool, + page_size); + if (!phys_pg_pack->pages[i]) { + dev_err(hdev->dev, + "Failed to allocate device memory (out of memory)\n"); + rc = -ENOMEM; + goto page_err; + } + + num_curr_pgs++; + } + } + + spin_lock(&vm->idr_lock); + handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0, + GFP_ATOMIC); + spin_unlock(&vm->idr_lock); + + if (handle < 0) { + dev_err(hdev->dev, "Failed to get handle for page\n"); + rc = -EFAULT; + goto idr_err; + } + + for (i = 0 ; i < num_pgs ; i++) + kref_get(&vm->dram_pg_pool_refcount); + + phys_pg_pack->handle = handle; + + atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem); + atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem); + + *ret_handle = handle; + + return 0; + +idr_err: +page_err: + if (!phys_pg_pack->contiguous) + for (i = 0 ; i < num_curr_pgs ; i++) + gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i], + page_size); + + kvfree(phys_pg_pack->pages); +pages_arr_err: + kfree(phys_pg_pack); +pages_pack_err: + if (contiguous) + gen_pool_free(vm->dram_pg_pool, paddr, total_size); + + return rc; +} + +/* + * dma_map_host_va - DMA mapping of the given host virtual address. + * @hdev: habanalabs device structure + * @addr: the host virtual address of the memory area + * @size: the size of the memory area + * @p_userptr: pointer to result userptr structure + * + * This function does the following: + * - Allocate userptr structure + * - Pin the given host memory using the userptr structure + * - Perform DMA mapping to have the DMA addresses of the pages + */ +static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size, + struct hl_userptr **p_userptr) +{ + struct hl_userptr *userptr; + int rc; + + userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); + if (!userptr) { + rc = -ENOMEM; + goto userptr_err; + } + + rc = hl_pin_host_memory(hdev, addr, size, userptr); + if (rc) { + dev_err(hdev->dev, "Failed to pin host memory\n"); + goto pin_err; + } + + rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl, + userptr->sgt->nents, DMA_BIDIRECTIONAL); + if (rc) { + dev_err(hdev->dev, "failed to map sgt with DMA region\n"); + goto dma_map_err; + } + + userptr->dma_mapped = true; + userptr->dir = DMA_BIDIRECTIONAL; + userptr->vm_type = VM_TYPE_USERPTR; + + *p_userptr = userptr; + + return 0; + +dma_map_err: + hl_unpin_host_memory(hdev, userptr); +pin_err: + kfree(userptr); +userptr_err: + + return rc; +} + +/* + * dma_unmap_host_va - DMA unmapping of the given host virtual address. + * @hdev: habanalabs device structure + * @userptr: userptr to free + * + * This function does the following: + * - Unpins the physical pages + * - Frees the userptr structure + */ +static void dma_unmap_host_va(struct hl_device *hdev, + struct hl_userptr *userptr) +{ + hl_unpin_host_memory(hdev, userptr); + kfree(userptr); +} + +/* + * dram_pg_pool_do_release - free DRAM pages pool + * + * @ref : pointer to reference object + * + * This function does the following: + * - Frees the idr structure of physical pages handles + * - Frees the generic pool of DRAM physical pages + */ +static void dram_pg_pool_do_release(struct kref *ref) +{ + struct hl_vm *vm = container_of(ref, struct hl_vm, + dram_pg_pool_refcount); + + /* + * free the idr here as only here we know for sure that there are no + * allocated physical pages and hence there are no handles in use + */ + idr_destroy(&vm->phys_pg_pack_handles); + gen_pool_destroy(vm->dram_pg_pool); +} + +/* + * free_phys_pg_pack - free physical page pack + * @hdev: habanalabs device structure + * @phys_pg_pack: physical page pack to free + * + * This function does the following: + * - For DRAM memory only, iterate over the pack and free each physical block + * structure by returning it to the general pool + * - Free the hl_vm_phys_pg_pack structure + */ +static void free_phys_pg_pack(struct hl_device *hdev, + struct hl_vm_phys_pg_pack *phys_pg_pack) +{ + struct hl_vm *vm = &hdev->vm; + u64 i; + + if (!phys_pg_pack->created_from_userptr) { + if (phys_pg_pack->contiguous) { + gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0], + phys_pg_pack->total_size); + + for (i = 0; i < phys_pg_pack->npages ; i++) + kref_put(&vm->dram_pg_pool_refcount, + dram_pg_pool_do_release); + } else { + for (i = 0 ; i < phys_pg_pack->npages ; i++) { + gen_pool_free(vm->dram_pg_pool, + phys_pg_pack->pages[i], + phys_pg_pack->page_size); + kref_put(&vm->dram_pg_pool_refcount, + dram_pg_pool_do_release); + } + } + } + + kvfree(phys_pg_pack->pages); + kfree(phys_pg_pack); +} + +/* + * free_device_memory - free device memory + * + * @ctx : current context + * @handle : handle of the memory chunk to free + * + * This function does the following: + * - Free the device memory related to the given handle + */ +static int free_device_memory(struct hl_ctx *ctx, u32 handle) +{ + struct hl_device *hdev = ctx->hdev; + struct hl_vm *vm = &hdev->vm; + struct hl_vm_phys_pg_pack *phys_pg_pack; + + spin_lock(&vm->idr_lock); + phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle); + if (phys_pg_pack) { + if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) { + dev_err(hdev->dev, "handle %u is mapped, cannot free\n", + handle); + spin_unlock(&vm->idr_lock); + return -EINVAL; + } + + /* + * must remove from idr before the freeing of the physical + * pages as the refcount of the pool is also the trigger of the + * idr destroy + */ + idr_remove(&vm->phys_pg_pack_handles, handle); + spin_unlock(&vm->idr_lock); + + atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem); + atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem); + + free_phys_pg_pack(hdev, phys_pg_pack); + } else { + spin_unlock(&vm->idr_lock); + dev_err(hdev->dev, + "free device memory failed, no match for handle %u\n", + handle); + return -EINVAL; + } + + return 0; +} + +/* + * clear_va_list_locked - free virtual addresses list + * + * @hdev : habanalabs device structure + * @va_list : list of virtual addresses to free + * + * This function does the following: + * - Iterate over the list and free each virtual addresses block + * + * This function should be called only when va_list lock is taken + */ +static void clear_va_list_locked(struct hl_device *hdev, + struct list_head *va_list) +{ + struct hl_vm_va_block *va_block, *tmp; + + list_for_each_entry_safe(va_block, tmp, va_list, node) { + list_del(&va_block->node); + kfree(va_block); + } +} + +/* + * print_va_list_locked - print virtual addresses list + * + * @hdev : habanalabs device structure + * @va_list : list of virtual addresses to print + * + * This function does the following: + * - Iterate over the list and print each virtual addresses block + * + * This function should be called only when va_list lock is taken + */ +static void print_va_list_locked(struct hl_device *hdev, + struct list_head *va_list) +{ +#if HL_MMU_DEBUG + struct hl_vm_va_block *va_block; + + dev_dbg(hdev->dev, "print va list:\n"); + + list_for_each_entry(va_block, va_list, node) + dev_dbg(hdev->dev, + "va block, start: 0x%llx, end: 0x%llx, size: %llu\n", + va_block->start, va_block->end, va_block->size); +#endif +} + +/* + * merge_va_blocks_locked - merge a virtual block if possible + * + * @hdev : pointer to the habanalabs device structure + * @va_list : pointer to the virtual addresses block list + * @va_block : virtual block to merge with adjacent blocks + * + * This function does the following: + * - Merge the given blocks with the adjacent blocks if their virtual ranges + * create a contiguous virtual range + * + * This Function should be called only when va_list lock is taken + */ +static void merge_va_blocks_locked(struct hl_device *hdev, + struct list_head *va_list, struct hl_vm_va_block *va_block) +{ + struct hl_vm_va_block *prev, *next; + + prev = list_prev_entry(va_block, node); + if (&prev->node != va_list && prev->end + 1 == va_block->start) { + prev->end = va_block->end; + prev->size = prev->end - prev->start; + list_del(&va_block->node); + kfree(va_block); + va_block = prev; + } + + next = list_next_entry(va_block, node); + if (&next->node != va_list && va_block->end + 1 == next->start) { + next->start = va_block->start; + next->size = next->end - next->start; + list_del(&va_block->node); + kfree(va_block); + } +} + +/* + * add_va_block_locked - add a virtual block to the virtual addresses list + * + * @hdev : pointer to the habanalabs device structure + * @va_list : pointer to the virtual addresses block list + * @start : start virtual address + * @end : end virtual address + * + * This function does the following: + * - Add the given block to the virtual blocks list and merge with other + * blocks if a contiguous virtual block can be created + * + * This Function should be called only when va_list lock is taken + */ +static int add_va_block_locked(struct hl_device *hdev, + struct list_head *va_list, u64 start, u64 end) +{ + struct hl_vm_va_block *va_block, *res = NULL; + u64 size = end - start; + + print_va_list_locked(hdev, va_list); + + list_for_each_entry(va_block, va_list, node) { + /* TODO: remove upon matureness */ + if (hl_mem_area_crosses_range(start, size, va_block->start, + va_block->end)) { + dev_err(hdev->dev, + "block crossing ranges at start 0x%llx, end 0x%llx\n", + va_block->start, va_block->end); + return -EINVAL; + } + + if (va_block->end < start) + res = va_block; + } + + va_block = kmalloc(sizeof(*va_block), GFP_KERNEL); + if (!va_block) + return -ENOMEM; + + va_block->start = start; + va_block->end = end; + va_block->size = size; + + if (!res) + list_add(&va_block->node, va_list); + else + list_add(&va_block->node, &res->node); + + merge_va_blocks_locked(hdev, va_list, va_block); + + print_va_list_locked(hdev, va_list); + + return 0; +} + +/* + * add_va_block - wrapper for add_va_block_locked + * + * @hdev : pointer to the habanalabs device structure + * @va_list : pointer to the virtual addresses block list + * @start : start virtual address + * @end : end virtual address + * + * This function does the following: + * - Takes the list lock and calls add_va_block_locked + */ +static inline int add_va_block(struct hl_device *hdev, + struct hl_va_range *va_range, u64 start, u64 end) +{ + int rc; + + mutex_lock(&va_range->lock); + rc = add_va_block_locked(hdev, &va_range->list, start, end); + mutex_unlock(&va_range->lock); + + return rc; +} + +/* + * get_va_block() - get a virtual block for the given size and alignment. + * @hdev: pointer to the habanalabs device structure. + * @va_range: pointer to the virtual addresses range. + * @size: requested block size. + * @hint_addr: hint for requested address by the user. + * @va_block_align: required alignment of the virtual block start address. + * + * This function does the following: + * - Iterate on the virtual block list to find a suitable virtual block for the + * given size and alignment. + * - Reserve the requested block and update the list. + * - Return the start address of the virtual block. + */ +static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range, + u64 size, u64 hint_addr, u32 va_block_align) +{ + struct hl_vm_va_block *va_block, *new_va_block = NULL; + u64 valid_start, valid_size, prev_start, prev_end, align_mask, + res_valid_start = 0, res_valid_size = 0; + bool add_prev = false; + + align_mask = ~((u64)va_block_align - 1); + + /* check if hint_addr is aligned */ + if (hint_addr & (va_block_align - 1)) + hint_addr = 0; + + mutex_lock(&va_range->lock); + + print_va_list_locked(hdev, &va_range->list); + + list_for_each_entry(va_block, &va_range->list, node) { + /* calc the first possible aligned addr */ + valid_start = va_block->start; + + if (valid_start & (va_block_align - 1)) { + valid_start &= align_mask; + valid_start += va_block_align; + if (valid_start > va_block->end) + continue; + } + + valid_size = va_block->end - valid_start; + + if (valid_size >= size && + (!new_va_block || valid_size < res_valid_size)) { + new_va_block = va_block; + res_valid_start = valid_start; + res_valid_size = valid_size; + } + + if (hint_addr && hint_addr >= valid_start && + ((hint_addr + size) <= va_block->end)) { + new_va_block = va_block; + res_valid_start = hint_addr; + res_valid_size = valid_size; + break; + } + } + + if (!new_va_block) { + dev_err(hdev->dev, "no available va block for size %llu\n", + size); + goto out; + } + + if (res_valid_start > new_va_block->start) { + prev_start = new_va_block->start; + prev_end = res_valid_start - 1; + + new_va_block->start = res_valid_start; + new_va_block->size = res_valid_size; + + add_prev = true; + } + + if (new_va_block->size > size) { + new_va_block->start += size; + new_va_block->size = new_va_block->end - new_va_block->start; + } else { + list_del(&new_va_block->node); + kfree(new_va_block); + } + + if (add_prev) + add_va_block_locked(hdev, &va_range->list, prev_start, + prev_end); + + print_va_list_locked(hdev, &va_range->list); +out: + mutex_unlock(&va_range->lock); + + return res_valid_start; +} + +/* + * get_sg_info - get number of pages and the DMA address from SG list + * + * @sg : the SG list + * @dma_addr : pointer to DMA address to return + * + * Calculate the number of consecutive pages described by the SG list. Take the + * offset of the address in the first page, add to it the length and round it up + * to the number of needed pages. + */ +static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr) +{ + *dma_addr = sg_dma_address(sg); + + return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) + + (PAGE_SIZE - 1)) >> PAGE_SHIFT; +} + +/* + * init_phys_pg_pack_from_userptr - initialize physical page pack from host + * memory + * @ctx: current context + * @userptr: userptr to initialize from + * @pphys_pg_pack: result pointer + * + * This function does the following: + * - Pin the physical pages related to the given virtual block + * - Create a physical page pack from the physical pages related to the given + * virtual block + */ +static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, + struct hl_userptr *userptr, + struct hl_vm_phys_pg_pack **pphys_pg_pack) +{ + struct hl_vm_phys_pg_pack *phys_pg_pack; + struct scatterlist *sg; + dma_addr_t dma_addr; + u64 page_mask, total_npages; + u32 npages, page_size = PAGE_SIZE, + huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size; + bool first = true, is_huge_page_opt = true; + int rc, i, j; + u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size); + + phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL); + if (!phys_pg_pack) + return -ENOMEM; + + phys_pg_pack->vm_type = userptr->vm_type; + phys_pg_pack->created_from_userptr = true; + phys_pg_pack->asid = ctx->asid; + atomic_set(&phys_pg_pack->mapping_cnt, 1); + + /* Only if all dma_addrs are aligned to 2MB and their + * sizes is at least 2MB, we can use huge page mapping. + * We limit the 2MB optimization to this condition, + * since later on we acquire the related VA range as one + * consecutive block. + */ + total_npages = 0; + for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) { + npages = get_sg_info(sg, &dma_addr); + + total_npages += npages; + + if ((npages % pgs_in_huge_page) || + (dma_addr & (huge_page_size - 1))) + is_huge_page_opt = false; + } + + if (is_huge_page_opt) { + page_size = huge_page_size; + do_div(total_npages, pgs_in_huge_page); + } + + page_mask = ~(((u64) page_size) - 1); + + phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64), + GFP_KERNEL); + if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) { + rc = -ENOMEM; + goto page_pack_arr_mem_err; + } + + phys_pg_pack->npages = total_npages; + phys_pg_pack->page_size = page_size; + phys_pg_pack->total_size = total_npages * page_size; + + j = 0; + for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) { + npages = get_sg_info(sg, &dma_addr); + + /* align down to physical page size and save the offset */ + if (first) { + first = false; + phys_pg_pack->offset = dma_addr & (page_size - 1); + dma_addr &= page_mask; + } + + while (npages) { + phys_pg_pack->pages[j++] = dma_addr; + dma_addr += page_size; + + if (is_huge_page_opt) + npages -= pgs_in_huge_page; + else + npages--; + } + } + + *pphys_pg_pack = phys_pg_pack; + + return 0; + +page_pack_arr_mem_err: + kfree(phys_pg_pack); + + return rc; +} + +/* + * map_phys_pg_pack - maps the physical page pack. + * @ctx: current context + * @vaddr: start address of the virtual area to map from + * @phys_pg_pack: the pack of physical pages to map to + * + * This function does the following: + * - Maps each chunk of virtual memory to matching physical chunk + * - Stores number of successful mappings in the given argument + * - Returns 0 on success, error code otherwise + */ +static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, + struct hl_vm_phys_pg_pack *phys_pg_pack) +{ + struct hl_device *hdev = ctx->hdev; + u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i; + u32 page_size = phys_pg_pack->page_size; + int rc = 0; + + for (i = 0 ; i < phys_pg_pack->npages ; i++) { + paddr = phys_pg_pack->pages[i]; + + rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size, + (i + 1) == phys_pg_pack->npages); + if (rc) { + dev_err(hdev->dev, + "map failed for handle %u, npages: %llu, mapped: %llu", + phys_pg_pack->handle, phys_pg_pack->npages, + mapped_pg_cnt); + goto err; + } + + mapped_pg_cnt++; + next_vaddr += page_size; + } + + return 0; + +err: + next_vaddr = vaddr; + for (i = 0 ; i < mapped_pg_cnt ; i++) { + if (hl_mmu_unmap(ctx, next_vaddr, page_size, + (i + 1) == mapped_pg_cnt)) + dev_warn_ratelimited(hdev->dev, + "failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n", + phys_pg_pack->handle, next_vaddr, + phys_pg_pack->pages[i], page_size); + + next_vaddr += page_size; + } + + return rc; +} + +/* + * unmap_phys_pg_pack - unmaps the physical page pack + * @ctx: current context + * @vaddr: start address of the virtual area to unmap + * @phys_pg_pack: the pack of physical pages to unmap + */ +static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, + struct hl_vm_phys_pg_pack *phys_pg_pack) +{ + struct hl_device *hdev = ctx->hdev; + u64 next_vaddr, i; + u32 page_size; + + page_size = phys_pg_pack->page_size; + next_vaddr = vaddr; + + for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) { + if (hl_mmu_unmap(ctx, next_vaddr, page_size, + (i + 1) == phys_pg_pack->npages)) + dev_warn_ratelimited(hdev->dev, + "unmap failed for vaddr: 0x%llx\n", next_vaddr); + + /* + * unmapping on Palladium can be really long, so avoid a CPU + * soft lockup bug by sleeping a little between unmapping pages + */ + if (hdev->pldm) + usleep_range(500, 1000); + } +} + +static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args, + u64 *paddr) +{ + struct hl_device *hdev = ctx->hdev; + struct hl_vm *vm = &hdev->vm; + struct hl_vm_phys_pg_pack *phys_pg_pack; + u32 handle; + + handle = lower_32_bits(args->map_device.handle); + spin_lock(&vm->idr_lock); + phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle); + if (!phys_pg_pack) { + spin_unlock(&vm->idr_lock); + dev_err(hdev->dev, "no match for handle %u\n", handle); + return -EINVAL; + } + + *paddr = phys_pg_pack->pages[0]; + + spin_unlock(&vm->idr_lock); + + return 0; +} + +/* + * map_device_va - map the given memory + * + * @ctx : current context + * @args : host parameters with handle/host virtual address + * @device_addr : pointer to result device virtual address + * + * This function does the following: + * - If given a physical device memory handle, map to a device virtual block + * and return the start address of this block + * - If given a host virtual address and size, find the related physical pages, + * map a device virtual block to this pages and return the start address of + * this block + */ +static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, + u64 *device_addr) +{ + struct hl_device *hdev = ctx->hdev; + struct hl_vm *vm = &hdev->vm; + struct hl_vm_phys_pg_pack *phys_pg_pack; + struct hl_userptr *userptr = NULL; + struct hl_vm_hash_node *hnode; + struct hl_va_range *va_range; + enum vm_type_t *vm_type; + u64 ret_vaddr, hint_addr; + u32 handle = 0, va_block_align; + int rc; + bool is_userptr = args->flags & HL_MEM_USERPTR; + + /* Assume failure */ + *device_addr = 0; + + if (is_userptr) { + u64 addr = args->map_host.host_virt_addr, + size = args->map_host.mem_size; + u32 page_size = hdev->asic_prop.pmmu.page_size, + huge_page_size = hdev->asic_prop.pmmu_huge.page_size; + + rc = dma_map_host_va(hdev, addr, size, &userptr); + if (rc) { + dev_err(hdev->dev, "failed to get userptr from va\n"); + return rc; + } + + rc = init_phys_pg_pack_from_userptr(ctx, userptr, + &phys_pg_pack); + if (rc) { + dev_err(hdev->dev, + "unable to init page pack for vaddr 0x%llx\n", + addr); + goto init_page_pack_err; + } + + vm_type = (enum vm_type_t *) userptr; + hint_addr = args->map_host.hint_addr; + handle = phys_pg_pack->handle; + + /* get required alignment */ + if (phys_pg_pack->page_size == page_size) { + va_range = ctx->host_va_range; + + /* + * huge page alignment may be needed in case of regular + * page mapping, depending on the host VA alignment + */ + if (addr & (huge_page_size - 1)) + va_block_align = page_size; + else + va_block_align = huge_page_size; + } else { + /* + * huge page alignment is needed in case of huge page + * mapping + */ + va_range = ctx->host_huge_va_range; + va_block_align = huge_page_size; + } + } else { + handle = lower_32_bits(args->map_device.handle); + + spin_lock(&vm->idr_lock); + phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle); + if (!phys_pg_pack) { + spin_unlock(&vm->idr_lock); + dev_err(hdev->dev, + "no match for handle %u\n", handle); + return -EINVAL; + } + + /* increment now to avoid freeing device memory while mapping */ + atomic_inc(&phys_pg_pack->mapping_cnt); + + spin_unlock(&vm->idr_lock); + + vm_type = (enum vm_type_t *) phys_pg_pack; + + hint_addr = args->map_device.hint_addr; + + /* DRAM VA alignment is the same as the DRAM page size */ + va_range = ctx->dram_va_range; + va_block_align = hdev->asic_prop.dmmu.page_size; + } + + /* + * relevant for mapping device physical memory only, as host memory is + * implicitly shared + */ + if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) && + phys_pg_pack->asid != ctx->asid) { + dev_err(hdev->dev, + "Failed to map memory, handle %u is not shared\n", + handle); + rc = -EPERM; + goto shared_err; + } + + hnode = kzalloc(sizeof(*hnode), GFP_KERNEL); + if (!hnode) { + rc = -ENOMEM; + goto hnode_err; + } + + ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size, + hint_addr, va_block_align); + if (!ret_vaddr) { + dev_err(hdev->dev, "no available va block for handle %u\n", + handle); + rc = -ENOMEM; + goto va_block_err; + } + + mutex_lock(&ctx->mmu_lock); + + rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack); + if (rc) { + mutex_unlock(&ctx->mmu_lock); + dev_err(hdev->dev, "mapping page pack failed for handle %u\n", + handle); + goto map_err; + } + + rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type); + + mutex_unlock(&ctx->mmu_lock); + + if (rc) { + dev_err(hdev->dev, + "mapping handle %u failed due to MMU cache invalidation\n", + handle); + goto map_err; + } + + ret_vaddr += phys_pg_pack->offset; + + hnode->ptr = vm_type; + hnode->vaddr = ret_vaddr; + + mutex_lock(&ctx->mem_hash_lock); + hash_add(ctx->mem_hash, &hnode->node, ret_vaddr); + mutex_unlock(&ctx->mem_hash_lock); + + *device_addr = ret_vaddr; + + if (is_userptr) + free_phys_pg_pack(hdev, phys_pg_pack); + + return 0; + +map_err: + if (add_va_block(hdev, va_range, ret_vaddr, + ret_vaddr + phys_pg_pack->total_size - 1)) + dev_warn(hdev->dev, + "release va block failed for handle 0x%x, vaddr: 0x%llx\n", + handle, ret_vaddr); + +va_block_err: + kfree(hnode); +hnode_err: +shared_err: + atomic_dec(&phys_pg_pack->mapping_cnt); + if (is_userptr) + free_phys_pg_pack(hdev, phys_pg_pack); +init_page_pack_err: + if (is_userptr) + dma_unmap_host_va(hdev, userptr); + + return rc; +} + +/* + * unmap_device_va - unmap the given device virtual address + * + * @ctx : current context + * @vaddr : device virtual address to unmap + * @ctx_free : true if in context free flow, false otherwise. + * + * This function does the following: + * - Unmap the physical pages related to the given virtual address + * - return the device virtual block to the virtual block list + */ +static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free) +{ + struct hl_device *hdev = ctx->hdev; + struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; + struct hl_vm_hash_node *hnode = NULL; + struct hl_userptr *userptr = NULL; + struct hl_va_range *va_range; + enum vm_type_t *vm_type; + bool is_userptr; + int rc = 0; + + /* protect from double entrance */ + mutex_lock(&ctx->mem_hash_lock); + hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr) + if (vaddr == hnode->vaddr) + break; + + if (!hnode) { + mutex_unlock(&ctx->mem_hash_lock); + dev_err(hdev->dev, + "unmap failed, no mem hnode for vaddr 0x%llx\n", + vaddr); + return -EINVAL; + } + + hash_del(&hnode->node); + mutex_unlock(&ctx->mem_hash_lock); + + vm_type = hnode->ptr; + + if (*vm_type == VM_TYPE_USERPTR) { + is_userptr = true; + userptr = hnode->ptr; + rc = init_phys_pg_pack_from_userptr(ctx, userptr, + &phys_pg_pack); + if (rc) { + dev_err(hdev->dev, + "unable to init page pack for vaddr 0x%llx\n", + vaddr); + goto vm_type_err; + } + + if (phys_pg_pack->page_size == + hdev->asic_prop.pmmu.page_size) + va_range = ctx->host_va_range; + else + va_range = ctx->host_huge_va_range; + } else if (*vm_type == VM_TYPE_PHYS_PACK) { + is_userptr = false; + va_range = ctx->dram_va_range; + phys_pg_pack = hnode->ptr; + } else { + dev_warn(hdev->dev, + "unmap failed, unknown vm desc for vaddr 0x%llx\n", + vaddr); + rc = -EFAULT; + goto vm_type_err; + } + + if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) { + dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr); + rc = -EINVAL; + goto mapping_cnt_err; + } + + vaddr &= ~(((u64) phys_pg_pack->page_size) - 1); + + mutex_lock(&ctx->mmu_lock); + + unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack); + + /* + * During context free this function is called in a loop to clean all + * the context mappings. Hence the cache invalidation can be called once + * at the loop end rather than for each iteration + */ + if (!ctx_free) + rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, true, + *vm_type); + + mutex_unlock(&ctx->mmu_lock); + + /* + * If the context is closing we don't need to check for the MMU cache + * invalidation return code and update the VA free list as in this flow + * we invalidate the MMU cache outside of this unmap function and the VA + * free list will be freed anyway. + */ + if (!ctx_free) { + int tmp_rc; + + if (rc) + dev_err(hdev->dev, + "unmapping vaddr 0x%llx failed due to MMU cache invalidation\n", + vaddr); + + tmp_rc = add_va_block(hdev, va_range, vaddr, + vaddr + phys_pg_pack->total_size - 1); + if (tmp_rc) { + dev_warn(hdev->dev, + "add va block failed for vaddr: 0x%llx\n", + vaddr); + if (!rc) + rc = tmp_rc; + } + } + + atomic_dec(&phys_pg_pack->mapping_cnt); + kfree(hnode); + + if (is_userptr) { + free_phys_pg_pack(hdev, phys_pg_pack); + dma_unmap_host_va(hdev, userptr); + } + + return rc; + +mapping_cnt_err: + if (is_userptr) + free_phys_pg_pack(hdev, phys_pg_pack); +vm_type_err: + mutex_lock(&ctx->mem_hash_lock); + hash_add(ctx->mem_hash, &hnode->node, vaddr); + mutex_unlock(&ctx->mem_hash_lock); + + return rc; +} + +static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + struct hl_ctx *ctx = hpriv->ctx; + u64 device_addr = 0; + u32 handle = 0; + int rc; + + switch (args->in.op) { + case HL_MEM_OP_ALLOC: + if (args->in.alloc.mem_size == 0) { + dev_err(hdev->dev, + "alloc size must be larger than 0\n"); + rc = -EINVAL; + goto out; + } + + /* Force contiguous as there are no real MMU + * translations to overcome physical memory gaps + */ + args->in.flags |= HL_MEM_CONTIGUOUS; + rc = alloc_device_memory(ctx, &args->in, &handle); + + memset(args, 0, sizeof(*args)); + args->out.handle = (__u64) handle; + break; + + case HL_MEM_OP_FREE: + rc = free_device_memory(ctx, args->in.free.handle); + break; + + case HL_MEM_OP_MAP: + if (args->in.flags & HL_MEM_USERPTR) { + device_addr = args->in.map_host.host_virt_addr; + rc = 0; + } else { + rc = get_paddr_from_handle(ctx, &args->in, + &device_addr); + } + + memset(args, 0, sizeof(*args)); + args->out.device_virt_addr = device_addr; + break; + + case HL_MEM_OP_UNMAP: + rc = 0; + break; + + default: + dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); + rc = -ENOTTY; + break; + } + +out: + return rc; +} + +int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) +{ + union hl_mem_args *args = data; + struct hl_device *hdev = hpriv->hdev; + struct hl_ctx *ctx = hpriv->ctx; + u64 device_addr = 0; + u32 handle = 0; + int rc; + + if (hl_device_disabled_or_in_reset(hdev)) { + dev_warn_ratelimited(hdev->dev, + "Device is %s. Can't execute MEMORY IOCTL\n", + atomic_read(&hdev->in_reset) ? "in_reset" : "disabled"); + return -EBUSY; + } + + if (!hdev->mmu_enable) + return mem_ioctl_no_mmu(hpriv, args); + + switch (args->in.op) { + case HL_MEM_OP_ALLOC: + if (!hdev->dram_supports_virtual_memory) { + dev_err(hdev->dev, "DRAM alloc is not supported\n"); + rc = -EINVAL; + goto out; + } + + if (args->in.alloc.mem_size == 0) { + dev_err(hdev->dev, + "alloc size must be larger than 0\n"); + rc = -EINVAL; + goto out; + } + rc = alloc_device_memory(ctx, &args->in, &handle); + + memset(args, 0, sizeof(*args)); + args->out.handle = (__u64) handle; + break; + + case HL_MEM_OP_FREE: + rc = free_device_memory(ctx, args->in.free.handle); + break; + + case HL_MEM_OP_MAP: + rc = map_device_va(ctx, &args->in, &device_addr); + + memset(args, 0, sizeof(*args)); + args->out.device_virt_addr = device_addr; + break; + + case HL_MEM_OP_UNMAP: + rc = unmap_device_va(ctx, args->in.unmap.device_virt_addr, + false); + break; + + default: + dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); + rc = -ENOTTY; + break; + } + +out: + return rc; +} + +static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size, + u32 npages, u64 start, u32 offset, + struct hl_userptr *userptr) +{ + int rc; + + if (!access_ok((void __user *) (uintptr_t) addr, size)) { + dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr); + return -EFAULT; + } + + userptr->vec = frame_vector_create(npages); + if (!userptr->vec) { + dev_err(hdev->dev, "Failed to create frame vector\n"); + return -ENOMEM; + } + + rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE, + userptr->vec); + + if (rc != npages) { + dev_err(hdev->dev, + "Failed to map host memory, user ptr probably wrong\n"); + if (rc < 0) + goto destroy_framevec; + rc = -EFAULT; + goto put_framevec; + } + + if (frame_vector_to_pages(userptr->vec) < 0) { + dev_err(hdev->dev, + "Failed to translate frame vector to pages\n"); + rc = -EFAULT; + goto put_framevec; + } + + rc = sg_alloc_table_from_pages(userptr->sgt, + frame_vector_pages(userptr->vec), + npages, offset, size, GFP_ATOMIC); + if (rc < 0) { + dev_err(hdev->dev, "failed to create SG table from pages\n"); + goto put_framevec; + } + + return 0; + +put_framevec: + put_vaddr_frames(userptr->vec); +destroy_framevec: + frame_vector_destroy(userptr->vec); + return rc; +} + +/* + * hl_pin_host_memory - pins a chunk of host memory. + * @hdev: pointer to the habanalabs device structure + * @addr: the host virtual address of the memory area + * @size: the size of the memory area + * @userptr: pointer to hl_userptr structure + * + * This function does the following: + * - Pins the physical pages + * - Create an SG list from those pages + */ +int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, + struct hl_userptr *userptr) +{ + u64 start, end; + u32 npages, offset; + int rc; + + if (!size) { + dev_err(hdev->dev, "size to pin is invalid - %llu\n", size); + return -EINVAL; + } + + /* + * If the combination of the address and size requested for this memory + * region causes an integer overflow, return error. + */ + if (((addr + size) < addr) || + PAGE_ALIGN(addr + size) < (addr + size)) { + dev_err(hdev->dev, + "user pointer 0x%llx + %llu causes integer overflow\n", + addr, size); + return -EINVAL; + } + + /* + * This function can be called also from data path, hence use atomic + * always as it is not a big allocation. + */ + userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC); + if (!userptr->sgt) + return -ENOMEM; + + start = addr & PAGE_MASK; + offset = addr & ~PAGE_MASK; + end = PAGE_ALIGN(addr + size); + npages = (end - start) >> PAGE_SHIFT; + + userptr->size = size; + userptr->addr = addr; + userptr->dma_mapped = false; + INIT_LIST_HEAD(&userptr->job_node); + + rc = get_user_memory(hdev, addr, size, npages, start, offset, + userptr); + if (rc) { + dev_err(hdev->dev, + "failed to get user memory for address 0x%llx\n", + addr); + goto free_sgt; + } + + hl_debugfs_add_userptr(hdev, userptr); + + return 0; + +free_sgt: + kfree(userptr->sgt); + return rc; +} + +/* + * hl_unpin_host_memory - unpins a chunk of host memory. + * @hdev: pointer to the habanalabs device structure + * @userptr: pointer to hl_userptr structure + * + * This function does the following: + * - Unpins the physical pages related to the host memory + * - Free the SG list + */ +void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr) +{ + struct page **pages; + + hl_debugfs_remove_userptr(hdev, userptr); + + if (userptr->dma_mapped) + hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl, + userptr->sgt->nents, + userptr->dir); + + pages = frame_vector_pages(userptr->vec); + if (!IS_ERR(pages)) { + int i; + + for (i = 0; i < frame_vector_count(userptr->vec); i++) + set_page_dirty_lock(pages[i]); + } + put_vaddr_frames(userptr->vec); + frame_vector_destroy(userptr->vec); + + list_del(&userptr->job_node); + + sg_free_table(userptr->sgt); + kfree(userptr->sgt); +} + +/* + * hl_userptr_delete_list - clear userptr list + * + * @hdev : pointer to the habanalabs device structure + * @userptr_list : pointer to the list to clear + * + * This function does the following: + * - Iterates over the list and unpins the host memory and frees the userptr + * structure. + */ +void hl_userptr_delete_list(struct hl_device *hdev, + struct list_head *userptr_list) +{ + struct hl_userptr *userptr, *tmp; + + list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) { + hl_unpin_host_memory(hdev, userptr); + kfree(userptr); + } + + INIT_LIST_HEAD(userptr_list); +} + +/* + * hl_userptr_is_pinned - returns whether the given userptr is pinned + * + * @hdev : pointer to the habanalabs device structure + * @userptr_list : pointer to the list to clear + * @userptr : pointer to userptr to check + * + * This function does the following: + * - Iterates over the list and checks if the given userptr is in it, means is + * pinned. If so, returns true, otherwise returns false. + */ +bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, + u32 size, struct list_head *userptr_list, + struct hl_userptr **userptr) +{ + list_for_each_entry((*userptr), userptr_list, job_node) { + if ((addr == (*userptr)->addr) && (size == (*userptr)->size)) + return true; + } + + return false; +} + +/* + * va_range_init - initialize virtual addresses range + * @hdev: pointer to the habanalabs device structure + * @va_range: pointer to the range to initialize + * @start: range start address + * @end: range end address + * + * This function does the following: + * - Initializes the virtual addresses list of the given range with the given + * addresses. + */ +static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range, + u64 start, u64 end) +{ + int rc; + + INIT_LIST_HEAD(&va_range->list); + + /* PAGE_SIZE alignment */ + + if (start & (PAGE_SIZE - 1)) { + start &= PAGE_MASK; + start += PAGE_SIZE; + } + + if (end & (PAGE_SIZE - 1)) + end &= PAGE_MASK; + + if (start >= end) { + dev_err(hdev->dev, "too small vm range for va list\n"); + return -EFAULT; + } + + rc = add_va_block(hdev, va_range, start, end); + + if (rc) { + dev_err(hdev->dev, "Failed to init host va list\n"); + return rc; + } + + va_range->start_addr = start; + va_range->end_addr = end; + + return 0; +} + +/* + * va_range_fini() - clear a virtual addresses range + * @hdev: pointer to the habanalabs structure + * va_range: pointer to virtual addresses range + * + * This function does the following: + * - Frees the virtual addresses block list and its lock + */ +static void va_range_fini(struct hl_device *hdev, + struct hl_va_range *va_range) +{ + mutex_lock(&va_range->lock); + clear_va_list_locked(hdev, &va_range->list); + mutex_unlock(&va_range->lock); + + mutex_destroy(&va_range->lock); + kfree(va_range); +} + +/* + * vm_ctx_init_with_ranges() - initialize virtual memory for context + * @ctx: pointer to the habanalabs context structure + * @host_range_start: host virtual addresses range start. + * @host_range_end: host virtual addresses range end. + * @host_huge_range_start: host virtual addresses range start for memory + * allocated with huge pages. + * @host_huge_range_end: host virtual addresses range end for memory allocated + * with huge pages. + * @dram_range_start: dram virtual addresses range start. + * @dram_range_end: dram virtual addresses range end. + * + * This function initializes the following: + * - MMU for context + * - Virtual address to area descriptor hashtable + * - Virtual block list of available virtual memory + */ +static int vm_ctx_init_with_ranges(struct hl_ctx *ctx, + u64 host_range_start, + u64 host_range_end, + u64 host_huge_range_start, + u64 host_huge_range_end, + u64 dram_range_start, + u64 dram_range_end) +{ + struct hl_device *hdev = ctx->hdev; + int rc; + + ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL); + if (!ctx->host_va_range) + return -ENOMEM; + + ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range), + GFP_KERNEL); + if (!ctx->host_huge_va_range) { + rc = -ENOMEM; + goto host_huge_va_range_err; + } + + ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL); + if (!ctx->dram_va_range) { + rc = -ENOMEM; + goto dram_va_range_err; + } + + rc = hl_mmu_ctx_init(ctx); + if (rc) { + dev_err(hdev->dev, "failed to init context %d\n", ctx->asid); + goto mmu_ctx_err; + } + + mutex_init(&ctx->mem_hash_lock); + hash_init(ctx->mem_hash); + + mutex_init(&ctx->host_va_range->lock); + + rc = va_range_init(hdev, ctx->host_va_range, host_range_start, + host_range_end); + if (rc) { + dev_err(hdev->dev, "failed to init host vm range\n"); + goto host_page_range_err; + } + + if (hdev->pmmu_huge_range) { + mutex_init(&ctx->host_huge_va_range->lock); + + rc = va_range_init(hdev, ctx->host_huge_va_range, + host_huge_range_start, + host_huge_range_end); + if (rc) { + dev_err(hdev->dev, + "failed to init host huge vm range\n"); + goto host_hpage_range_err; + } + } else { + kfree(ctx->host_huge_va_range); + ctx->host_huge_va_range = ctx->host_va_range; + } + + mutex_init(&ctx->dram_va_range->lock); + + rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start, + dram_range_end); + if (rc) { + dev_err(hdev->dev, "failed to init dram vm range\n"); + goto dram_vm_err; + } + + hl_debugfs_add_ctx_mem_hash(hdev, ctx); + + return 0; + +dram_vm_err: + mutex_destroy(&ctx->dram_va_range->lock); + + if (hdev->pmmu_huge_range) { + mutex_lock(&ctx->host_huge_va_range->lock); + clear_va_list_locked(hdev, &ctx->host_huge_va_range->list); + mutex_unlock(&ctx->host_huge_va_range->lock); + } +host_hpage_range_err: + if (hdev->pmmu_huge_range) + mutex_destroy(&ctx->host_huge_va_range->lock); + mutex_lock(&ctx->host_va_range->lock); + clear_va_list_locked(hdev, &ctx->host_va_range->list); + mutex_unlock(&ctx->host_va_range->lock); +host_page_range_err: + mutex_destroy(&ctx->host_va_range->lock); + mutex_destroy(&ctx->mem_hash_lock); + hl_mmu_ctx_fini(ctx); +mmu_ctx_err: + kfree(ctx->dram_va_range); +dram_va_range_err: + kfree(ctx->host_huge_va_range); +host_huge_va_range_err: + kfree(ctx->host_va_range); + + return rc; +} + +int hl_vm_ctx_init(struct hl_ctx *ctx) +{ + struct asic_fixed_properties *prop = &ctx->hdev->asic_prop; + u64 host_range_start, host_range_end, host_huge_range_start, + host_huge_range_end, dram_range_start, dram_range_end; + + atomic64_set(&ctx->dram_phys_mem, 0); + + /* + * - If MMU is enabled, init the ranges as usual. + * - If MMU is disabled, in case of host mapping, the returned address + * is the given one. + * In case of DRAM mapping, the returned address is the physical + * address of the memory related to the given handle. + */ + if (ctx->hdev->mmu_enable) { + dram_range_start = prop->dmmu.start_addr; + dram_range_end = prop->dmmu.end_addr; + host_range_start = prop->pmmu.start_addr; + host_range_end = prop->pmmu.end_addr; + host_huge_range_start = prop->pmmu_huge.start_addr; + host_huge_range_end = prop->pmmu_huge.end_addr; + } else { + dram_range_start = prop->dram_user_base_address; + dram_range_end = prop->dram_end_address; + host_range_start = prop->dram_user_base_address; + host_range_end = prop->dram_end_address; + host_huge_range_start = prop->dram_user_base_address; + host_huge_range_end = prop->dram_end_address; + } + + return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end, + host_huge_range_start, + host_huge_range_end, + dram_range_start, + dram_range_end); +} + +/* + * hl_vm_ctx_fini - virtual memory teardown of context + * + * @ctx : pointer to the habanalabs context structure + * + * This function perform teardown the following: + * - Virtual block list of available virtual memory + * - Virtual address to area descriptor hashtable + * - MMU for context + * + * In addition this function does the following: + * - Unmaps the existing hashtable nodes if the hashtable is not empty. The + * hashtable should be empty as no valid mappings should exist at this + * point. + * - Frees any existing physical page list from the idr which relates to the + * current context asid. + * - This function checks the virtual block list for correctness. At this point + * the list should contain one element which describes the whole virtual + * memory range of the context. Otherwise, a warning is printed. + */ +void hl_vm_ctx_fini(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + struct hl_vm *vm = &hdev->vm; + struct hl_vm_phys_pg_pack *phys_pg_list; + struct hl_vm_hash_node *hnode; + struct hlist_node *tmp_node; + int i; + + hl_debugfs_remove_ctx_mem_hash(hdev, ctx); + + /* + * Clearly something went wrong on hard reset so no point in printing + * another side effect error + */ + if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash)) + dev_notice(hdev->dev, + "user released device without removing its memory mappings\n"); + + hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) { + dev_dbg(hdev->dev, + "hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n", + hnode->vaddr, ctx->asid); + unmap_device_va(ctx, hnode->vaddr, true); + } + + /* invalidate the cache once after the unmapping loop */ + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR); + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK); + + spin_lock(&vm->idr_lock); + idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i) + if (phys_pg_list->asid == ctx->asid) { + dev_dbg(hdev->dev, + "page list 0x%px of asid %d is still alive\n", + phys_pg_list, ctx->asid); + atomic64_sub(phys_pg_list->total_size, + &hdev->dram_used_mem); + free_phys_pg_pack(hdev, phys_pg_list); + idr_remove(&vm->phys_pg_pack_handles, i); + } + spin_unlock(&vm->idr_lock); + + va_range_fini(hdev, ctx->dram_va_range); + if (hdev->pmmu_huge_range) + va_range_fini(hdev, ctx->host_huge_va_range); + va_range_fini(hdev, ctx->host_va_range); + + mutex_destroy(&ctx->mem_hash_lock); + hl_mmu_ctx_fini(ctx); +} + +/* + * hl_vm_init - initialize virtual memory module + * + * @hdev : pointer to the habanalabs device structure + * + * This function initializes the following: + * - MMU module + * - DRAM physical pages pool of 2MB + * - Idr for device memory allocation handles + */ +int hl_vm_init(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_vm *vm = &hdev->vm; + int rc; + + vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1); + if (!vm->dram_pg_pool) { + dev_err(hdev->dev, "Failed to create dram page pool\n"); + return -ENOMEM; + } + + kref_init(&vm->dram_pg_pool_refcount); + + rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address, + prop->dram_end_address - prop->dram_user_base_address, + -1); + + if (rc) { + dev_err(hdev->dev, + "Failed to add memory to dram page pool %d\n", rc); + goto pool_add_err; + } + + spin_lock_init(&vm->idr_lock); + idr_init(&vm->phys_pg_pack_handles); + + atomic64_set(&hdev->dram_used_mem, 0); + + vm->init_done = true; + + return 0; + +pool_add_err: + gen_pool_destroy(vm->dram_pg_pool); + + return rc; +} + +/* + * hl_vm_fini - virtual memory module teardown + * + * @hdev : pointer to the habanalabs device structure + * + * This function perform teardown to the following: + * - Idr for device memory allocation handles + * - DRAM physical pages pool of 2MB + * - MMU module + */ +void hl_vm_fini(struct hl_device *hdev) +{ + struct hl_vm *vm = &hdev->vm; + + if (!vm->init_done) + return; + + /* + * At this point all the contexts should be freed and hence no DRAM + * memory should be in use. Hence the DRAM pool should be freed here. + */ + if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1) + dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n", + __func__); + + vm->init_done = false; +} diff --git a/drivers/misc/habanalabs/common/mmu.c b/drivers/misc/habanalabs/common/mmu.c new file mode 100644 index 000000000..b5058798a --- /dev/null +++ b/drivers/misc/habanalabs/common/mmu.c @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2020 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include <linux/slab.h> + +#include "habanalabs.h" + +static bool is_dram_va(struct hl_device *hdev, u64 virt_addr) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + + return hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->dmmu.start_addr, + prop->dmmu.end_addr); +} + +/** + * hl_mmu_init() - initialize the MMU module. + * @hdev: habanalabs device structure. + * + * This function does the following: + * - Create a pool of pages for pgt_infos. + * - Create a shadow table for pgt + * + * Return: 0 for success, non-zero for failure. + */ +int hl_mmu_init(struct hl_device *hdev) +{ + if (hdev->mmu_enable) + return hdev->mmu_func.init(hdev); + + return 0; +} + +/** + * hl_mmu_fini() - release the MMU module. + * @hdev: habanalabs device structure. + * + * This function does the following: + * - Disable MMU in H/W. + * - Free the pgt_infos pool. + * + * All contexts should be freed before calling this function. + */ +void hl_mmu_fini(struct hl_device *hdev) +{ + if (hdev->mmu_enable) + hdev->mmu_func.fini(hdev); +} + +/** + * hl_mmu_ctx_init() - initialize a context for using the MMU module. + * @ctx: pointer to the context structure to initialize. + * + * Initialize a mutex to protect the concurrent mapping flow, a hash to hold all + * page tables hops related to this context. + * Return: 0 on success, non-zero otherwise. + */ +int hl_mmu_ctx_init(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + + if (hdev->mmu_enable) + return hdev->mmu_func.ctx_init(ctx); + + return 0; +} + +/* + * hl_mmu_ctx_fini - disable a ctx from using the mmu module + * + * @ctx: pointer to the context structure + * + * This function does the following: + * - Free any pgts which were not freed yet + * - Free the mutex + * - Free DRAM default page mapping hops + */ +void hl_mmu_ctx_fini(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + + if (hdev->mmu_enable) + hdev->mmu_func.ctx_fini(ctx); +} + +/* + * hl_mmu_unmap - unmaps a virtual addr + * + * @ctx: pointer to the context structure + * @virt_addr: virt addr to map from + * @page_size: size of the page to unmap + * @flush_pte: whether to do a PCI flush + * + * This function does the following: + * - Check that the virt addr is mapped + * - Unmap the virt addr and frees pgts if possible + * - Returns 0 on success, -EINVAL if the given addr is not mapped + * + * Because this function changes the page tables in the device and because it + * changes the MMU hash, it must be protected by a lock. + * However, because it maps only a single page, the lock should be implemented + * in a higher level in order to protect the entire mapping of the memory area + * + * For optimization reasons PCI flush may be requested once after unmapping of + * large area. + */ +int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, + bool flush_pte) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; + u64 real_virt_addr; + u32 real_page_size, npages; + int i, rc = 0; + bool is_dram_addr; + + if (!hdev->mmu_enable) + return 0; + + is_dram_addr = is_dram_va(hdev, virt_addr); + + if (is_dram_addr) + mmu_prop = &prop->dmmu; + else if ((page_size % prop->pmmu_huge.page_size) == 0) + mmu_prop = &prop->pmmu_huge; + else + mmu_prop = &prop->pmmu; + + /* + * The H/W handles mapping of specific page sizes. Hence if the page + * size is bigger, we break it to sub-pages and unmap them separately. + */ + if ((page_size % mmu_prop->page_size) == 0) { + real_page_size = mmu_prop->page_size; + } else { + dev_err(hdev->dev, + "page size of %u is not %uKB aligned, can't unmap\n", + page_size, mmu_prop->page_size >> 10); + + return -EFAULT; + } + + npages = page_size / real_page_size; + real_virt_addr = virt_addr; + + for (i = 0 ; i < npages ; i++) { + rc = hdev->mmu_func.unmap(ctx, real_virt_addr, is_dram_addr); + if (rc) + break; + + real_virt_addr += real_page_size; + } + + if (flush_pte) + hdev->mmu_func.flush(ctx); + + return rc; +} + +/* + * hl_mmu_map - maps a virtual addr to physical addr + * + * @ctx: pointer to the context structure + * @virt_addr: virt addr to map from + * @phys_addr: phys addr to map to + * @page_size: physical page size + * @flush_pte: whether to do a PCI flush + * + * This function does the following: + * - Check that the virt addr is not mapped + * - Allocate pgts as necessary in order to map the virt addr to the phys + * - Returns 0 on success, -EINVAL if addr is already mapped, or -ENOMEM. + * + * Because this function changes the page tables in the device and because it + * changes the MMU hash, it must be protected by a lock. + * However, because it maps only a single page, the lock should be implemented + * in a higher level in order to protect the entire mapping of the memory area + * + * For optimization reasons PCI flush may be requested once after mapping of + * large area. + */ +int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size, + bool flush_pte) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; + u64 real_virt_addr, real_phys_addr; + u32 real_page_size, npages; + int i, rc, mapped_cnt = 0; + bool is_dram_addr; + + if (!hdev->mmu_enable) + return 0; + + is_dram_addr = is_dram_va(hdev, virt_addr); + + if (is_dram_addr) + mmu_prop = &prop->dmmu; + else if ((page_size % prop->pmmu_huge.page_size) == 0) + mmu_prop = &prop->pmmu_huge; + else + mmu_prop = &prop->pmmu; + + /* + * The H/W handles mapping of specific page sizes. Hence if the page + * size is bigger, we break it to sub-pages and map them separately. + */ + if ((page_size % mmu_prop->page_size) == 0) { + real_page_size = mmu_prop->page_size; + } else { + dev_err(hdev->dev, + "page size of %u is not %uKB aligned, can't unmap\n", + page_size, mmu_prop->page_size >> 10); + + return -EFAULT; + } + + WARN_ONCE((phys_addr & (real_page_size - 1)), + "Mapping 0x%llx with page size of 0x%x is erroneous! Address must be divisible by page size", + phys_addr, real_page_size); + + npages = page_size / real_page_size; + real_virt_addr = virt_addr; + real_phys_addr = phys_addr; + + for (i = 0 ; i < npages ; i++) { + rc = hdev->mmu_func.map(ctx, real_virt_addr, real_phys_addr, + real_page_size, is_dram_addr); + if (rc) + goto err; + + real_virt_addr += real_page_size; + real_phys_addr += real_page_size; + mapped_cnt++; + } + + if (flush_pte) + hdev->mmu_func.flush(ctx); + + return 0; + +err: + real_virt_addr = virt_addr; + for (i = 0 ; i < mapped_cnt ; i++) { + if (hdev->mmu_func.unmap(ctx, real_virt_addr, is_dram_addr)) + dev_warn_ratelimited(hdev->dev, + "failed to unmap va: 0x%llx\n", real_virt_addr); + + real_virt_addr += real_page_size; + } + + hdev->mmu_func.flush(ctx); + + return rc; +} + +/* + * hl_mmu_swap_out - marks all mapping of the given ctx as swapped out + * + * @ctx: pointer to the context structure + * + */ +void hl_mmu_swap_out(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + + if (hdev->mmu_enable) + hdev->mmu_func.swap_out(ctx); +} + +/* + * hl_mmu_swap_in - marks all mapping of the given ctx as swapped in + * + * @ctx: pointer to the context structure + * + */ +void hl_mmu_swap_in(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + + if (hdev->mmu_enable) + hdev->mmu_func.swap_in(ctx); +} + +int hl_mmu_if_set_funcs(struct hl_device *hdev) +{ + if (!hdev->mmu_enable) + return 0; + + switch (hdev->asic_type) { + case ASIC_GOYA: + case ASIC_GAUDI: + hl_mmu_v1_set_funcs(hdev); + break; + default: + dev_err(hdev->dev, "Unrecognized ASIC type %d\n", + hdev->asic_type); + return -EOPNOTSUPP; + } + + return 0; +} diff --git a/drivers/misc/habanalabs/common/mmu_v1.c b/drivers/misc/habanalabs/common/mmu_v1.c new file mode 100644 index 000000000..8d1eb5265 --- /dev/null +++ b/drivers/misc/habanalabs/common/mmu_v1.c @@ -0,0 +1,863 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" +#include "../include/hw_ip/mmu/mmu_general.h" + +#include <linux/genalloc.h> +#include <linux/slab.h> + +static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr); + +static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr) +{ + struct pgt_info *pgt_info = NULL; + + hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node, + (unsigned long) hop_addr) + if (hop_addr == pgt_info->shadow_addr) + break; + + return pgt_info; +} + +static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info) +{ + struct hl_device *hdev = ctx->hdev; + + gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, pgt_info->phys_addr, + hdev->asic_prop.mmu_hop_table_size); + hash_del(&pgt_info->node); + kfree((u64 *) (uintptr_t) pgt_info->shadow_addr); + kfree(pgt_info); +} + +static void free_hop(struct hl_ctx *ctx, u64 hop_addr) +{ + struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr); + + _free_hop(ctx, pgt_info); +} + +static u64 alloc_hop(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct pgt_info *pgt_info; + u64 phys_addr, shadow_addr; + + pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL); + if (!pgt_info) + return ULLONG_MAX; + + phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.mmu_pgt_pool, + prop->mmu_hop_table_size); + if (!phys_addr) { + dev_err(hdev->dev, "failed to allocate page\n"); + goto pool_add_err; + } + + shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size, + GFP_KERNEL); + if (!shadow_addr) + goto shadow_err; + + pgt_info->phys_addr = phys_addr; + pgt_info->shadow_addr = shadow_addr; + pgt_info->ctx = ctx; + pgt_info->num_of_ptes = 0; + hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr); + + return shadow_addr; + +shadow_err: + gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, phys_addr, + prop->mmu_hop_table_size); +pool_add_err: + kfree(pgt_info); + + return ULLONG_MAX; +} + +static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx) +{ + return ctx->hdev->asic_prop.mmu_pgt_addr + + (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); +} + +static inline u64 get_hop0_addr(struct hl_ctx *ctx) +{ + return (u64) (uintptr_t) ctx->hdev->mmu_priv.mmu_shadow_hop0 + + (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); +} + +static void flush(struct hl_ctx *ctx) +{ + /* flush all writes from all cores to reach PCI */ + mb(); + ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx)); +} + +/* transform the value to physical address when writing to H/W */ +static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val) +{ + /* + * The value to write is actually the address of the next shadow hop + + * flags at the 12 LSBs. + * Hence in order to get the value to write to the physical PTE, we + * clear the 12 LSBs and translate the shadow hop to its associated + * physical hop, and add back the original 12 LSBs. + */ + u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) | + (val & FLAGS_MASK); + + ctx->hdev->asic_funcs->write_pte(ctx->hdev, + get_phys_addr(ctx, shadow_pte_addr), + phys_val); + + *(u64 *) (uintptr_t) shadow_pte_addr = val; +} + +/* do not transform the value to physical address when writing to H/W */ +static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, + u64 val) +{ + ctx->hdev->asic_funcs->write_pte(ctx->hdev, + get_phys_addr(ctx, shadow_pte_addr), + val); + *(u64 *) (uintptr_t) shadow_pte_addr = val; +} + +/* clear the last and present bits */ +static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr) +{ + /* no need to transform the value to physical address */ + write_final_pte(ctx, pte_addr, 0); +} + +static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr) +{ + get_pgt_info(ctx, hop_addr)->num_of_ptes++; +} + +/* + * put_pte - decrement the num of ptes and free the hop if possible + * + * @ctx: pointer to the context structure + * @hop_addr: addr of the hop + * + * This function returns the number of ptes left on this hop. If the number is + * 0, it means the pte was freed. + */ +static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr) +{ + struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr); + int num_of_ptes_left; + + pgt_info->num_of_ptes--; + + /* + * Need to save the number of ptes left because free_hop might free + * the pgt_info + */ + num_of_ptes_left = pgt_info->num_of_ptes; + if (!num_of_ptes_left) + _free_hop(ctx, pgt_info); + + return num_of_ptes_left; +} + +static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr, + u64 virt_addr, u64 mask, u64 shift) +{ + return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * + ((virt_addr & mask) >> shift); +} + +static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask, + mmu_prop->hop0_shift); +} + +static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask, + mmu_prop->hop1_shift); +} + +static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask, + mmu_prop->hop2_shift); +} + +static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask, + mmu_prop->hop3_shift); +} + +static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + u64 hop_addr, u64 vaddr) +{ + return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask, + mmu_prop->hop4_shift); +} + +static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte) +{ + if (curr_pte & PAGE_PRESENT_MASK) + return curr_pte & HOP_PHYS_ADDR_MASK; + else + return ULLONG_MAX; +} + +static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte, + bool *is_new_hop) +{ + u64 hop_addr = get_next_hop_addr(ctx, curr_pte); + + if (hop_addr == ULLONG_MAX) { + hop_addr = alloc_hop(ctx); + *is_new_hop = (hop_addr != ULLONG_MAX); + } + + return hop_addr; +} + +/* translates shadow address inside hop to a physical address */ +static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr) +{ + u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1); + u64 shadow_hop_addr = shadow_addr & ~page_mask; + u64 pte_offset = shadow_addr & page_mask; + u64 phys_hop_addr; + + if (shadow_hop_addr != get_hop0_addr(ctx)) + phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr; + else + phys_hop_addr = get_phys_hop0_addr(ctx); + + return phys_hop_addr + pte_offset; +} + +static int dram_default_mapping_init(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr, + hop2_pte_addr, hop3_pte_addr, pte_val; + int rc, i, j, hop3_allocated = 0; + + if ((!hdev->dram_supports_virtual_memory) || + (!hdev->dram_default_page_mapping) || + (ctx->asid == HL_KERNEL_ASID_ID)) + return 0; + + num_of_hop3 = prop->dram_size_for_default_page_mapping; + do_div(num_of_hop3, prop->dram_page_size); + do_div(num_of_hop3, PTE_ENTRIES_IN_HOP); + + /* add hop1 and hop2 */ + total_hops = num_of_hop3 + 2; + + ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL); + if (!ctx->dram_default_hops) + return -ENOMEM; + + hop0_addr = get_hop0_addr(ctx); + + hop1_addr = alloc_hop(ctx); + if (hop1_addr == ULLONG_MAX) { + dev_err(hdev->dev, "failed to alloc hop 1\n"); + rc = -ENOMEM; + goto hop1_err; + } + + ctx->dram_default_hops[total_hops - 1] = hop1_addr; + + hop2_addr = alloc_hop(ctx); + if (hop2_addr == ULLONG_MAX) { + dev_err(hdev->dev, "failed to alloc hop 2\n"); + rc = -ENOMEM; + goto hop2_err; + } + + ctx->dram_default_hops[total_hops - 2] = hop2_addr; + + for (i = 0 ; i < num_of_hop3 ; i++) { + ctx->dram_default_hops[i] = alloc_hop(ctx); + if (ctx->dram_default_hops[i] == ULLONG_MAX) { + dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i); + rc = -ENOMEM; + goto hop3_err; + } + hop3_allocated++; + } + + /* need only pte 0 in hops 0 and 1 */ + pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + write_pte(ctx, hop0_addr, pte_val); + + pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + write_pte(ctx, hop1_addr, pte_val); + get_pte(ctx, hop1_addr); + + hop2_pte_addr = hop2_addr; + for (i = 0 ; i < num_of_hop3 ; i++) { + pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) | + PAGE_PRESENT_MASK; + write_pte(ctx, hop2_pte_addr, pte_val); + get_pte(ctx, hop2_addr); + hop2_pte_addr += HL_PTE_SIZE; + } + + pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) | + LAST_MASK | PAGE_PRESENT_MASK; + + for (i = 0 ; i < num_of_hop3 ; i++) { + hop3_pte_addr = ctx->dram_default_hops[i]; + for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) { + write_final_pte(ctx, hop3_pte_addr, pte_val); + get_pte(ctx, ctx->dram_default_hops[i]); + hop3_pte_addr += HL_PTE_SIZE; + } + } + + flush(ctx); + + return 0; + +hop3_err: + for (i = 0 ; i < hop3_allocated ; i++) + free_hop(ctx, ctx->dram_default_hops[i]); + + free_hop(ctx, hop2_addr); +hop2_err: + free_hop(ctx, hop1_addr); +hop1_err: + kfree(ctx->dram_default_hops); + + return rc; +} + +static void dram_default_mapping_fini(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr, + hop2_pte_addr, hop3_pte_addr; + int i, j; + + if ((!hdev->dram_supports_virtual_memory) || + (!hdev->dram_default_page_mapping) || + (ctx->asid == HL_KERNEL_ASID_ID)) + return; + + num_of_hop3 = prop->dram_size_for_default_page_mapping; + do_div(num_of_hop3, prop->dram_page_size); + do_div(num_of_hop3, PTE_ENTRIES_IN_HOP); + + hop0_addr = get_hop0_addr(ctx); + /* add hop1 and hop2 */ + total_hops = num_of_hop3 + 2; + hop1_addr = ctx->dram_default_hops[total_hops - 1]; + hop2_addr = ctx->dram_default_hops[total_hops - 2]; + + for (i = 0 ; i < num_of_hop3 ; i++) { + hop3_pte_addr = ctx->dram_default_hops[i]; + for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) { + clear_pte(ctx, hop3_pte_addr); + put_pte(ctx, ctx->dram_default_hops[i]); + hop3_pte_addr += HL_PTE_SIZE; + } + } + + hop2_pte_addr = hop2_addr; + hop2_pte_addr = hop2_addr; + for (i = 0 ; i < num_of_hop3 ; i++) { + clear_pte(ctx, hop2_pte_addr); + put_pte(ctx, hop2_addr); + hop2_pte_addr += HL_PTE_SIZE; + } + + clear_pte(ctx, hop1_addr); + put_pte(ctx, hop1_addr); + clear_pte(ctx, hop0_addr); + + kfree(ctx->dram_default_hops); + + flush(ctx); +} + +/** + * hl_mmu_v1_init() - initialize the MMU module. + * @hdev: habanalabs device structure. + * + * This function does the following: + * - Create a pool of pages for pgt_infos. + * - Create a shadow table for pgt + * + * Return: 0 for success, non-zero for failure. + */ +static int hl_mmu_v1_init(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + int rc; + + hdev->mmu_priv.mmu_pgt_pool = + gen_pool_create(__ffs(prop->mmu_hop_table_size), -1); + + if (!hdev->mmu_priv.mmu_pgt_pool) { + dev_err(hdev->dev, "Failed to create page gen pool\n"); + return -ENOMEM; + } + + rc = gen_pool_add(hdev->mmu_priv.mmu_pgt_pool, prop->mmu_pgt_addr + + prop->mmu_hop0_tables_total_size, + prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size, + -1); + if (rc) { + dev_err(hdev->dev, "Failed to add memory to page gen pool\n"); + goto err_pool_add; + } + + hdev->mmu_priv.mmu_shadow_hop0 = kvmalloc_array(prop->max_asid, + prop->mmu_hop_table_size, + GFP_KERNEL | __GFP_ZERO); + if (ZERO_OR_NULL_PTR(hdev->mmu_priv.mmu_shadow_hop0)) { + rc = -ENOMEM; + goto err_pool_add; + } + + /* MMU H/W init will be done in device hw_init() */ + + return 0; + +err_pool_add: + gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool); + + return rc; +} + +/** + * hl_mmu_fini() - release the MMU module. + * @hdev: habanalabs device structure. + * + * This function does the following: + * - Disable MMU in H/W. + * - Free the pgt_infos pool. + * + * All contexts should be freed before calling this function. + */ +static void hl_mmu_v1_fini(struct hl_device *hdev) +{ + /* MMU H/W fini was already done in device hw_fini() */ + + kvfree(hdev->mmu_priv.mmu_shadow_hop0); + gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool); +} + +/** + * hl_mmu_ctx_init() - initialize a context for using the MMU module. + * @ctx: pointer to the context structure to initialize. + * + * Initialize a mutex to protect the concurrent mapping flow, a hash to hold all + * page tables hops related to this context. + * Return: 0 on success, non-zero otherwise. + */ +static int hl_mmu_v1_ctx_init(struct hl_ctx *ctx) +{ + mutex_init(&ctx->mmu_lock); + hash_init(ctx->mmu_shadow_hash); + + return dram_default_mapping_init(ctx); +} + +/* + * hl_mmu_ctx_fini - disable a ctx from using the mmu module + * + * @ctx: pointer to the context structure + * + * This function does the following: + * - Free any pgts which were not freed yet + * - Free the mutex + * - Free DRAM default page mapping hops + */ +static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + struct pgt_info *pgt_info; + struct hlist_node *tmp; + int i; + + dram_default_mapping_fini(ctx); + + if (!hash_empty(ctx->mmu_shadow_hash)) + dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n", + ctx->asid); + + hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) { + dev_err_ratelimited(hdev->dev, + "pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n", + pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes); + _free_hop(ctx, pgt_info); + } + + mutex_destroy(&ctx->mmu_lock); +} + +static int _hl_mmu_v1_unmap(struct hl_ctx *ctx, + u64 virt_addr, bool is_dram_addr) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; + u64 hop0_addr = 0, hop0_pte_addr = 0, + hop1_addr = 0, hop1_pte_addr = 0, + hop2_addr = 0, hop2_pte_addr = 0, + hop3_addr = 0, hop3_pte_addr = 0, + hop4_addr = 0, hop4_pte_addr = 0, + curr_pte; + bool is_huge, clear_hop3 = true; + + /* shifts and masks are the same in PMMU and HPMMU, use one of them */ + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + + hop0_addr = get_hop0_addr(ctx); + hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); + + curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; + + hop1_addr = get_next_hop_addr(ctx, curr_pte); + + if (hop1_addr == ULLONG_MAX) + goto not_mapped; + + hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); + + curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; + + hop2_addr = get_next_hop_addr(ctx, curr_pte); + + if (hop2_addr == ULLONG_MAX) + goto not_mapped; + + hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); + + curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr; + + hop3_addr = get_next_hop_addr(ctx, curr_pte); + + if (hop3_addr == ULLONG_MAX) + goto not_mapped; + + hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); + + curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr; + + is_huge = curr_pte & LAST_MASK; + + if (is_dram_addr && !is_huge) { + dev_err(hdev->dev, + "DRAM unmapping should use huge pages only\n"); + return -EFAULT; + } + + if (!is_huge) { + hop4_addr = get_next_hop_addr(ctx, curr_pte); + + if (hop4_addr == ULLONG_MAX) + goto not_mapped; + + hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, + virt_addr); + + curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr; + + clear_hop3 = false; + } + + if (hdev->dram_default_page_mapping && is_dram_addr) { + u64 default_pte = (prop->mmu_dram_default_page_addr & + HOP_PHYS_ADDR_MASK) | LAST_MASK | + PAGE_PRESENT_MASK; + if (curr_pte == default_pte) { + dev_err(hdev->dev, + "DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n", + virt_addr); + goto not_mapped; + } + + if (!(curr_pte & PAGE_PRESENT_MASK)) { + dev_err(hdev->dev, + "DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n", + virt_addr); + goto not_mapped; + } + + write_final_pte(ctx, hop3_pte_addr, default_pte); + put_pte(ctx, hop3_addr); + } else { + if (!(curr_pte & PAGE_PRESENT_MASK)) + goto not_mapped; + + if (hop4_addr) + clear_pte(ctx, hop4_pte_addr); + else + clear_pte(ctx, hop3_pte_addr); + + if (hop4_addr && !put_pte(ctx, hop4_addr)) + clear_hop3 = true; + + if (!clear_hop3) + goto mapped; + + clear_pte(ctx, hop3_pte_addr); + + if (put_pte(ctx, hop3_addr)) + goto mapped; + + clear_pte(ctx, hop2_pte_addr); + + if (put_pte(ctx, hop2_addr)) + goto mapped; + + clear_pte(ctx, hop1_pte_addr); + + if (put_pte(ctx, hop1_addr)) + goto mapped; + + clear_pte(ctx, hop0_pte_addr); + } + +mapped: + return 0; + +not_mapped: + dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", + virt_addr); + + return -EINVAL; +} + +static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, + u32 page_size, bool is_dram_addr) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; + u64 hop0_addr = 0, hop0_pte_addr = 0, + hop1_addr = 0, hop1_pte_addr = 0, + hop2_addr = 0, hop2_pte_addr = 0, + hop3_addr = 0, hop3_pte_addr = 0, + hop4_addr = 0, hop4_pte_addr = 0, + curr_pte = 0; + bool hop1_new = false, hop2_new = false, hop3_new = false, + hop4_new = false, is_huge; + int rc = -ENOMEM; + + /* + * This mapping function can map a page or a huge page. For huge page + * there are only 3 hops rather than 4. Currently the DRAM allocation + * uses huge pages only but user memory could have been allocated with + * one of the two page sizes. Since this is a common code for all the + * three cases, we need this hugs page check. + */ + if (is_dram_addr) { + mmu_prop = &prop->dmmu; + is_huge = true; + } else if (page_size == prop->pmmu_huge.page_size) { + mmu_prop = &prop->pmmu_huge; + is_huge = true; + } else { + mmu_prop = &prop->pmmu; + is_huge = false; + } + + hop0_addr = get_hop0_addr(ctx); + hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); + curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; + + hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new); + if (hop1_addr == ULLONG_MAX) + goto err; + + hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); + curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; + + hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new); + if (hop2_addr == ULLONG_MAX) + goto err; + + hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); + curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr; + + hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new); + if (hop3_addr == ULLONG_MAX) + goto err; + + hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); + curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr; + + if (!is_huge) { + hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new); + if (hop4_addr == ULLONG_MAX) + goto err; + + hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr, + virt_addr); + curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr; + } + + if (hdev->dram_default_page_mapping && is_dram_addr) { + u64 default_pte = (prop->mmu_dram_default_page_addr & + HOP_PHYS_ADDR_MASK) | LAST_MASK | + PAGE_PRESENT_MASK; + + if (curr_pte != default_pte) { + dev_err(hdev->dev, + "DRAM: mapping already exists for virt_addr 0x%llx\n", + virt_addr); + rc = -EINVAL; + goto err; + } + + if (hop1_new || hop2_new || hop3_new || hop4_new) { + dev_err(hdev->dev, + "DRAM mapping should not allocate more hops\n"); + rc = -EFAULT; + goto err; + } + } else if (curr_pte & PAGE_PRESENT_MASK) { + dev_err(hdev->dev, + "mapping already exists for virt_addr 0x%llx\n", + virt_addr); + + dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n", + *(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr); + dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n", + *(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr); + dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n", + *(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr); + dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n", + *(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr); + + if (!is_huge) + dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n", + *(u64 *) (uintptr_t) hop4_pte_addr, + hop4_pte_addr); + + rc = -EINVAL; + goto err; + } + + curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK + | PAGE_PRESENT_MASK; + + if (is_huge) + write_final_pte(ctx, hop3_pte_addr, curr_pte); + else + write_final_pte(ctx, hop4_pte_addr, curr_pte); + + if (hop1_new) { + curr_pte = + (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + write_pte(ctx, hop0_pte_addr, curr_pte); + } + if (hop2_new) { + curr_pte = + (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + write_pte(ctx, hop1_pte_addr, curr_pte); + get_pte(ctx, hop1_addr); + } + if (hop3_new) { + curr_pte = + (hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK; + write_pte(ctx, hop2_pte_addr, curr_pte); + get_pte(ctx, hop2_addr); + } + + if (!is_huge) { + if (hop4_new) { + curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) | + PAGE_PRESENT_MASK; + write_pte(ctx, hop3_pte_addr, curr_pte); + get_pte(ctx, hop3_addr); + } + + get_pte(ctx, hop4_addr); + } else { + get_pte(ctx, hop3_addr); + } + + return 0; + +err: + if (hop4_new) + free_hop(ctx, hop4_addr); + if (hop3_new) + free_hop(ctx, hop3_addr); + if (hop2_new) + free_hop(ctx, hop2_addr); + if (hop1_new) + free_hop(ctx, hop1_addr); + + return rc; +} + +/* + * hl_mmu_v1_swap_out - marks all mapping of the given ctx as swapped out + * + * @ctx: pointer to the context structure + * + */ +static void hl_mmu_v1_swap_out(struct hl_ctx *ctx) +{ + +} + +/* + * hl_mmu_v1_swap_in - marks all mapping of the given ctx as swapped in + * + * @ctx: pointer to the context structure + * + */ +static void hl_mmu_v1_swap_in(struct hl_ctx *ctx) +{ + +} + +/* + * hl_mmu_v1_prepare - prepare mmu for working with mmu v1 + * + * @hdev: pointer to the device structure + */ +void hl_mmu_v1_set_funcs(struct hl_device *hdev) +{ + struct hl_mmu_funcs *mmu = &hdev->mmu_func; + + mmu->init = hl_mmu_v1_init; + mmu->fini = hl_mmu_v1_fini; + mmu->ctx_init = hl_mmu_v1_ctx_init; + mmu->ctx_fini = hl_mmu_v1_ctx_fini; + mmu->map = _hl_mmu_v1_map; + mmu->unmap = _hl_mmu_v1_unmap; + mmu->flush = flush; + mmu->swap_out = hl_mmu_v1_swap_out; + mmu->swap_in = hl_mmu_v1_swap_in; +} diff --git a/drivers/misc/habanalabs/common/pci.c b/drivers/misc/habanalabs/common/pci.c new file mode 100644 index 000000000..607f9a11f --- /dev/null +++ b/drivers/misc/habanalabs/common/pci.c @@ -0,0 +1,421 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" +#include "../include/hw_ip/pci/pci_general.h" + +#include <linux/pci.h> + +#define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC (HL_PCI_ELBI_TIMEOUT_MSEC * 10) + +#define IATU_REGION_CTRL_REGION_EN_MASK BIT(31) +#define IATU_REGION_CTRL_MATCH_MODE_MASK BIT(30) +#define IATU_REGION_CTRL_NUM_MATCH_EN_MASK BIT(19) +#define IATU_REGION_CTRL_BAR_NUM_MASK GENMASK(10, 8) + +/** + * hl_pci_bars_map() - Map PCI BARs. + * @hdev: Pointer to hl_device structure. + * @name: Array of BAR names. + * @is_wc: Array with flag per BAR whether a write-combined mapping is needed. + * + * Request PCI regions and map them to kernel virtual addresses. + * + * Return: 0 on success, non-zero for failure. + */ +int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3], + bool is_wc[3]) +{ + struct pci_dev *pdev = hdev->pdev; + int rc, i, bar; + + rc = pci_request_regions(pdev, HL_NAME); + if (rc) { + dev_err(hdev->dev, "Cannot obtain PCI resources\n"); + return rc; + } + + for (i = 0 ; i < 3 ; i++) { + bar = i * 2; /* 64-bit BARs */ + hdev->pcie_bar[bar] = is_wc[i] ? + pci_ioremap_wc_bar(pdev, bar) : + pci_ioremap_bar(pdev, bar); + if (!hdev->pcie_bar[bar]) { + dev_err(hdev->dev, "pci_ioremap%s_bar failed for %s\n", + is_wc[i] ? "_wc" : "", name[i]); + rc = -ENODEV; + goto err; + } + } + + return 0; + +err: + for (i = 2 ; i >= 0 ; i--) { + bar = i * 2; /* 64-bit BARs */ + if (hdev->pcie_bar[bar]) + iounmap(hdev->pcie_bar[bar]); + } + + pci_release_regions(pdev); + + return rc; +} + +/** + * hl_pci_bars_unmap() - Unmap PCI BARS. + * @hdev: Pointer to hl_device structure. + * + * Release all PCI BARs and unmap their virtual addresses. + */ +static void hl_pci_bars_unmap(struct hl_device *hdev) +{ + struct pci_dev *pdev = hdev->pdev; + int i, bar; + + for (i = 2 ; i >= 0 ; i--) { + bar = i * 2; /* 64-bit BARs */ + iounmap(hdev->pcie_bar[bar]); + } + + pci_release_regions(pdev); +} + +/** + * hl_pci_elbi_write() - Write through the ELBI interface. + * @hdev: Pointer to hl_device structure. + * @addr: Address to write to + * @data: Data to write + * + * Return: 0 on success, negative value for failure. + */ +static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data) +{ + struct pci_dev *pdev = hdev->pdev; + ktime_t timeout; + u64 msec; + u32 val; + + if (hdev->pldm) + msec = HL_PLDM_PCI_ELBI_TIMEOUT_MSEC; + else + msec = HL_PCI_ELBI_TIMEOUT_MSEC; + + /* Clear previous status */ + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0); + + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr); + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data); + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL, + PCI_CONFIG_ELBI_CTRL_WRITE); + + timeout = ktime_add_ms(ktime_get(), msec); + for (;;) { + pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val); + if (val & PCI_CONFIG_ELBI_STS_MASK) + break; + if (ktime_compare(ktime_get(), timeout) > 0) { + pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, + &val); + break; + } + + usleep_range(300, 500); + } + + if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE) + return 0; + + if (val & PCI_CONFIG_ELBI_STS_ERR) + return -EIO; + + if (!(val & PCI_CONFIG_ELBI_STS_MASK)) { + dev_err(hdev->dev, "ELBI write didn't finish in time\n"); + return -EIO; + } + + dev_err(hdev->dev, "ELBI write has undefined bits in status\n"); + return -EIO; +} + +/** + * hl_pci_iatu_write() - iatu write routine. + * @hdev: Pointer to hl_device structure. + * @addr: Address to write to + * @data: Data to write + * + * Return: 0 on success, negative value for failure. + */ +int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u32 dbi_offset; + int rc; + + dbi_offset = addr & 0xFFF; + + /* Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail + * in case the firmware security is enabled + */ + hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000); + + rc = hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset, + data); + + if (rc) + return -EIO; + + return 0; +} + +/** + * hl_pci_reset_link_through_bridge() - Reset PCI link. + * @hdev: Pointer to hl_device structure. + */ +static void hl_pci_reset_link_through_bridge(struct hl_device *hdev) +{ + struct pci_dev *pdev = hdev->pdev; + struct pci_dev *parent_port; + u16 val; + + parent_port = pdev->bus->self; + pci_read_config_word(parent_port, PCI_BRIDGE_CONTROL, &val); + val |= PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val); + ssleep(1); + + val &= ~(PCI_BRIDGE_CTL_BUS_RESET); + pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val); + ssleep(3); +} + +/** + * hl_pci_set_inbound_region() - Configure inbound region + * @hdev: Pointer to hl_device structure. + * @region: Inbound region number. + * @pci_region: Inbound region parameters. + * + * Configure the iATU inbound region. + * + * Return: 0 on success, negative value for failure. + */ +int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region, + struct hl_inbound_pci_region *pci_region) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 bar_phys_base, region_base, region_end_address; + u32 offset, ctrl_reg_val; + int rc = 0; + + /* region offset */ + offset = (0x200 * region) + 0x100; + + if (pci_region->mode == PCI_ADDRESS_MATCH_MODE) { + bar_phys_base = hdev->pcie_bar_phys[pci_region->bar]; + region_base = bar_phys_base + pci_region->offset_in_bar; + region_end_address = region_base + pci_region->size - 1; + + rc |= hl_pci_iatu_write(hdev, offset + 0x8, + lower_32_bits(region_base)); + rc |= hl_pci_iatu_write(hdev, offset + 0xC, + upper_32_bits(region_base)); + rc |= hl_pci_iatu_write(hdev, offset + 0x10, + lower_32_bits(region_end_address)); + } + + /* Point to the specified address */ + rc |= hl_pci_iatu_write(hdev, offset + 0x14, + lower_32_bits(pci_region->addr)); + rc |= hl_pci_iatu_write(hdev, offset + 0x18, + upper_32_bits(pci_region->addr)); + rc |= hl_pci_iatu_write(hdev, offset + 0x0, 0); + + /* Enable + bar/address match + match enable + bar number */ + ctrl_reg_val = FIELD_PREP(IATU_REGION_CTRL_REGION_EN_MASK, 1); + ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_MATCH_MODE_MASK, + pci_region->mode); + ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_NUM_MATCH_EN_MASK, 1); + + if (pci_region->mode == PCI_BAR_MATCH_MODE) + ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_BAR_NUM_MASK, + pci_region->bar); + + rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val); + + /* Return the DBI window to the default location + * Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail + * in case the firmware security is enabled + */ + hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); + + if (rc) + dev_err(hdev->dev, "failed to map bar %u to 0x%08llx\n", + pci_region->bar, pci_region->addr); + + return rc; +} + +/** + * hl_pci_set_outbound_region() - Configure outbound region 0 + * @hdev: Pointer to hl_device structure. + * @pci_region: Outbound region parameters. + * + * Configure the iATU outbound region 0. + * + * Return: 0 on success, negative value for failure. + */ +int hl_pci_set_outbound_region(struct hl_device *hdev, + struct hl_outbound_pci_region *pci_region) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 outbound_region_end_address; + int rc = 0; + + /* Outbound Region 0 */ + outbound_region_end_address = + pci_region->addr + pci_region->size - 1; + rc |= hl_pci_iatu_write(hdev, 0x008, + lower_32_bits(pci_region->addr)); + rc |= hl_pci_iatu_write(hdev, 0x00C, + upper_32_bits(pci_region->addr)); + rc |= hl_pci_iatu_write(hdev, 0x010, + lower_32_bits(outbound_region_end_address)); + rc |= hl_pci_iatu_write(hdev, 0x014, 0); + + if ((hdev->power9_64bit_dma_enable) && (hdev->dma_mask == 64)) + rc |= hl_pci_iatu_write(hdev, 0x018, 0x08000000); + else + rc |= hl_pci_iatu_write(hdev, 0x018, 0); + + rc |= hl_pci_iatu_write(hdev, 0x020, + upper_32_bits(outbound_region_end_address)); + /* Increase region size */ + rc |= hl_pci_iatu_write(hdev, 0x000, 0x00002000); + /* Enable */ + rc |= hl_pci_iatu_write(hdev, 0x004, 0x80000000); + + /* Return the DBI window to the default location + * Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail + * in case the firmware security is enabled + */ + hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); + + return rc; +} + +/** + * hl_pci_set_dma_mask() - Set DMA masks for the device. + * @hdev: Pointer to hl_device structure. + * + * This function sets the DMA masks (regular and consistent) for a specified + * value. If it doesn't succeed, it tries to set it to a fall-back value + * + * Return: 0 on success, non-zero for failure. + */ +static int hl_pci_set_dma_mask(struct hl_device *hdev) +{ + struct pci_dev *pdev = hdev->pdev; + int rc; + + /* set DMA mask */ + rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask)); + if (rc) { + dev_err(hdev->dev, + "Failed to set pci dma mask to %d bits, error %d\n", + hdev->dma_mask, rc); + return rc; + } + + rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask)); + if (rc) { + dev_err(hdev->dev, + "Failed to set pci consistent dma mask to %d bits, error %d\n", + hdev->dma_mask, rc); + return rc; + } + + return 0; +} + +/** + * hl_pci_init() - PCI initialization code. + * @hdev: Pointer to hl_device structure. + * @cpu_boot_status_reg: status register of the device's CPU + * @boot_err0_reg: boot error register of the device's CPU + * @preboot_ver_timeout: how much to wait before bailing out on reading + * the preboot version + * + * Set DMA masks, initialize the PCI controller and map the PCI BARs. + * + * Return: 0 on success, non-zero for failure. + */ +int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg, + u32 boot_err0_reg, u32 preboot_ver_timeout) +{ + struct pci_dev *pdev = hdev->pdev; + int rc; + + if (hdev->reset_pcilink) + hl_pci_reset_link_through_bridge(hdev); + + rc = pci_enable_device_mem(pdev); + if (rc) { + dev_err(hdev->dev, "can't enable PCI device\n"); + return rc; + } + + pci_set_master(pdev); + + rc = hdev->asic_funcs->pci_bars_map(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to initialize PCI BARs\n"); + goto disable_device; + } + + rc = hdev->asic_funcs->init_iatu(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to initialize iATU\n"); + goto unmap_pci_bars; + } + + rc = hl_pci_set_dma_mask(hdev); + if (rc) + goto unmap_pci_bars; + + /* Before continuing in the initialization, we need to read the preboot + * version to determine whether we run with a security-enabled firmware + * The check will be done in each ASIC's specific code + */ + rc = hl_fw_read_preboot_ver(hdev, cpu_boot_status_reg, boot_err0_reg, + preboot_ver_timeout); + if (rc) + goto unmap_pci_bars; + + return 0; + +unmap_pci_bars: + hl_pci_bars_unmap(hdev); +disable_device: + pci_clear_master(pdev); + pci_disable_device(pdev); + + return rc; +} + +/** + * hl_fw_fini() - PCI finalization code. + * @hdev: Pointer to hl_device structure + * + * Unmap PCI bars and disable PCI device. + */ +void hl_pci_fini(struct hl_device *hdev) +{ + hl_pci_bars_unmap(hdev); + + pci_clear_master(hdev->pdev); + pci_disable_device(hdev->pdev); +} diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c new file mode 100644 index 000000000..3ceae8701 --- /dev/null +++ b/drivers/misc/habanalabs/common/sysfs.c @@ -0,0 +1,463 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2019 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "habanalabs.h" + +#include <linux/pci.h> + +long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) +{ + struct cpucp_packet pkt; + long result; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + if (curr) + pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_CURR_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + else + pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.pll_index = cpu_to_le32(pll_index); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, &result); + + if (rc) { + dev_err(hdev->dev, + "Failed to get frequency of PLL %d, error %d\n", + pll_index, rc); + result = rc; + } + + return result; +} + +void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) +{ + struct cpucp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.pll_index = cpu_to_le32(pll_index); + pkt.value = cpu_to_le64(freq); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, NULL); + + if (rc) + dev_err(hdev->dev, + "Failed to set frequency to PLL %d, error %d\n", + pll_index, rc); +} + +u64 hl_get_max_power(struct hl_device *hdev) +{ + struct cpucp_packet pkt; + long result; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, &result); + + if (rc) { + dev_err(hdev->dev, "Failed to get max power, error %d\n", rc); + result = rc; + } + + return result; +} + +void hl_set_max_power(struct hl_device *hdev) +{ + struct cpucp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.value = cpu_to_le64(hdev->max_power); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, NULL); + + if (rc) + dev_err(hdev->dev, "Failed to set max power, error %d\n", rc); +} + +static ssize_t uboot_ver_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", hdev->asic_prop.uboot_ver); +} + +static ssize_t armcp_kernel_ver_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.kernel_version); +} + +static ssize_t armcp_ver_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.cpucp_version); +} + +static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "0x%08x\n", + hdev->asic_prop.cpucp_info.cpld_version); +} + +static ssize_t cpucp_kernel_ver_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.kernel_version); +} + +static ssize_t cpucp_ver_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.cpucp_version); +} + +static ssize_t infineon_ver_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "0x%04x\n", + hdev->asic_prop.cpucp_info.infineon_version); +} + +static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.fuse_version); +} + +static ssize_t thermal_ver_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.thermal_version); +} + +static ssize_t preboot_btl_ver_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", hdev->asic_prop.preboot_ver); +} + +static ssize_t soft_reset_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + long value; + int rc; + + rc = kstrtoul(buf, 0, &value); + + if (rc) { + count = -EINVAL; + goto out; + } + + if (!hdev->supports_soft_reset) { + dev_err(hdev->dev, "Device does not support soft-reset\n"); + goto out; + } + + dev_warn(hdev->dev, "Soft-Reset requested through sysfs\n"); + + hl_device_reset(hdev, false, false); + +out: + return count; +} + +static ssize_t hard_reset_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + long value; + int rc; + + rc = kstrtoul(buf, 0, &value); + + if (rc) { + count = -EINVAL; + goto out; + } + + dev_warn(hdev->dev, "Hard-Reset requested through sysfs\n"); + + hl_device_reset(hdev, true, false); + +out: + return count; +} + +static ssize_t device_type_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + char *str; + + switch (hdev->asic_type) { + case ASIC_GOYA: + str = "GOYA"; + break; + case ASIC_GAUDI: + str = "GAUDI"; + break; + default: + dev_err(hdev->dev, "Unrecognized ASIC type %d\n", + hdev->asic_type); + return -EINVAL; + } + + return sprintf(buf, "%s\n", str); +} + +static ssize_t pci_addr_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%04x:%02x:%02x.%x\n", + pci_domain_nr(hdev->pdev->bus), + hdev->pdev->bus->number, + PCI_SLOT(hdev->pdev->devfn), + PCI_FUNC(hdev->pdev->devfn)); +} + +static ssize_t status_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + char *str; + + if (atomic_read(&hdev->in_reset)) + str = "In reset"; + else if (hdev->disabled) + str = "Malfunction"; + else + str = "Operational"; + + return sprintf(buf, "%s\n", str); +} + +static ssize_t soft_reset_cnt_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", hdev->soft_reset_cnt); +} + +static ssize_t hard_reset_cnt_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", hdev->hard_reset_cnt); +} + +static ssize_t max_power_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + long val; + + if (hl_device_disabled_or_in_reset(hdev)) + return -ENODEV; + + val = hl_get_max_power(hdev); + + return sprintf(buf, "%lu\n", val); +} + +static ssize_t max_power_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + unsigned long value; + int rc; + + if (hl_device_disabled_or_in_reset(hdev)) { + count = -ENODEV; + goto out; + } + + rc = kstrtoul(buf, 0, &value); + + if (rc) { + count = -EINVAL; + goto out; + } + + hdev->max_power = value; + hl_set_max_power(hdev); + +out: + return count; +} + +static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t offset, + size_t max_size) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct hl_device *hdev = dev_get_drvdata(dev); + char *data; + int rc; + + if (hl_device_disabled_or_in_reset(hdev)) + return -ENODEV; + + if (!max_size) + return -EINVAL; + + data = kzalloc(max_size, GFP_KERNEL); + if (!data) + return -ENOMEM; + + rc = hdev->asic_funcs->get_eeprom_data(hdev, data, max_size); + if (rc) + goto out; + + memcpy(buf, data, max_size); + +out: + kfree(data); + + return max_size; +} + +static DEVICE_ATTR_RO(armcp_kernel_ver); +static DEVICE_ATTR_RO(armcp_ver); +static DEVICE_ATTR_RO(cpld_ver); +static DEVICE_ATTR_RO(cpucp_kernel_ver); +static DEVICE_ATTR_RO(cpucp_ver); +static DEVICE_ATTR_RO(device_type); +static DEVICE_ATTR_RO(fuse_ver); +static DEVICE_ATTR_WO(hard_reset); +static DEVICE_ATTR_RO(hard_reset_cnt); +static DEVICE_ATTR_RO(infineon_ver); +static DEVICE_ATTR_RW(max_power); +static DEVICE_ATTR_RO(pci_addr); +static DEVICE_ATTR_RO(preboot_btl_ver); +static DEVICE_ATTR_WO(soft_reset); +static DEVICE_ATTR_RO(soft_reset_cnt); +static DEVICE_ATTR_RO(status); +static DEVICE_ATTR_RO(thermal_ver); +static DEVICE_ATTR_RO(uboot_ver); + +static struct bin_attribute bin_attr_eeprom = { + .attr = {.name = "eeprom", .mode = (0444)}, + .size = PAGE_SIZE, + .read = eeprom_read_handler +}; + +static struct attribute *hl_dev_attrs[] = { + &dev_attr_armcp_kernel_ver.attr, + &dev_attr_armcp_ver.attr, + &dev_attr_cpld_ver.attr, + &dev_attr_cpucp_kernel_ver.attr, + &dev_attr_cpucp_ver.attr, + &dev_attr_device_type.attr, + &dev_attr_fuse_ver.attr, + &dev_attr_hard_reset.attr, + &dev_attr_hard_reset_cnt.attr, + &dev_attr_infineon_ver.attr, + &dev_attr_max_power.attr, + &dev_attr_pci_addr.attr, + &dev_attr_preboot_btl_ver.attr, + &dev_attr_soft_reset.attr, + &dev_attr_soft_reset_cnt.attr, + &dev_attr_status.attr, + &dev_attr_thermal_ver.attr, + &dev_attr_uboot_ver.attr, + NULL, +}; + +static struct bin_attribute *hl_dev_bin_attrs[] = { + &bin_attr_eeprom, + NULL +}; + +static struct attribute_group hl_dev_attr_group = { + .attrs = hl_dev_attrs, + .bin_attrs = hl_dev_bin_attrs, +}; + +static struct attribute_group hl_dev_clks_attr_group; + +static const struct attribute_group *hl_dev_attr_groups[] = { + &hl_dev_attr_group, + &hl_dev_clks_attr_group, + NULL, +}; + +int hl_sysfs_init(struct hl_device *hdev) +{ + int rc; + + if (hdev->asic_type == ASIC_GOYA) + hdev->pm_mng_profile = PM_AUTO; + else + hdev->pm_mng_profile = PM_MANUAL; + + hdev->max_power = hdev->asic_prop.max_power_default; + + hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group); + + rc = device_add_groups(hdev->dev, hl_dev_attr_groups); + if (rc) { + dev_err(hdev->dev, + "Failed to add groups to device, error %d\n", rc); + return rc; + } + + return 0; +} + +void hl_sysfs_fini(struct hl_device *hdev) +{ + device_remove_groups(hdev->dev, hl_dev_attr_groups); +} |