summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c322
1 files changed, 322 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
new file mode 100644
index 000000000..3ac599f74
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2014-2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "kfd_priv.h"
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/idr.h>
+
+/*
+ * This extension supports a kernel level doorbells management for the
+ * kernel queues using the first doorbell page reserved for the kernel.
+ */
+
+/*
+ * Each device exposes a doorbell aperture, a PCI MMIO aperture that
+ * receives 32-bit writes that are passed to queues as wptr values.
+ * The doorbells are intended to be written by applications as part
+ * of queueing work on user-mode queues.
+ * We assign doorbells to applications in PAGE_SIZE-sized and aligned chunks.
+ * We map the doorbell address space into user-mode when a process creates
+ * its first queue on each device.
+ * Although the mapping is done by KFD, it is equivalent to an mmap of
+ * the /dev/kfd with the particular device encoded in the mmap offset.
+ * There will be other uses for mmap of /dev/kfd, so only a range of
+ * offsets (KFD_MMAP_DOORBELL_START-END) is used for doorbells.
+ */
+
+/* # of doorbell bytes allocated for each process. */
+size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
+{
+ if (!kfd->shared_resources.enable_mes)
+ return roundup(kfd->device_info.doorbell_size *
+ KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+ PAGE_SIZE);
+ else
+ return amdgpu_mes_doorbell_process_slice(
+ (struct amdgpu_device *)kfd->adev);
+}
+
+/* Doorbell calculations for device init. */
+int kfd_doorbell_init(struct kfd_dev *kfd)
+{
+ size_t doorbell_start_offset;
+ size_t doorbell_aperture_size;
+ size_t doorbell_process_limit;
+
+ /*
+ * With MES enabled, just set the doorbell base as it is needed
+ * to calculate doorbell physical address.
+ */
+ if (kfd->shared_resources.enable_mes) {
+ kfd->doorbell_base =
+ kfd->shared_resources.doorbell_physical_address;
+ return 0;
+ }
+
+ /*
+ * We start with calculations in bytes because the input data might
+ * only be byte-aligned.
+ * Only after we have done the rounding can we assume any alignment.
+ */
+
+ doorbell_start_offset =
+ roundup(kfd->shared_resources.doorbell_start_offset,
+ kfd_doorbell_process_slice(kfd));
+
+ doorbell_aperture_size =
+ rounddown(kfd->shared_resources.doorbell_aperture_size,
+ kfd_doorbell_process_slice(kfd));
+
+ if (doorbell_aperture_size > doorbell_start_offset)
+ doorbell_process_limit =
+ (doorbell_aperture_size - doorbell_start_offset) /
+ kfd_doorbell_process_slice(kfd);
+ else
+ return -ENOSPC;
+
+ if (!kfd->max_doorbell_slices ||
+ doorbell_process_limit < kfd->max_doorbell_slices)
+ kfd->max_doorbell_slices = doorbell_process_limit;
+
+ kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
+ doorbell_start_offset;
+
+ kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);
+
+ kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
+ kfd_doorbell_process_slice(kfd));
+
+ if (!kfd->doorbell_kernel_ptr)
+ return -ENOMEM;
+
+ pr_debug("Doorbell initialization:\n");
+ pr_debug("doorbell base == 0x%08lX\n",
+ (uintptr_t)kfd->doorbell_base);
+
+ pr_debug("doorbell_base_dw_offset == 0x%08lX\n",
+ kfd->doorbell_base_dw_offset);
+
+ pr_debug("doorbell_process_limit == 0x%08lX\n",
+ doorbell_process_limit);
+
+ pr_debug("doorbell_kernel_offset == 0x%08lX\n",
+ (uintptr_t)kfd->doorbell_base);
+
+ pr_debug("doorbell aperture size == 0x%08lX\n",
+ kfd->shared_resources.doorbell_aperture_size);
+
+ pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
+
+ return 0;
+}
+
+void kfd_doorbell_fini(struct kfd_dev *kfd)
+{
+ if (kfd->doorbell_kernel_ptr)
+ iounmap(kfd->doorbell_kernel_ptr);
+}
+
+int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
+ struct vm_area_struct *vma)
+{
+ phys_addr_t address;
+ struct kfd_process_device *pdd;
+
+ /*
+ * For simplicitly we only allow mapping of the entire doorbell
+ * allocation of a single device & process.
+ */
+ if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
+ return -EINVAL;
+
+ pdd = kfd_get_process_device_data(dev, process);
+ if (!pdd)
+ return -EINVAL;
+
+ /* Calculate physical address of doorbell */
+ address = kfd_get_process_doorbells(pdd);
+ if (!address)
+ return -ENOMEM;
+ vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
+ VM_DONTDUMP | VM_PFNMAP;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ pr_debug("Mapping doorbell page\n"
+ " target user address == 0x%08llX\n"
+ " physical address == 0x%08llX\n"
+ " vm_flags == 0x%04lX\n"
+ " size == 0x%04lX\n",
+ (unsigned long long) vma->vm_start, address, vma->vm_flags,
+ kfd_doorbell_process_slice(dev));
+
+
+ return io_remap_pfn_range(vma,
+ vma->vm_start,
+ address >> PAGE_SHIFT,
+ kfd_doorbell_process_slice(dev),
+ vma->vm_page_prot);
+}
+
+
+/* get kernel iomem pointer for a doorbell */
+void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
+ unsigned int *doorbell_off)
+{
+ u32 inx;
+
+ mutex_lock(&kfd->doorbell_mutex);
+ inx = find_first_zero_bit(kfd->doorbell_available_index,
+ KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+
+ __set_bit(inx, kfd->doorbell_available_index);
+ mutex_unlock(&kfd->doorbell_mutex);
+
+ if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
+ return NULL;
+
+ inx *= kfd->device_info.doorbell_size / sizeof(u32);
+
+ /*
+ * Calculating the kernel doorbell offset using the first
+ * doorbell page.
+ */
+ *doorbell_off = kfd->doorbell_base_dw_offset + inx;
+
+ pr_debug("Get kernel queue doorbell\n"
+ " doorbell offset == 0x%08X\n"
+ " doorbell index == 0x%x\n",
+ *doorbell_off, inx);
+
+ return kfd->doorbell_kernel_ptr + inx;
+}
+
+void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
+{
+ unsigned int inx;
+
+ inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
+ * sizeof(u32) / kfd->device_info.doorbell_size;
+
+ mutex_lock(&kfd->doorbell_mutex);
+ __clear_bit(inx, kfd->doorbell_available_index);
+ mutex_unlock(&kfd->doorbell_mutex);
+}
+
+void write_kernel_doorbell(void __iomem *db, u32 value)
+{
+ if (db) {
+ writel(value, db);
+ pr_debug("Writing %d to doorbell address %p\n", value, db);
+ }
+}
+
+void write_kernel_doorbell64(void __iomem *db, u64 value)
+{
+ if (db) {
+ WARN(((unsigned long)db & 7) != 0,
+ "Unaligned 64-bit doorbell");
+ writeq(value, (u64 __iomem *)db);
+ pr_debug("writing %llu to doorbell address %p\n", value, db);
+ }
+}
+
+unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
+ struct kfd_process_device *pdd,
+ unsigned int doorbell_id)
+{
+ /*
+ * doorbell_base_dw_offset accounts for doorbells taken by KGD.
+ * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
+ * the process's doorbells. The offset returned is in dword
+ * units regardless of the ASIC-dependent doorbell size.
+ */
+ if (!kfd->shared_resources.enable_mes)
+ return kfd->doorbell_base_dw_offset +
+ pdd->doorbell_index
+ * kfd_doorbell_process_slice(kfd) / sizeof(u32) +
+ doorbell_id *
+ kfd->device_info.doorbell_size / sizeof(u32);
+ else
+ return amdgpu_mes_get_doorbell_dw_offset_in_bar(
+ (struct amdgpu_device *)kfd->adev,
+ pdd->doorbell_index, doorbell_id);
+}
+
+uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
+{
+ uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size -
+ kfd->shared_resources.doorbell_start_offset) /
+ kfd_doorbell_process_slice(kfd) + 1;
+
+ return num_of_elems;
+
+}
+
+phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
+{
+ if (!pdd->doorbell_index) {
+ int r = kfd_alloc_process_doorbells(pdd->dev,
+ &pdd->doorbell_index);
+ if (r < 0)
+ return 0;
+ }
+
+ return pdd->dev->doorbell_base +
+ pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev);
+}
+
+int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
+{
+ int r = 0;
+
+ if (!kfd->shared_resources.enable_mes)
+ r = ida_simple_get(&kfd->doorbell_ida, 1,
+ kfd->max_doorbell_slices, GFP_KERNEL);
+ else
+ r = amdgpu_mes_alloc_process_doorbells(
+ (struct amdgpu_device *)kfd->adev,
+ doorbell_index);
+
+ if (r > 0)
+ *doorbell_index = r;
+
+ if (r < 0)
+ pr_err("Failed to allocate process doorbells\n");
+
+ return r;
+}
+
+void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index)
+{
+ if (doorbell_index) {
+ if (!kfd->shared_resources.enable_mes)
+ ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
+ else
+ amdgpu_mes_free_process_doorbells(
+ (struct amdgpu_device *)kfd->adev,
+ doorbell_index);
+ }
+}